diff --git a/AuxIndexStructures.cpp b/AuxIndexStructures.cpp deleted file mode 100644 index e4e573878f..0000000000 --- a/AuxIndexStructures.cpp +++ /dev/null @@ -1,342 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#include - -#include "AuxIndexStructures.h" - -#include "FaissAssert.h" - - -namespace faiss { - - -/*********************************************************************** - * RangeSearchResult - ***********************************************************************/ - -RangeSearchResult::RangeSearchResult (idx_t nq, bool alloc_lims): nq (nq) { - if (alloc_lims) { - lims = new size_t [nq + 1]; - memset (lims, 0, sizeof(*lims) * (nq + 1)); - } else { - lims = nullptr; - } - labels = nullptr; - distances = nullptr; - buffer_size = 1024 * 256; -} - -/// called when lims contains the nb of elements result entries -/// for each query -void RangeSearchResult::do_allocation () { - size_t ofs = 0; - for (int i = 0; i < nq; i++) { - size_t n = lims[i]; - lims [i] = ofs; - ofs += n; - } - lims [nq] = ofs; - labels = new idx_t [ofs]; - distances = new float [ofs]; -} - -RangeSearchResult::~RangeSearchResult () { - delete [] labels; - delete [] distances; - delete [] lims; -} - - - - - -/*********************************************************************** - * BufferList - ***********************************************************************/ - - -BufferList::BufferList (size_t buffer_size): - buffer_size (buffer_size) -{ - wp = buffer_size; -} - -BufferList::~BufferList () -{ - for (int i = 0; i < buffers.size(); i++) { - delete [] buffers[i].ids; - delete [] buffers[i].dis; - } -} - -void BufferList::add (idx_t id, float dis) { - if (wp == buffer_size) { // need new buffer - append_buffer(); - } - Buffer & buf = buffers.back(); - buf.ids [wp] = id; - buf.dis [wp] = dis; - wp++; -} - - -void BufferList::append_buffer () -{ - Buffer buf = {new idx_t [buffer_size], new float [buffer_size]}; - buffers.push_back (buf); - wp = 0; -} - -/// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to -/// tables dest_ids, dest_dis -void BufferList::copy_range (size_t ofs, size_t n, - idx_t * dest_ids, float *dest_dis) -{ - size_t bno = ofs / buffer_size; - ofs -= bno * buffer_size; - while (n > 0) { - size_t ncopy = ofs + n < buffer_size ? n : buffer_size - ofs; - Buffer buf = buffers [bno]; - memcpy (dest_ids, buf.ids + ofs, ncopy * sizeof(*dest_ids)); - memcpy (dest_dis, buf.dis + ofs, ncopy * sizeof(*dest_dis)); - dest_ids += ncopy; - dest_dis += ncopy; - ofs = 0; - bno ++; - n -= ncopy; - } -} - - -/*********************************************************************** - * RangeSearchPartialResult - ***********************************************************************/ - -void RangeQueryResult::add (float dis, idx_t id) { - nres++; - pres->add (id, dis); -} - - - -RangeSearchPartialResult::RangeSearchPartialResult (RangeSearchResult * res_in): - BufferList(res_in->buffer_size), - res(res_in) -{} - - -/// begin a new result -RangeQueryResult & - RangeSearchPartialResult::new_result (idx_t qno) -{ - RangeQueryResult qres = {qno, 0, this}; - queries.push_back (qres); - return queries.back(); -} - - -void RangeSearchPartialResult::finalize () -{ - set_lims (); -#pragma omp barrier - -#pragma omp single - res->do_allocation (); - -#pragma omp barrier - copy_result (); -} - - -/// called by range_search before do_allocation -void RangeSearchPartialResult::set_lims () -{ - for (int i = 0; i < queries.size(); i++) { - RangeQueryResult & qres = queries[i]; - res->lims[qres.qno] = qres.nres; - } -} - -/// called by range_search after do_allocation -void RangeSearchPartialResult::copy_result (bool incremental) -{ - size_t ofs = 0; - for (int i = 0; i < queries.size(); i++) { - RangeQueryResult & qres = queries[i]; - - copy_range (ofs, qres.nres, - res->labels + res->lims[qres.qno], - res->distances + res->lims[qres.qno]); - if (incremental) { - res->lims[qres.qno] += qres.nres; - } - ofs += qres.nres; - } -} - -void RangeSearchPartialResult::merge (std::vector & - partial_results, bool do_delete) -{ - - int npres = partial_results.size(); - if (npres == 0) return; - RangeSearchResult *result = partial_results[0]->res; - size_t nx = result->nq; - - // count - for (const RangeSearchPartialResult * pres : partial_results) { - if (!pres) continue; - for (const RangeQueryResult &qres : pres->queries) { - result->lims[qres.qno] += qres.nres; - } - } - result->do_allocation (); - for (int j = 0; j < npres; j++) { - if (!partial_results[j]) continue; - partial_results[j]->copy_result (true); - if (do_delete) { - delete partial_results[j]; - partial_results[j] = nullptr; - } - } - - // reset the limits - for (size_t i = nx; i > 0; i--) { - result->lims [i] = result->lims [i - 1]; - } - result->lims [0] = 0; -} - -/*********************************************************************** - * IDSelectorRange - ***********************************************************************/ - -IDSelectorRange::IDSelectorRange (idx_t imin, idx_t imax): - imin (imin), imax (imax) -{ -} - -bool IDSelectorRange::is_member (idx_t id) const -{ - return id >= imin && id < imax; -} - - -/*********************************************************************** - * IDSelectorBatch - ***********************************************************************/ - -IDSelectorBatch::IDSelectorBatch (size_t n, const idx_t *indices) -{ - nbits = 0; - while (n > (1L << nbits)) nbits++; - nbits += 5; - // for n = 1M, nbits = 25 is optimal, see P56659518 - - mask = (1L << nbits) - 1; - bloom.resize (1UL << (nbits - 3), 0); - for (long i = 0; i < n; i++) { - Index::idx_t id = indices[i]; - set.insert(id); - id &= mask; - bloom[id >> 3] |= 1 << (id & 7); - } -} - -bool IDSelectorBatch::is_member (idx_t i) const -{ - long im = i & mask; - if(!(bloom[im>>3] & (1 << (im & 7)))) { - return 0; - } - return set.count(i); -} - - -/*********************************************************************** - * IO functions - ***********************************************************************/ - - -int IOReader::fileno () -{ - FAISS_THROW_MSG ("IOReader does not support memory mapping"); -} - -int IOWriter::fileno () -{ - FAISS_THROW_MSG ("IOWriter does not support memory mapping"); -} - - -size_t VectorIOWriter::operator()( - const void *ptr, size_t size, size_t nitems) -{ - size_t o = data.size(); - data.resize(o + size * nitems); - memcpy (&data[o], ptr, size * nitems); - return nitems; -} - -size_t VectorIOReader::operator()( - void *ptr, size_t size, size_t nitems) -{ - if (rp >= data.size()) return 0; - size_t nremain = (data.size() - rp) / size; - if (nremain < nitems) nitems = nremain; - memcpy (ptr, &data[rp], size * nitems); - rp += size * nitems; - return nitems; -} - - -/*********************************************************** - * Interrupt callback - ***********************************************************/ - - -std::unique_ptr InterruptCallback::instance; - -std::mutex InterruptCallback::lock; - -void InterruptCallback::clear_instance () { - delete instance.release (); -} - -void InterruptCallback::check () { - if (!instance.get()) { - return; - } - if (instance->want_interrupt ()) { - FAISS_THROW_MSG ("computation interrupted"); - } -} - -bool InterruptCallback::is_interrupted () { - if (!instance.get()) { - return false; - } - std::lock_guard guard(lock); - return instance->want_interrupt(); -} - - -size_t InterruptCallback::get_period_hint (size_t flops) { - if (!instance.get()) { - return 1L << 30; // never check - } - // for 10M flops, it is reasonable to check once every 10 iterations - return std::max((size_t)10 * 10 * 1000 * 1000 / (flops + 1), (size_t)1); -} - - - - -} // namespace faiss diff --git a/AuxIndexStructures.h b/AuxIndexStructures.h deleted file mode 100644 index 37056729b2..0000000000 --- a/AuxIndexStructures.h +++ /dev/null @@ -1,286 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -// Auxiliary index structures, that are used in indexes but that can -// be forward-declared - -#ifndef FAISS_AUX_INDEX_STRUCTURES_H -#define FAISS_AUX_INDEX_STRUCTURES_H - -#include - -#include -#include -#include -#include - -#include "Index.h" - -namespace faiss { - -/** The objective is to have a simple result structure while - * minimizing the number of mem copies in the result. The method - * do_allocation can be overloaded to allocate the result tables in - * the matrix type of a scripting language like Lua or Python. */ -struct RangeSearchResult { - size_t nq; ///< nb of queries - size_t *lims; ///< size (nq + 1) - - typedef Index::idx_t idx_t; - - idx_t *labels; ///< result for query i is labels[lims[i]:lims[i+1]] - float *distances; ///< corresponding distances (not sorted) - - size_t buffer_size; ///< size of the result buffers used - - /// lims must be allocated on input to range_search. - explicit RangeSearchResult (idx_t nq, bool alloc_lims=true); - - /// called when lims contains the nb of elements result entries - /// for each query - virtual void do_allocation (); - - virtual ~RangeSearchResult (); -}; - - -/** Encapsulates a set of ids to remove. */ -struct IDSelector { - typedef Index::idx_t idx_t; - virtual bool is_member (idx_t id) const = 0; - virtual ~IDSelector() {} -}; - - - -/** remove ids between [imni, imax) */ -struct IDSelectorRange: IDSelector { - idx_t imin, imax; - - IDSelectorRange (idx_t imin, idx_t imax); - bool is_member(idx_t id) const override; - ~IDSelectorRange() override {} -}; - - -/** Remove ids from a set. Repetitions of ids in the indices set - * passed to the constructor does not hurt performance. The hash - * function used for the bloom filter and GCC's implementation of - * unordered_set are just the least significant bits of the id. This - * works fine for random ids or ids in sequences but will produce many - * hash collisions if lsb's are always the same */ -struct IDSelectorBatch: IDSelector { - - std::unordered_set set; - - typedef unsigned char uint8_t; - std::vector bloom; // assumes low bits of id are a good hash value - int nbits; - idx_t mask; - - IDSelectorBatch (size_t n, const idx_t *indices); - bool is_member(idx_t id) const override; - ~IDSelectorBatch() override {} -}; - -/**************************************************************** - * Result structures for range search. - * - * The main constraint here is that we want to support parallel - * queries from different threads in various ways: 1 thread per query, - * several threads per query. We store the actual results in blocks of - * fixed size rather than exponentially increasing memory. At the end, - * we copy the block content to a linear result array. - *****************************************************************/ - -/** List of temporary buffers used to store results before they are - * copied to the RangeSearchResult object. */ -struct BufferList { - typedef Index::idx_t idx_t; - - // buffer sizes in # entries - size_t buffer_size; - - struct Buffer { - idx_t *ids; - float *dis; - }; - - std::vector buffers; - size_t wp; ///< write pointer in the last buffer. - - explicit BufferList (size_t buffer_size); - - ~BufferList (); - - /// create a new buffer - void append_buffer (); - - /// add one result, possibly appending a new buffer if needed - void add (idx_t id, float dis); - - /// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to - /// tables dest_ids, dest_dis - void copy_range (size_t ofs, size_t n, - idx_t * dest_ids, float *dest_dis); - -}; - -struct RangeSearchPartialResult; - -/// result structure for a single query -struct RangeQueryResult { - using idx_t = Index::idx_t; - idx_t qno; //< id of the query - size_t nres; //< nb of results for this query - RangeSearchPartialResult * pres; - - /// called by search function to report a new result - void add (float dis, idx_t id); -}; - -/// the entries in the buffers are split per query -struct RangeSearchPartialResult: BufferList { - RangeSearchResult * res; - - /// eventually the result will be stored in res_in - explicit RangeSearchPartialResult (RangeSearchResult * res_in); - - /// query ids + nb of results per query. - std::vector queries; - - /// begin a new result - RangeQueryResult & new_result (idx_t qno); - - /***************************************** - * functions used at the end of the search to merge the result - * lists */ - void finalize (); - - /// called by range_search before do_allocation - void set_lims (); - - /// called by range_search after do_allocation - void copy_result (bool incremental = false); - - /// merge a set of PartialResult's into one RangeSearchResult - /// on ouptut the partialresults are empty! - static void merge (std::vector & - partial_results, bool do_delete=true); - -}; - -/*********************************************************** - * Abstract I/O objects - ***********************************************************/ - -struct IOReader { - // name that can be used in error messages - std::string name; - - // fread - virtual size_t operator()( - void *ptr, size_t size, size_t nitems) = 0; - - // return a file number that can be memory-mapped - virtual int fileno (); - - virtual ~IOReader() {} -}; - -struct IOWriter { - // name that can be used in error messages - std::string name; - - // fwrite - virtual size_t operator()( - const void *ptr, size_t size, size_t nitems) = 0; - - // return a file number that can be memory-mapped - virtual int fileno (); - - virtual ~IOWriter() {} -}; - - -struct VectorIOReader:IOReader { - std::vector data; - size_t rp = 0; - size_t operator()(void *ptr, size_t size, size_t nitems) override; -}; - -struct VectorIOWriter:IOWriter { - std::vector data; - size_t operator()(const void *ptr, size_t size, size_t nitems) override; -}; - -/*********************************************************** - * The distance computer maintains a current query and computes - * distances to elements in an index that supports random access. - * - * The DistanceComputer is not intended to be thread-safe (eg. because - * it maintains counters) so the distance functions are not const, - * instanciate one from each thread if needed. - ***********************************************************/ -struct DistanceComputer { - using idx_t = Index::idx_t; - - /// called before computing distances - virtual void set_query(const float *x) = 0; - - /// compute distance of vector i to current query - virtual float operator () (idx_t i) = 0; - - /// compute distance between two stored vectors - virtual float symmetric_dis (idx_t i, idx_t j) = 0; - - virtual ~DistanceComputer() {} -}; - -/*********************************************************** - * Interrupt callback - ***********************************************************/ - -struct InterruptCallback { - virtual bool want_interrupt () = 0; - virtual ~InterruptCallback() {} - - // lock that protects concurrent calls to is_interrupted - static std::mutex lock; - - static std::unique_ptr instance; - - static void clear_instance (); - - /** check if: - * - an interrupt callback is set - * - the callback retuns true - * if this is the case, then throw an exception. Should not be called - * from multiple threds. - */ - static void check (); - - /// same as check() but return true if is interrupted instead of - /// throwing. Can be called from multiple threads. - static bool is_interrupted (); - - /** assuming each iteration takes a certain number of flops, what - * is a reasonable interval to check for interrupts? - */ - static size_t get_period_hint (size_t flops); - -}; - - - -}; // namespace faiss - - - -#endif diff --git a/FaissAssert.h b/FaissAssert.h deleted file mode 100644 index 64a0eafc9a..0000000000 --- a/FaissAssert.h +++ /dev/null @@ -1,95 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#ifndef FAISS_ASSERT_INCLUDED -#define FAISS_ASSERT_INCLUDED - -#include "FaissException.h" -#include -#include -#include - -/// -/// Assertions -/// - -#define FAISS_ASSERT(X) \ - do { \ - if (! (X)) { \ - fprintf(stderr, "Faiss assertion '%s' failed in %s " \ - "at %s:%d\n", \ - #X, __PRETTY_FUNCTION__, __FILE__, __LINE__); \ - abort(); \ - } \ - } while (false) - -#define FAISS_ASSERT_MSG(X, MSG) \ - do { \ - if (! (X)) { \ - fprintf(stderr, "Faiss assertion '%s' failed in %s " \ - "at %s:%d; details: " MSG "\n", \ - #X, __PRETTY_FUNCTION__, __FILE__, __LINE__); \ - abort(); \ - } \ - } while (false) - -#define FAISS_ASSERT_FMT(X, FMT, ...) \ - do { \ - if (! (X)) { \ - fprintf(stderr, "Faiss assertion '%s' failed in %s " \ - "at %s:%d; details: " FMT "\n", \ - #X, __PRETTY_FUNCTION__, __FILE__, __LINE__, __VA_ARGS__); \ - abort(); \ - } \ - } while (false) - -/// -/// Exceptions for returning user errors -/// - -#define FAISS_THROW_MSG(MSG) \ - do { \ - throw faiss::FaissException(MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__); \ - } while (false) - -#define FAISS_THROW_FMT(FMT, ...) \ - do { \ - std::string __s; \ - int __size = snprintf(nullptr, 0, FMT, __VA_ARGS__); \ - __s.resize(__size + 1); \ - snprintf(&__s[0], __s.size(), FMT, __VA_ARGS__); \ - throw faiss::FaissException(__s, __PRETTY_FUNCTION__, __FILE__, __LINE__); \ - } while (false) - -/// -/// Exceptions thrown upon a conditional failure -/// - -#define FAISS_THROW_IF_NOT(X) \ - do { \ - if (!(X)) { \ - FAISS_THROW_FMT("Error: '%s' failed", #X); \ - } \ - } while (false) - -#define FAISS_THROW_IF_NOT_MSG(X, MSG) \ - do { \ - if (!(X)) { \ - FAISS_THROW_FMT("Error: '%s' failed: " MSG, #X); \ - } \ - } while (false) - -#define FAISS_THROW_IF_NOT_FMT(X, FMT, ...) \ - do { \ - if (!(X)) { \ - FAISS_THROW_FMT("Error: '%s' failed: " FMT, #X, __VA_ARGS__); \ - } \ - } while (false) - -#endif diff --git a/FaissException.cpp b/FaissException.cpp deleted file mode 100644 index ce3de0fc15..0000000000 --- a/FaissException.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#include "FaissException.h" -#include - -namespace faiss { - -FaissException::FaissException(const std::string& m) - : msg(m) { -} - -FaissException::FaissException(const std::string& m, - const char* funcName, - const char* file, - int line) { - int size = snprintf(nullptr, 0, "Error in %s at %s:%d: %s", - funcName, file, line, m.c_str()); - msg.resize(size + 1); - snprintf(&msg[0], msg.size(), "Error in %s at %s:%d: %s", - funcName, file, line, m.c_str()); -} - -const char* -FaissException::what() const noexcept { - return msg.c_str(); -} - -void handleExceptions( - std::vector>& exceptions) { - if (exceptions.size() == 1) { - // throw the single received exception directly - std::rethrow_exception(exceptions.front().second); - - } else if (exceptions.size() > 1) { - // multiple exceptions; aggregate them and return a single exception - std::stringstream ss; - - for (auto& p : exceptions) { - try { - std::rethrow_exception(p.second); - } catch (std::exception& ex) { - if (ex.what()) { - // exception message available - ss << "Exception thrown from index " << p.first << ": " - << ex.what() << "\n"; - } else { - // No message available - ss << "Unknown exception thrown from index " << p.first << "\n"; - } - } catch (...) { - ss << "Unknown exception thrown from index " << p.first << "\n"; - } - } - - throw FaissException(ss.str()); - } -} - -} diff --git a/FaissException.h b/FaissException.h deleted file mode 100644 index 9d54edbad5..0000000000 --- a/FaissException.h +++ /dev/null @@ -1,71 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#ifndef FAISS_EXCEPTION_INCLUDED -#define FAISS_EXCEPTION_INCLUDED - -#include -#include -#include -#include - -namespace faiss { - -/// Base class for Faiss exceptions -class FaissException : public std::exception { - public: - explicit FaissException(const std::string& msg); - - FaissException(const std::string& msg, - const char* funcName, - const char* file, - int line); - - /// from std::exception - const char* what() const noexcept override; - - std::string msg; -}; - -/// Handle multiple exceptions from worker threads, throwing an appropriate -/// exception that aggregates the information -/// The pair int is the thread that generated the exception -void -handleExceptions(std::vector>& exceptions); - -/** bare-bones unique_ptr - * this one deletes with delete [] */ -template -struct ScopeDeleter { - const T * ptr; - explicit ScopeDeleter (const T* ptr = nullptr): ptr (ptr) {} - void release () {ptr = nullptr; } - void set (const T * ptr_in) { ptr = ptr_in; } - void swap (ScopeDeleter &other) {std::swap (ptr, other.ptr); } - ~ScopeDeleter () { - delete [] ptr; - } -}; - -/** same but deletes with the simple delete (least common case) */ -template -struct ScopeDeleter1 { - const T * ptr; - explicit ScopeDeleter1 (const T* ptr = nullptr): ptr (ptr) {} - void release () {ptr = nullptr; } - void set (const T * ptr_in) { ptr = ptr_in; } - void swap (ScopeDeleter1 &other) {std::swap (ptr, other.ptr); } - ~ScopeDeleter1 () { - delete ptr; - } -}; - -} - -#endif diff --git a/HNSW.cpp b/HNSW.cpp deleted file mode 100644 index 28ccdcbe44..0000000000 --- a/HNSW.cpp +++ /dev/null @@ -1,815 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#include "HNSW.h" -#include "AuxIndexStructures.h" - -namespace faiss { - -using idx_t = Index::idx_t; - -/************************************************************** - * HNSW structure implementation - **************************************************************/ - -int HNSW::nb_neighbors(int layer_no) const -{ - return cum_nneighbor_per_level[layer_no + 1] - - cum_nneighbor_per_level[layer_no]; -} - -void HNSW::set_nb_neighbors(int level_no, int n) -{ - FAISS_THROW_IF_NOT(levels.size() == 0); - int cur_n = nb_neighbors(level_no); - for (int i = level_no + 1; i < cum_nneighbor_per_level.size(); i++) { - cum_nneighbor_per_level[i] += n - cur_n; - } -} - -int HNSW::cum_nb_neighbors(int layer_no) const -{ - return cum_nneighbor_per_level[layer_no]; -} - -void HNSW::neighbor_range(idx_t no, int layer_no, - size_t * begin, size_t * end) const -{ - size_t o = offsets[no]; - *begin = o + cum_nb_neighbors(layer_no); - *end = o + cum_nb_neighbors(layer_no + 1); -} - - - -HNSW::HNSW(int M) : rng(12345) { - set_default_probas(M, 1.0 / log(M)); - max_level = -1; - entry_point = -1; - efSearch = 16; - efConstruction = 40; - upper_beam = 1; - offsets.push_back(0); -} - - -int HNSW::random_level() -{ - double f = rng.rand_float(); - // could be a bit faster with bissection - for (int level = 0; level < assign_probas.size(); level++) { - if (f < assign_probas[level]) { - return level; - } - f -= assign_probas[level]; - } - // happens with exponentially low probability - return assign_probas.size() - 1; -} - -void HNSW::set_default_probas(int M, float levelMult) -{ - int nn = 0; - cum_nneighbor_per_level.push_back (0); - for (int level = 0; ;level++) { - float proba = exp(-level / levelMult) * (1 - exp(-1 / levelMult)); - if (proba < 1e-9) break; - assign_probas.push_back(proba); - nn += level == 0 ? M * 2 : M; - cum_nneighbor_per_level.push_back (nn); - } -} - -void HNSW::clear_neighbor_tables(int level) -{ - for (int i = 0; i < levels.size(); i++) { - size_t begin, end; - neighbor_range(i, level, &begin, &end); - for (size_t j = begin; j < end; j++) { - neighbors[j] = -1; - } - } -} - - -void HNSW::reset() { - max_level = -1; - entry_point = -1; - offsets.clear(); - offsets.push_back(0); - levels.clear(); - neighbors.clear(); -} - - - -void HNSW::print_neighbor_stats(int level) const -{ - FAISS_THROW_IF_NOT (level < cum_nneighbor_per_level.size()); - printf("stats on level %d, max %d neighbors per vertex:\n", - level, nb_neighbors(level)); - size_t tot_neigh = 0, tot_common = 0, tot_reciprocal = 0, n_node = 0; -#pragma omp parallel for reduction(+: tot_neigh) reduction(+: tot_common) \ - reduction(+: tot_reciprocal) reduction(+: n_node) - for (int i = 0; i < levels.size(); i++) { - if (levels[i] > level) { - n_node++; - size_t begin, end; - neighbor_range(i, level, &begin, &end); - std::unordered_set neighset; - for (size_t j = begin; j < end; j++) { - if (neighbors [j] < 0) break; - neighset.insert(neighbors[j]); - } - int n_neigh = neighset.size(); - int n_common = 0; - int n_reciprocal = 0; - for (size_t j = begin; j < end; j++) { - storage_idx_t i2 = neighbors[j]; - if (i2 < 0) break; - FAISS_ASSERT(i2 != i); - size_t begin2, end2; - neighbor_range(i2, level, &begin2, &end2); - for (size_t j2 = begin2; j2 < end2; j2++) { - storage_idx_t i3 = neighbors[j2]; - if (i3 < 0) break; - if (i3 == i) { - n_reciprocal++; - continue; - } - if (neighset.count(i3)) { - neighset.erase(i3); - n_common++; - } - } - } - tot_neigh += n_neigh; - tot_common += n_common; - tot_reciprocal += n_reciprocal; - } - } - float normalizer = n_node; - printf(" nb of nodes at that level %ld\n", n_node); - printf(" neighbors per node: %.2f (%ld)\n", - tot_neigh / normalizer, tot_neigh); - printf(" nb of reciprocal neighbors: %.2f\n", tot_reciprocal / normalizer); - printf(" nb of neighbors that are also neighbor-of-neighbors: %.2f (%ld)\n", - tot_common / normalizer, tot_common); - - - -} - - -void HNSW::fill_with_random_links(size_t n) -{ - int max_level = prepare_level_tab(n); - RandomGenerator rng2(456); - - for (int level = max_level - 1; level >= 0; --level) { - std::vector elts; - for (int i = 0; i < n; i++) { - if (levels[i] > level) { - elts.push_back(i); - } - } - printf ("linking %ld elements in level %d\n", - elts.size(), level); - - if (elts.size() == 1) continue; - - for (int ii = 0; ii < elts.size(); ii++) { - int i = elts[ii]; - size_t begin, end; - neighbor_range(i, 0, &begin, &end); - for (size_t j = begin; j < end; j++) { - int other = 0; - do { - other = elts[rng2.rand_int(elts.size())]; - } while(other == i); - - neighbors[j] = other; - } - } - } -} - - -int HNSW::prepare_level_tab(size_t n, bool preset_levels) -{ - size_t n0 = offsets.size() - 1; - - if (preset_levels) { - FAISS_ASSERT (n0 + n == levels.size()); - } else { - FAISS_ASSERT (n0 == levels.size()); - for (int i = 0; i < n; i++) { - int pt_level = random_level(); - levels.push_back(pt_level + 1); - } - } - - int max_level = 0; - for (int i = 0; i < n; i++) { - int pt_level = levels[i + n0] - 1; - if (pt_level > max_level) max_level = pt_level; - offsets.push_back(offsets.back() + - cum_nb_neighbors(pt_level + 1)); - neighbors.resize(offsets.back(), -1); - } - - return max_level; -} - - -/** Enumerate vertices from farthest to nearest from query, keep a - * neighbor only if there is no previous neighbor that is closer to - * that vertex than the query. - */ -void HNSW::shrink_neighbor_list( - DistanceComputer& qdis, - std::priority_queue& input, - std::vector& output, - int max_size) -{ - while (input.size() > 0) { - NodeDistFarther v1 = input.top(); - input.pop(); - float dist_v1_q = v1.d; - - bool good = true; - for (NodeDistFarther v2 : output) { - float dist_v1_v2 = qdis.symmetric_dis(v2.id, v1.id); - - if (dist_v1_v2 < dist_v1_q) { - good = false; - break; - } - } - - if (good) { - output.push_back(v1); - if (output.size() >= max_size) { - return; - } - } - } -} - - -namespace { - - -using storage_idx_t = HNSW::storage_idx_t; -using NodeDistCloser = HNSW::NodeDistCloser; -using NodeDistFarther = HNSW::NodeDistFarther; - - -/************************************************************** - * Addition subroutines - **************************************************************/ - - -/// remove neighbors from the list to make it smaller than max_size -void shrink_neighbor_list( - DistanceComputer& qdis, - std::priority_queue& resultSet1, - int max_size) -{ - if (resultSet1.size() < max_size) { - return; - } - std::priority_queue resultSet; - std::vector returnlist; - - while (resultSet1.size() > 0) { - resultSet.emplace(resultSet1.top().d, resultSet1.top().id); - resultSet1.pop(); - } - - HNSW::shrink_neighbor_list(qdis, resultSet, returnlist, max_size); - - for (NodeDistFarther curen2 : returnlist) { - resultSet1.emplace(curen2.d, curen2.id); - } - -} - - -/// add a link between two elements, possibly shrinking the list -/// of links to make room for it. -void add_link(HNSW& hnsw, - DistanceComputer& qdis, - storage_idx_t src, storage_idx_t dest, - int level) -{ - size_t begin, end; - hnsw.neighbor_range(src, level, &begin, &end); - if (hnsw.neighbors[end - 1] == -1) { - // there is enough room, find a slot to add it - size_t i = end; - while(i > begin) { - if (hnsw.neighbors[i - 1] != -1) break; - i--; - } - hnsw.neighbors[i] = dest; - return; - } - - // otherwise we let them fight out which to keep - - // copy to resultSet... - std::priority_queue resultSet; - resultSet.emplace(qdis.symmetric_dis(src, dest), dest); - for (size_t i = begin; i < end; i++) { // HERE WAS THE BUG - storage_idx_t neigh = hnsw.neighbors[i]; - resultSet.emplace(qdis.symmetric_dis(src, neigh), neigh); - } - - shrink_neighbor_list(qdis, resultSet, end - begin); - - // ...and back - size_t i = begin; - while (resultSet.size()) { - hnsw.neighbors[i++] = resultSet.top().id; - resultSet.pop(); - } - // they may have shrunk more than just by 1 element - while(i < end) { - hnsw.neighbors[i++] = -1; - } -} - -/// search neighbors on a single level, starting from an entry point -void search_neighbors_to_add( - HNSW& hnsw, - DistanceComputer& qdis, - std::priority_queue& results, - int entry_point, - float d_entry_point, - int level, - VisitedTable &vt) -{ - // top is nearest candidate - std::priority_queue candidates; - - NodeDistFarther ev(d_entry_point, entry_point); - candidates.push(ev); - results.emplace(d_entry_point, entry_point); - vt.set(entry_point); - - while (!candidates.empty()) { - // get nearest - const NodeDistFarther &currEv = candidates.top(); - - if (currEv.d > results.top().d) { - break; - } - int currNode = currEv.id; - candidates.pop(); - - // loop over neighbors - size_t begin, end; - hnsw.neighbor_range(currNode, level, &begin, &end); - for(size_t i = begin; i < end; i++) { - storage_idx_t nodeId = hnsw.neighbors[i]; - if (nodeId < 0) break; - if (vt.get(nodeId)) continue; - vt.set(nodeId); - - float dis = qdis(nodeId); - NodeDistFarther evE1(dis, nodeId); - - if (results.size() < hnsw.efConstruction || - results.top().d > dis) { - - results.emplace(dis, nodeId); - candidates.emplace(dis, nodeId); - if (results.size() > hnsw.efConstruction) { - results.pop(); - } - } - } - } - vt.advance(); -} - - -/************************************************************** - * Searching subroutines - **************************************************************/ - -/// greedily update a nearest vector at a given level -void greedy_update_nearest(const HNSW& hnsw, - DistanceComputer& qdis, - int level, - storage_idx_t& nearest, - float& d_nearest) -{ - for(;;) { - storage_idx_t prev_nearest = nearest; - - size_t begin, end; - hnsw.neighbor_range(nearest, level, &begin, &end); - for(size_t i = begin; i < end; i++) { - storage_idx_t v = hnsw.neighbors[i]; - if (v < 0) break; - float dis = qdis(v); - if (dis < d_nearest) { - nearest = v; - d_nearest = dis; - } - } - if (nearest == prev_nearest) { - return; - } - } -} - - -} // namespace - - -/// Finds neighbors and builds links with them, starting from an entry -/// point. The own neighbor list is assumed to be locked. -void HNSW::add_links_starting_from(DistanceComputer& ptdis, - storage_idx_t pt_id, - storage_idx_t nearest, - float d_nearest, - int level, - omp_lock_t *locks, - VisitedTable &vt) -{ - std::priority_queue link_targets; - - search_neighbors_to_add(*this, ptdis, link_targets, nearest, d_nearest, - level, vt); - - // but we can afford only this many neighbors - int M = nb_neighbors(level); - - ::faiss::shrink_neighbor_list(ptdis, link_targets, M); - - while (!link_targets.empty()) { - int other_id = link_targets.top().id; - - omp_set_lock(&locks[other_id]); - add_link(*this, ptdis, other_id, pt_id, level); - omp_unset_lock(&locks[other_id]); - - add_link(*this, ptdis, pt_id, other_id, level); - - link_targets.pop(); - } -} - - -/************************************************************** - * Building, parallel - **************************************************************/ - -void HNSW::add_with_locks(DistanceComputer& ptdis, int pt_level, int pt_id, - std::vector& locks, - VisitedTable& vt) -{ - // greedy search on upper levels - - storage_idx_t nearest; -#pragma omp critical - { - nearest = entry_point; - - if (nearest == -1) { - max_level = pt_level; - entry_point = pt_id; - } - } - - if (nearest < 0) { - return; - } - - omp_set_lock(&locks[pt_id]); - - int level = max_level; // level at which we start adding neighbors - float d_nearest = ptdis(nearest); - - for(; level > pt_level; level--) { - greedy_update_nearest(*this, ptdis, level, nearest, d_nearest); - } - - for(; level >= 0; level--) { - add_links_starting_from(ptdis, pt_id, nearest, d_nearest, - level, locks.data(), vt); - } - - omp_unset_lock(&locks[pt_id]); - - if (pt_level > max_level) { - max_level = pt_level; - entry_point = pt_id; - } -} - - -/** Do a BFS on the candidates list */ - -int HNSW::search_from_candidates( - DistanceComputer& qdis, int k, - idx_t *I, float *D, - MinimaxHeap& candidates, - VisitedTable& vt, - int level, int nres_in) const -{ - int nres = nres_in; - int ndis = 0; - for (int i = 0; i < candidates.size(); i++) { - idx_t v1 = candidates.ids[i]; - float d = candidates.dis[i]; - FAISS_ASSERT(v1 >= 0); - if (nres < k) { - faiss::maxheap_push(++nres, D, I, d, v1); - } else if (d < D[0]) { - faiss::maxheap_pop(nres--, D, I); - faiss::maxheap_push(++nres, D, I, d, v1); - } - vt.set(v1); - } - - bool do_dis_check = check_relative_distance; - int nstep = 0; - - while (candidates.size() > 0) { - float d0 = 0; - int v0 = candidates.pop_min(&d0); - - if (do_dis_check) { - // tricky stopping condition: there are more that ef - // distances that are processed already that are smaller - // than d0 - - int n_dis_below = candidates.count_below(d0); - if(n_dis_below >= efSearch) { - break; - } - } - - size_t begin, end; - neighbor_range(v0, level, &begin, &end); - - for (size_t j = begin; j < end; j++) { - int v1 = neighbors[j]; - if (v1 < 0) break; - if (vt.get(v1)) { - continue; - } - vt.set(v1); - ndis++; - float d = qdis(v1); - if (nres < k) { - faiss::maxheap_push(++nres, D, I, d, v1); - } else if (d < D[0]) { - faiss::maxheap_pop(nres--, D, I); - faiss::maxheap_push(++nres, D, I, d, v1); - } - candidates.push(v1, d); - } - - nstep++; - if (!do_dis_check && nstep > efSearch) { - break; - } - } - - if (level == 0) { -#pragma omp critical - { - hnsw_stats.n1 ++; - if (candidates.size() == 0) { - hnsw_stats.n2 ++; - } - hnsw_stats.n3 += ndis; - } - } - - return nres; -} - - -/************************************************************** - * Searching - **************************************************************/ - -std::priority_queue HNSW::search_from_candidate_unbounded( - const Node& node, - DistanceComputer& qdis, - int ef, - VisitedTable *vt) const -{ - int ndis = 0; - std::priority_queue top_candidates; - std::priority_queue, std::greater> candidates; - - top_candidates.push(node); - candidates.push(node); - - vt->set(node.second); - - while (!candidates.empty()) { - float d0; - storage_idx_t v0; - std::tie(d0, v0) = candidates.top(); - - if (d0 > top_candidates.top().first) { - break; - } - - candidates.pop(); - - size_t begin, end; - neighbor_range(v0, 0, &begin, &end); - - for (size_t j = begin; j < end; ++j) { - int v1 = neighbors[j]; - - if (v1 < 0) { - break; - } - if (vt->get(v1)) { - continue; - } - - vt->set(v1); - - float d1 = qdis(v1); - ++ndis; - - if (top_candidates.top().first > d1 || top_candidates.size() < ef) { - candidates.emplace(d1, v1); - top_candidates.emplace(d1, v1); - - if (top_candidates.size() > ef) { - top_candidates.pop(); - } - } - } - } - -#pragma omp critical - { - ++hnsw_stats.n1; - if (candidates.size() == 0) { - ++hnsw_stats.n2; - } - hnsw_stats.n3 += ndis; - } - - return top_candidates; -} - -void HNSW::search(DistanceComputer& qdis, int k, - idx_t *I, float *D, - VisitedTable& vt) const -{ - if (upper_beam == 1) { - - // greedy search on upper levels - storage_idx_t nearest = entry_point; - float d_nearest = qdis(nearest); - - for(int level = max_level; level >= 1; level--) { - greedy_update_nearest(*this, qdis, level, nearest, d_nearest); - } - - int ef = std::max(efSearch, k); - if (search_bounded_queue) { - MinimaxHeap candidates(ef); - - candidates.push(nearest, d_nearest); - - search_from_candidates(qdis, k, I, D, candidates, vt, 0); - } else { - std::priority_queue top_candidates = - search_from_candidate_unbounded(Node(d_nearest, nearest), - qdis, ef, &vt); - - while (top_candidates.size() > k) { - top_candidates.pop(); - } - - int nres = 0; - while (!top_candidates.empty()) { - float d; - storage_idx_t label; - std::tie(d, label) = top_candidates.top(); - faiss::maxheap_push(++nres, D, I, d, label); - top_candidates.pop(); - } - } - - vt.advance(); - - } else { - int candidates_size = upper_beam; - MinimaxHeap candidates(candidates_size); - - std::vector I_to_next(candidates_size); - std::vector D_to_next(candidates_size); - - int nres = 1; - I_to_next[0] = entry_point; - D_to_next[0] = qdis(entry_point); - - for(int level = max_level; level >= 0; level--) { - - // copy I, D -> candidates - - candidates.clear(); - - for (int i = 0; i < nres; i++) { - candidates.push(I_to_next[i], D_to_next[i]); - } - - if (level == 0) { - nres = search_from_candidates(qdis, k, I, D, candidates, vt, 0); - } else { - nres = search_from_candidates( - qdis, candidates_size, - I_to_next.data(), D_to_next.data(), - candidates, vt, level - ); - } - vt.advance(); - } - } -} - - -void HNSW::MinimaxHeap::push(storage_idx_t i, float v) { - if (k == n) { - if (v >= dis[0]) return; - faiss::heap_pop (k--, dis.data(), ids.data()); - --nvalid; - } - faiss::heap_push (++k, dis.data(), ids.data(), v, i); - ++nvalid; -} - -float HNSW::MinimaxHeap::max() const { - return dis[0]; -} - -int HNSW::MinimaxHeap::size() const { - return nvalid; -} - -void HNSW::MinimaxHeap::clear() { - nvalid = k = 0; -} - -int HNSW::MinimaxHeap::pop_min(float *vmin_out) { - assert(k > 0); - // returns min. This is an O(n) operation - int i = k - 1; - while (i >= 0) { - if (ids[i] != -1) break; - i--; - } - if (i == -1) return -1; - int imin = i; - float vmin = dis[i]; - i--; - while(i >= 0) { - if (ids[i] != -1 && dis[i] < vmin) { - vmin = dis[i]; - imin = i; - } - i--; - } - if (vmin_out) *vmin_out = vmin; - int ret = ids[imin]; - ids[imin] = -1; - --nvalid; - - return ret; -} - -int HNSW::MinimaxHeap::count_below(float thresh) { - int n_below = 0; - for(int i = 0; i < k; i++) { - if (dis[i] < thresh) { - n_below++; - } - } - - return n_below; -} - - -} // namespace faiss diff --git a/HNSW.h b/HNSW.h deleted file mode 100644 index bb25006efd..0000000000 --- a/HNSW.h +++ /dev/null @@ -1,274 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#pragma once - -#include -#include -#include - -#include - -#include "Index.h" -#include "FaissAssert.h" -#include "utils.h" - - -namespace faiss { - - -/** Implementation of the Hierarchical Navigable Small World - * datastructure. - * - * Efficient and robust approximate nearest neighbor search using - * Hierarchical Navigable Small World graphs - * - * Yu. A. Malkov, D. A. Yashunin, arXiv 2017 - * - * This implmentation is heavily influenced by the NMSlib - * implementation by Yury Malkov and Leonid Boystov - * (https://github.com/searchivarius/nmslib) - * - * The HNSW object stores only the neighbor link structure, see - * IndexHNSW.h for the full index object. - */ - - -struct VisitedTable; -struct DistanceComputer; // from AuxIndexStructures - -struct HNSW { - /// internal storage of vectors (32 bits: this is expensive) - typedef int storage_idx_t; - - /// Faiss results are 64-bit - typedef Index::idx_t idx_t; - - typedef std::pair Node; - - /** Heap structure that allows fast - */ - struct MinimaxHeap { - int n; - int k; - int nvalid; - - std::vector ids; - std::vector dis; - typedef faiss::CMax HC; - - explicit MinimaxHeap(int n): n(n), k(0), nvalid(0), ids(n), dis(n) {} - - void push(storage_idx_t i, float v); - - float max() const; - - int size() const; - - void clear(); - - int pop_min(float *vmin_out = nullptr); - - int count_below(float thresh); - }; - - - /// to sort pairs of (id, distance) from nearest to fathest or the reverse - struct NodeDistCloser { - float d; - int id; - NodeDistCloser(float d, int id): d(d), id(id) {} - bool operator < (const NodeDistCloser &obj1) const { return d < obj1.d; } - }; - - struct NodeDistFarther { - float d; - int id; - NodeDistFarther(float d, int id): d(d), id(id) {} - bool operator < (const NodeDistFarther &obj1) const { return d > obj1.d; } - }; - - - /// assignment probability to each layer (sum=1) - std::vector assign_probas; - - /// number of neighbors stored per layer (cumulative), should not - /// be changed after first add - std::vector cum_nneighbor_per_level; - - /// level of each vector (base level = 1), size = ntotal - std::vector levels; - - /// offsets[i] is the offset in the neighbors array where vector i is stored - /// size ntotal + 1 - std::vector offsets; - - /// neighbors[offsets[i]:offsets[i+1]] is the list of neighbors of vector i - /// for all levels. this is where all storage goes. - std::vector neighbors; - - /// entry point in the search structure (one of the points with maximum level - storage_idx_t entry_point; - - faiss::RandomGenerator rng; - - /// maximum level - int max_level; - - /// expansion factor at construction time - int efConstruction; - - /// expansion factor at search time - int efSearch; - - /// during search: do we check whether the next best distance is good enough? - bool check_relative_distance = true; - - /// number of entry points in levels > 0. - int upper_beam; - - /// use bounded queue during exploration - bool search_bounded_queue = true; - - // methods that initialize the tree sizes - - /// initialize the assign_probas and cum_nneighbor_per_level to - /// have 2*M links on level 0 and M links on levels > 0 - void set_default_probas(int M, float levelMult); - - /// set nb of neighbors for this level (before adding anything) - void set_nb_neighbors(int level_no, int n); - - // methods that access the tree sizes - - /// nb of neighbors for this level - int nb_neighbors(int layer_no) const; - - /// cumumlative nb up to (and excluding) this level - int cum_nb_neighbors(int layer_no) const; - - /// range of entries in the neighbors table of vertex no at layer_no - void neighbor_range(idx_t no, int layer_no, - size_t * begin, size_t * end) const; - - /// only mandatory parameter: nb of neighbors - explicit HNSW(int M = 32); - - /// pick a random level for a new point - int random_level(); - - /// add n random levels to table (for debugging...) - void fill_with_random_links(size_t n); - - void add_links_starting_from(DistanceComputer& ptdis, - storage_idx_t pt_id, - storage_idx_t nearest, - float d_nearest, - int level, - omp_lock_t *locks, - VisitedTable &vt); - - - /** add point pt_id on all levels <= pt_level and build the link - * structure for them. */ - void add_with_locks(DistanceComputer& ptdis, int pt_level, int pt_id, - std::vector& locks, - VisitedTable& vt); - - int search_from_candidates(DistanceComputer& qdis, int k, - idx_t *I, float *D, - MinimaxHeap& candidates, - VisitedTable &vt, - int level, int nres_in = 0) const; - - std::priority_queue search_from_candidate_unbounded( - const Node& node, - DistanceComputer& qdis, - int ef, - VisitedTable *vt - ) const; - - /// search interface - void search(DistanceComputer& qdis, int k, - idx_t *I, float *D, - VisitedTable& vt) const; - - void reset(); - - void clear_neighbor_tables(int level); - void print_neighbor_stats(int level) const; - - int prepare_level_tab(size_t n, bool preset_levels = false); - - static void shrink_neighbor_list( - DistanceComputer& qdis, - std::priority_queue& input, - std::vector& output, - int max_size); - -}; - - -/************************************************************** - * Auxiliary structures - **************************************************************/ - -/// set implementation optimized for fast access. -struct VisitedTable { - std::vector visited; - int visno; - - explicit VisitedTable(int size) - : visited(size), visno(1) {} - - /// set flog #no to true - void set(int no) { - visited[no] = visno; - } - - /// get flag #no - bool get(int no) const { - return visited[no] == visno; - } - - /// reset all flags to false - void advance() { - visno++; - if (visno == 250) { - // 250 rather than 255 because sometimes we use visno and visno+1 - memset(visited.data(), 0, sizeof(visited[0]) * visited.size()); - visno = 1; - } - } -}; - - -struct HNSWStats { - size_t n1, n2, n3; - size_t ndis; - size_t nreorder; - bool view; - - HNSWStats() { - reset(); - } - - void reset() { - n1 = n2 = n3 = 0; - ndis = 0; - nreorder = 0; - view = false; - } -}; - -// global var that collects them all -extern HNSWStats hnsw_stats; - - -} // namespace faiss diff --git a/Heap.cpp b/Heap.cpp deleted file mode 100644 index 0621828adf..0000000000 --- a/Heap.cpp +++ /dev/null @@ -1,122 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -/* Function for soft heap */ - -#include "Heap.h" - - -namespace faiss { - - -template -void HeapArray::heapify () -{ -#pragma omp parallel for - for (size_t j = 0; j < nh; j++) - heap_heapify (k, val + j * k, ids + j * k); -} - -template -void HeapArray::reorder () -{ -#pragma omp parallel for - for (size_t j = 0; j < nh; j++) - heap_reorder (k, val + j * k, ids + j * k); -} - -template -void HeapArray::addn (size_t nj, const T *vin, TI j0, - size_t i0, int64_t ni) -{ - if (ni == -1) ni = nh; - assert (i0 >= 0 && i0 + ni <= nh); -#pragma omp parallel for - for (size_t i = i0; i < i0 + ni; i++) { - T * __restrict simi = get_val(i); - TI * __restrict idxi = get_ids (i); - const T *ip_line = vin + (i - i0) * nj; - - for (size_t j = 0; j < nj; j++) { - T ip = ip_line [j]; - if (C::cmp(simi[0], ip)) { - heap_pop (k, simi, idxi); - heap_push (k, simi, idxi, ip, j + j0); - } - } - } -} - -template -void HeapArray::addn_with_ids ( - size_t nj, const T *vin, const TI *id_in, - int64_t id_stride, size_t i0, int64_t ni) -{ - if (id_in == nullptr) { - addn (nj, vin, 0, i0, ni); - return; - } - if (ni == -1) ni = nh; - assert (i0 >= 0 && i0 + ni <= nh); -#pragma omp parallel for - for (size_t i = i0; i < i0 + ni; i++) { - T * __restrict simi = get_val(i); - TI * __restrict idxi = get_ids (i); - const T *ip_line = vin + (i - i0) * nj; - const TI *id_line = id_in + (i - i0) * id_stride; - - for (size_t j = 0; j < nj; j++) { - T ip = ip_line [j]; - if (C::cmp(simi[0], ip)) { - heap_pop (k, simi, idxi); - heap_push (k, simi, idxi, ip, id_line [j]); - } - } - } -} - -template -void HeapArray::per_line_extrema ( - T * out_val, - TI * out_ids) const -{ -#pragma omp parallel for - for (size_t j = 0; j < nh; j++) { - int64_t imin = -1; - typename C::T xval = C::Crev::neutral (); - const typename C::T * x_ = val + j * k; - for (size_t i = 0; i < k; i++) - if (C::cmp (x_[i], xval)) { - xval = x_[i]; - imin = i; - } - if (out_val) - out_val[j] = xval; - - if (out_ids) { - if (ids && imin != -1) - out_ids[j] = ids [j * k + imin]; - else - out_ids[j] = imin; - } - } -} - - - - -// explicit instanciations - -template struct HeapArray >; -template struct HeapArray >; -template struct HeapArray >; -template struct HeapArray >; - - -} // END namespace fasis diff --git a/Heap.h b/Heap.h deleted file mode 100644 index e691c36c7f..0000000000 --- a/Heap.h +++ /dev/null @@ -1,495 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -/* - * C++ support for heaps. The set of functions is tailored for - * efficient similarity search. - * - * There is no specific object for a heap, and the functions that - * operate on a signle heap are inlined, because heaps are often - * small. More complex functions are implemented in Heaps.cpp - * - */ - - -#ifndef FAISS_Heap_h -#define FAISS_Heap_h - -#include -#include -#include - -#include -#include -#include - -#include - - -namespace faiss { - -/******************************************************************* - * C object: uniform handling of min and max heap - *******************************************************************/ - -/** The C object gives the type T of the values in the heap, the type - * of the keys, TI and the comparison that is done: > for the minheap - * and < for the maxheap. The neutral value will always be dropped in - * favor of any other value in the heap. - */ - -template -struct CMax; - -// traits of minheaps = heaps where the minimum value is stored on top -// useful to find the *max* values of an array -template -struct CMin { - typedef T_ T; - typedef TI_ TI; - typedef CMax Crev; - inline static bool cmp (T a, T b) { - return a < b; - } - // value that will be popped first -> must be smaller than all others - // for int types this is not strictly the smallest val (-max - 1) - inline static T neutral () { - return -std::numeric_limits::max(); - } -}; - - -template -struct CMax { - typedef T_ T; - typedef TI_ TI; - typedef CMin Crev; - inline static bool cmp (T a, T b) { - return a > b; - } - inline static T neutral () { - return std::numeric_limits::max(); - } -}; - - -/******************************************************************* - * Basic heap ops: push and pop - *******************************************************************/ - -/** Pops the top element from the heap defined by bh_val[0..k-1] and - * bh_ids[0..k-1]. on output the element at k-1 is undefined. - */ -template inline -void heap_pop (size_t k, typename C::T * bh_val, typename C::TI * bh_ids) -{ - bh_val--; /* Use 1-based indexing for easier node->child translation */ - bh_ids--; - typename C::T val = bh_val[k]; - size_t i = 1, i1, i2; - while (1) { - i1 = i << 1; - i2 = i1 + 1; - if (i1 > k) - break; - if (i2 == k + 1 || C::cmp(bh_val[i1], bh_val[i2])) { - if (C::cmp(val, bh_val[i1])) - break; - bh_val[i] = bh_val[i1]; - bh_ids[i] = bh_ids[i1]; - i = i1; - } - else { - if (C::cmp(val, bh_val[i2])) - break; - bh_val[i] = bh_val[i2]; - bh_ids[i] = bh_ids[i2]; - i = i2; - } - } - bh_val[i] = bh_val[k]; - bh_ids[i] = bh_ids[k]; -} - - - -/** Pushes the element (val, ids) into the heap bh_val[0..k-2] and - * bh_ids[0..k-2]. on output the element at k-1 is defined. - */ -template inline -void heap_push (size_t k, - typename C::T * bh_val, typename C::TI * bh_ids, - typename C::T val, typename C::TI ids) -{ - bh_val--; /* Use 1-based indexing for easier node->child translation */ - bh_ids--; - size_t i = k, i_father; - while (i > 1) { - i_father = i >> 1; - if (!C::cmp (val, bh_val[i_father])) /* the heap structure is ok */ - break; - bh_val[i] = bh_val[i_father]; - bh_ids[i] = bh_ids[i_father]; - i = i_father; - } - bh_val[i] = val; - bh_ids[i] = ids; -} - - - -/* Partial instanciation for heaps with TI = int64_t */ - -template inline -void minheap_pop (size_t k, T * bh_val, int64_t * bh_ids) -{ - heap_pop > (k, bh_val, bh_ids); -} - - -template inline -void minheap_push (size_t k, T * bh_val, int64_t * bh_ids, T val, int64_t ids) -{ - heap_push > (k, bh_val, bh_ids, val, ids); -} - - -template inline -void maxheap_pop (size_t k, T * bh_val, int64_t * bh_ids) -{ - heap_pop > (k, bh_val, bh_ids); -} - - -template inline -void maxheap_push (size_t k, T * bh_val, int64_t * bh_ids, T val, int64_t ids) -{ - heap_push > (k, bh_val, bh_ids, val, ids); -} - - - -/******************************************************************* - * Heap initialization - *******************************************************************/ - -/* Initialization phase for the heap (with unconditionnal pushes). - * Store k0 elements in a heap containing up to k values. Note that - * (bh_val, bh_ids) can be the same as (x, ids) */ -template inline -void heap_heapify ( - size_t k, - typename C::T * bh_val, - typename C::TI * bh_ids, - const typename C::T * x = nullptr, - const typename C::TI * ids = nullptr, - size_t k0 = 0) -{ - if (k0 > 0) assert (x); - - if (ids) { - for (size_t i = 0; i < k0; i++) - heap_push (i+1, bh_val, bh_ids, x[i], ids[i]); - } else { - for (size_t i = 0; i < k0; i++) - heap_push (i+1, bh_val, bh_ids, x[i], i); - } - - for (size_t i = k0; i < k; i++) { - bh_val[i] = C::neutral(); - bh_ids[i] = -1; - } - -} - -template inline -void minheap_heapify ( - size_t k, T * bh_val, - int64_t * bh_ids, - const T * x = nullptr, - const int64_t * ids = nullptr, - size_t k0 = 0) -{ - heap_heapify< CMin > (k, bh_val, bh_ids, x, ids, k0); -} - - -template inline -void maxheap_heapify ( - size_t k, - T * bh_val, - int64_t * bh_ids, - const T * x = nullptr, - const int64_t * ids = nullptr, - size_t k0 = 0) -{ - heap_heapify< CMax > (k, bh_val, bh_ids, x, ids, k0); -} - - - -/******************************************************************* - * Add n elements to the heap - *******************************************************************/ - - -/* Add some elements to the heap */ -template inline -void heap_addn (size_t k, - typename C::T * bh_val, typename C::TI * bh_ids, - const typename C::T * x, - const typename C::TI * ids, - size_t n) -{ - size_t i; - if (ids) - for (i = 0; i < n; i++) { - if (C::cmp (bh_val[0], x[i])) { - heap_pop (k, bh_val, bh_ids); - heap_push (k, bh_val, bh_ids, x[i], ids[i]); - } - } - else - for (i = 0; i < n; i++) { - if (C::cmp (bh_val[0], x[i])) { - heap_pop (k, bh_val, bh_ids); - heap_push (k, bh_val, bh_ids, x[i], i); - } - } -} - - -/* Partial instanciation for heaps with TI = int64_t */ - -template inline -void minheap_addn (size_t k, T * bh_val, int64_t * bh_ids, - const T * x, const int64_t * ids, size_t n) -{ - heap_addn > (k, bh_val, bh_ids, x, ids, n); -} - -template inline -void maxheap_addn (size_t k, T * bh_val, int64_t * bh_ids, - const T * x, const int64_t * ids, size_t n) -{ - heap_addn > (k, bh_val, bh_ids, x, ids, n); -} - - - - - - -/******************************************************************* - * Heap finalization (reorder elements) - *******************************************************************/ - - -/* This function maps a binary heap into an sorted structure. - It returns the number */ -template inline -size_t heap_reorder (size_t k, typename C::T * bh_val, typename C::TI * bh_ids) -{ - size_t i, ii; - - for (i = 0, ii = 0; i < k; i++) { - /* top element should be put at the end of the list */ - typename C::T val = bh_val[0]; - typename C::TI id = bh_ids[0]; - - /* boundary case: we will over-ride this value if not a true element */ - heap_pop (k-i, bh_val, bh_ids); - bh_val[k-ii-1] = val; - bh_ids[k-ii-1] = id; - if (id != -1) ii++; - } - /* Count the number of elements which are effectively returned */ - size_t nel = ii; - - memmove (bh_val, bh_val+k-ii, ii * sizeof(*bh_val)); - memmove (bh_ids, bh_ids+k-ii, ii * sizeof(*bh_ids)); - - for (; ii < k; ii++) { - bh_val[ii] = C::neutral(); - bh_ids[ii] = -1; - } - return nel; -} - -template inline -size_t minheap_reorder (size_t k, T * bh_val, int64_t * bh_ids) -{ - return heap_reorder< CMin > (k, bh_val, bh_ids); -} - -template inline -size_t maxheap_reorder (size_t k, T * bh_val, int64_t * bh_ids) -{ - return heap_reorder< CMax > (k, bh_val, bh_ids); -} - - - - - -/******************************************************************* - * Operations on heap arrays - *******************************************************************/ - -/** a template structure for a set of [min|max]-heaps it is tailored - * so that the actual data of the heaps can just live in compact - * arrays. - */ -template -struct HeapArray { - typedef typename C::TI TI; - typedef typename C::T T; - - size_t nh; ///< number of heaps - size_t k; ///< allocated size per heap - TI * ids; ///< identifiers (size nh * k) - T * val; ///< values (distances or similarities), size nh * k - - /// Return the list of values for a heap - T * get_val (size_t key) { return val + key * k; } - - /// Correspponding identifiers - TI * get_ids (size_t key) { return ids + key * k; } - - /// prepare all the heaps before adding - void heapify (); - - /** add nj elements to heaps i0:i0+ni, with sequential ids - * - * @param nj nb of elements to add to each heap - * @param vin elements to add, size ni * nj - * @param j0 add this to the ids that are added - * @param i0 first heap to update - * @param ni nb of elements to update (-1 = use nh) - */ - void addn (size_t nj, const T *vin, TI j0 = 0, - size_t i0 = 0, int64_t ni = -1); - - /** same as addn - * - * @param id_in ids of the elements to add, size ni * nj - * @param id_stride stride for id_in - */ - void addn_with_ids ( - size_t nj, const T *vin, const TI *id_in = nullptr, - int64_t id_stride = 0, size_t i0 = 0, int64_t ni = -1); - - /// reorder all the heaps - void reorder (); - - /** this is not really a heap function. It just finds the per-line - * extrema of each line of array D - * @param vals_out extreme value of each line (size nh, or NULL) - * @param idx_out index of extreme value (size nh or NULL) - */ - void per_line_extrema (T *vals_out, TI *idx_out) const; - -}; - - -/* Define useful heaps */ -typedef HeapArray > float_minheap_array_t; -typedef HeapArray > int_minheap_array_t; - -typedef HeapArray > float_maxheap_array_t; -typedef HeapArray > int_maxheap_array_t; - -// The heap templates are instanciated explicitly in Heap.cpp - - - - - - - - - - - - - - - - - - - -/********************************************************************* - * Indirect heaps: instead of having - * - * node i = (bh_ids[i], bh_val[i]), - * - * in indirect heaps, - * - * node i = (bh_ids[i], bh_val[bh_ids[i]]), - * - *********************************************************************/ - - -template -inline -void indirect_heap_pop ( - size_t k, - const typename C::T * bh_val, - typename C::TI * bh_ids) -{ - bh_ids--; /* Use 1-based indexing for easier node->child translation */ - typename C::T val = bh_val[bh_ids[k]]; - size_t i = 1; - while (1) { - size_t i1 = i << 1; - size_t i2 = i1 + 1; - if (i1 > k) - break; - typename C::TI id1 = bh_ids[i1], id2 = bh_ids[i2]; - if (i2 == k + 1 || C::cmp(bh_val[id1], bh_val[id2])) { - if (C::cmp(val, bh_val[id1])) - break; - bh_ids[i] = id1; - i = i1; - } else { - if (C::cmp(val, bh_val[id2])) - break; - bh_ids[i] = id2; - i = i2; - } - } - bh_ids[i] = bh_ids[k]; -} - - - -template -inline -void indirect_heap_push (size_t k, - const typename C::T * bh_val, typename C::TI * bh_ids, - typename C::TI id) -{ - bh_ids--; /* Use 1-based indexing for easier node->child translation */ - typename C::T val = bh_val[id]; - size_t i = k; - while (i > 1) { - size_t i_father = i >> 1; - if (!C::cmp (val, bh_val[bh_ids[i_father]])) - break; - bh_ids[i] = bh_ids[i_father]; - i = i_father; - } - bh_ids[i] = id; -} - - -} // namespace faiss - -#endif /* FAISS_Heap_h */ diff --git a/PolysemousTraining.cpp b/PolysemousTraining.cpp deleted file mode 100644 index ebfc5c217b..0000000000 --- a/PolysemousTraining.cpp +++ /dev/null @@ -1,951 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#include "PolysemousTraining.h" - -#include -#include -#include -#include - -#include - -#include "utils.h" -#include "hamming.h" - -#include "FaissAssert.h" - -/***************************************** - * Mixed PQ / Hamming - ******************************************/ - -namespace faiss { - - -/**************************************************** - * Optimization code - ****************************************************/ - -SimulatedAnnealingParameters::SimulatedAnnealingParameters () -{ - // set some reasonable defaults for the optimization - init_temperature = 0.7; - temperature_decay = pow (0.9, 1/500.); - // reduce by a factor 0.9 every 500 it - n_iter = 500000; - n_redo = 2; - seed = 123; - verbose = 0; - only_bit_flips = false; - init_random = false; -} - -// what would the cost update be if iw and jw were swapped? -// default implementation just computes both and computes the difference -double PermutationObjective::cost_update ( - const int *perm, int iw, int jw) const -{ - double orig_cost = compute_cost (perm); - - std::vector perm2 (n); - for (int i = 0; i < n; i++) - perm2[i] = perm[i]; - perm2[iw] = perm[jw]; - perm2[jw] = perm[iw]; - - double new_cost = compute_cost (perm2.data()); - return new_cost - orig_cost; -} - - - - -SimulatedAnnealingOptimizer::SimulatedAnnealingOptimizer ( - PermutationObjective *obj, - const SimulatedAnnealingParameters &p): - SimulatedAnnealingParameters (p), - obj (obj), - n(obj->n), - logfile (nullptr) -{ - rnd = new RandomGenerator (p.seed); - FAISS_THROW_IF_NOT (n < 100000 && n >=0 ); -} - -SimulatedAnnealingOptimizer::~SimulatedAnnealingOptimizer () -{ - delete rnd; -} - -// run the optimization and return the best result in best_perm -double SimulatedAnnealingOptimizer::run_optimization (int * best_perm) -{ - double min_cost = 1e30; - - // just do a few runs of the annealing and keep the lowest output cost - for (int it = 0; it < n_redo; it++) { - std::vector perm(n); - for (int i = 0; i < n; i++) - perm[i] = i; - if (init_random) { - for (int i = 0; i < n; i++) { - int j = i + rnd->rand_int (n - i); - std::swap (perm[i], perm[j]); - } - } - float cost = optimize (perm.data()); - if (logfile) fprintf (logfile, "\n"); - if(verbose > 1) { - printf (" optimization run %d: cost=%g %s\n", - it, cost, cost < min_cost ? "keep" : ""); - } - if (cost < min_cost) { - memcpy (best_perm, perm.data(), sizeof(perm[0]) * n); - min_cost = cost; - } - } - return min_cost; -} - -// perform the optimization loop, starting from and modifying -// permutation in-place -double SimulatedAnnealingOptimizer::optimize (int *perm) -{ - double cost = init_cost = obj->compute_cost (perm); - int log2n = 0; - while (!(n <= (1 << log2n))) log2n++; - double temperature = init_temperature; - int n_swap = 0, n_hot = 0; - for (int it = 0; it < n_iter; it++) { - temperature = temperature * temperature_decay; - int iw, jw; - if (only_bit_flips) { - iw = rnd->rand_int (n); - jw = iw ^ (1 << rnd->rand_int (log2n)); - } else { - iw = rnd->rand_int (n); - jw = rnd->rand_int (n - 1); - if (jw == iw) jw++; - } - double delta_cost = obj->cost_update (perm, iw, jw); - if (delta_cost < 0 || rnd->rand_float () < temperature) { - std::swap (perm[iw], perm[jw]); - cost += delta_cost; - n_swap++; - if (delta_cost >= 0) n_hot++; - } - if (verbose > 2 || (verbose > 1 && it % 10000 == 0)) { - printf (" iteration %d cost %g temp %g n_swap %d " - "(%d hot) \r", - it, cost, temperature, n_swap, n_hot); - fflush(stdout); - } - if (logfile) { - fprintf (logfile, "%d %g %g %d %d\n", - it, cost, temperature, n_swap, n_hot); - } - } - if (verbose > 1) printf("\n"); - return cost; -} - - - - - -/**************************************************** - * Cost functions: ReproduceDistanceTable - ****************************************************/ - - - - - - -static inline int hamming_dis (uint64_t a, uint64_t b) -{ - return __builtin_popcountl (a ^ b); -} - -namespace { - -/// optimize permutation to reproduce a distance table with Hamming distances -struct ReproduceWithHammingObjective : PermutationObjective { - int nbits; - double dis_weight_factor; - - static double sqr (double x) { return x * x; } - - - // weihgting of distances: it is more important to reproduce small - // distances well - double dis_weight (double x) const - { - return exp (-dis_weight_factor * x); - } - - std::vector target_dis; // wanted distances (size n^2) - std::vector weights; // weights for each distance (size n^2) - - // cost = quadratic difference between actual distance and Hamming distance - double compute_cost(const int* perm) const override { - double cost = 0; - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - double wanted = target_dis[i * n + j]; - double w = weights[i * n + j]; - double actual = hamming_dis(perm[i], perm[j]); - cost += w * sqr(wanted - actual); - } - } - return cost; - } - - - // what would the cost update be if iw and jw were swapped? - // computed in O(n) instead of O(n^2) for the full re-computation - double cost_update(const int* perm, int iw, int jw) const override { - double delta_cost = 0; - - for (int i = 0; i < n; i++) { - if (i == iw) { - for (int j = 0; j < n; j++) { - double wanted = target_dis[i * n + j], w = weights[i * n + j]; - double actual = hamming_dis(perm[i], perm[j]); - delta_cost -= w * sqr(wanted - actual); - double new_actual = - hamming_dis(perm[jw], perm[j == iw ? jw : j == jw ? iw : j]); - delta_cost += w * sqr(wanted - new_actual); - } - } else if (i == jw) { - for (int j = 0; j < n; j++) { - double wanted = target_dis[i * n + j], w = weights[i * n + j]; - double actual = hamming_dis(perm[i], perm[j]); - delta_cost -= w * sqr(wanted - actual); - double new_actual = - hamming_dis(perm[iw], perm[j == iw ? jw : j == jw ? iw : j]); - delta_cost += w * sqr(wanted - new_actual); - } - } else { - int j = iw; - { - double wanted = target_dis[i * n + j], w = weights[i * n + j]; - double actual = hamming_dis(perm[i], perm[j]); - delta_cost -= w * sqr(wanted - actual); - double new_actual = hamming_dis(perm[i], perm[jw]); - delta_cost += w * sqr(wanted - new_actual); - } - j = jw; - { - double wanted = target_dis[i * n + j], w = weights[i * n + j]; - double actual = hamming_dis(perm[i], perm[j]); - delta_cost -= w * sqr(wanted - actual); - double new_actual = hamming_dis(perm[i], perm[iw]); - delta_cost += w * sqr(wanted - new_actual); - } - } - } - - return delta_cost; - } - - - - ReproduceWithHammingObjective ( - int nbits, - const std::vector & dis_table, - double dis_weight_factor): - nbits (nbits), dis_weight_factor (dis_weight_factor) - { - n = 1 << nbits; - FAISS_THROW_IF_NOT (dis_table.size() == n * n); - set_affine_target_dis (dis_table); - } - - void set_affine_target_dis (const std::vector & dis_table) - { - double sum = 0, sum2 = 0; - int n2 = n * n; - for (int i = 0; i < n2; i++) { - sum += dis_table [i]; - sum2 += dis_table [i] * dis_table [i]; - } - double mean = sum / n2; - double stddev = sqrt(sum2 / n2 - (sum / n2) * (sum / n2)); - - target_dis.resize (n2); - - for (int i = 0; i < n2; i++) { - // the mapping function - double td = (dis_table [i] - mean) / stddev * sqrt(nbits / 4) + - nbits / 2; - target_dis[i] = td; - // compute a weight - weights.push_back (dis_weight (td)); - } - - } - - ~ReproduceWithHammingObjective() override {} -}; - -} // anonymous namespace - -// weihgting of distances: it is more important to reproduce small -// distances well -double ReproduceDistancesObjective::dis_weight (double x) const -{ - return exp (-dis_weight_factor * x); -} - - -double ReproduceDistancesObjective::get_source_dis (int i, int j) const -{ - return source_dis [i * n + j]; -} - -// cost = quadratic difference between actual distance and Hamming distance -double ReproduceDistancesObjective::compute_cost (const int *perm) const -{ - double cost = 0; - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - double wanted = target_dis [i * n + j]; - double w = weights [i * n + j]; - double actual = get_source_dis (perm[i], perm[j]); - cost += w * sqr (wanted - actual); - } - } - return cost; -} - -// what would the cost update be if iw and jw were swapped? -// computed in O(n) instead of O(n^2) for the full re-computation -double ReproduceDistancesObjective::cost_update( - const int *perm, int iw, int jw) const -{ - double delta_cost = 0; - for (int i = 0; i < n; i++) { - if (i == iw) { - for (int j = 0; j < n; j++) { - double wanted = target_dis [i * n + j], - w = weights [i * n + j]; - double actual = get_source_dis (perm[i], perm[j]); - delta_cost -= w * sqr (wanted - actual); - double new_actual = get_source_dis ( - perm[jw], - perm[j == iw ? jw : j == jw ? iw : j]); - delta_cost += w * sqr (wanted - new_actual); - } - } else if (i == jw) { - for (int j = 0; j < n; j++) { - double wanted = target_dis [i * n + j], - w = weights [i * n + j]; - double actual = get_source_dis (perm[i], perm[j]); - delta_cost -= w * sqr (wanted - actual); - double new_actual = get_source_dis ( - perm[iw], - perm[j == iw ? jw : j == jw ? iw : j]); - delta_cost += w * sqr (wanted - new_actual); - } - } else { - int j = iw; - { - double wanted = target_dis [i * n + j], - w = weights [i * n + j]; - double actual = get_source_dis (perm[i], perm[j]); - delta_cost -= w * sqr (wanted - actual); - double new_actual = get_source_dis (perm[i], perm[jw]); - delta_cost += w * sqr (wanted - new_actual); - } - j = jw; - { - double wanted = target_dis [i * n + j], - w = weights [i * n + j]; - double actual = get_source_dis (perm[i], perm[j]); - delta_cost -= w * sqr (wanted - actual); - double new_actual = get_source_dis (perm[i], perm[iw]); - delta_cost += w * sqr (wanted - new_actual); - } - } - } - return delta_cost; -} - - - -ReproduceDistancesObjective::ReproduceDistancesObjective ( - int n, - const double *source_dis_in, - const double *target_dis_in, - double dis_weight_factor): - dis_weight_factor (dis_weight_factor), - target_dis (target_dis_in) -{ - this->n = n; - set_affine_target_dis (source_dis_in); -} - -void ReproduceDistancesObjective::compute_mean_stdev ( - const double *tab, size_t n2, - double *mean_out, double *stddev_out) -{ - double sum = 0, sum2 = 0; - for (int i = 0; i < n2; i++) { - sum += tab [i]; - sum2 += tab [i] * tab [i]; - } - double mean = sum / n2; - double stddev = sqrt(sum2 / n2 - (sum / n2) * (sum / n2)); - *mean_out = mean; - *stddev_out = stddev; -} - -void ReproduceDistancesObjective::set_affine_target_dis ( - const double *source_dis_in) -{ - int n2 = n * n; - - double mean_src, stddev_src; - compute_mean_stdev (source_dis_in, n2, &mean_src, &stddev_src); - - double mean_target, stddev_target; - compute_mean_stdev (target_dis, n2, &mean_target, &stddev_target); - - printf ("map mean %g std %g -> mean %g std %g\n", - mean_src, stddev_src, mean_target, stddev_target); - - source_dis.resize (n2); - weights.resize (n2); - - for (int i = 0; i < n2; i++) { - // the mapping function - source_dis[i] = (source_dis_in[i] - mean_src) / stddev_src - * stddev_target + mean_target; - - // compute a weight - weights [i] = dis_weight (target_dis[i]); - } - -} - -/**************************************************** - * Cost functions: RankingScore - ****************************************************/ - -/// Maintains a 3D table of elementary costs. -/// Accumulates elements based on Hamming distance comparisons -template -struct Score3Computer: PermutationObjective { - - int nc; - - // cost matrix of size nc * nc *nc - // n_gt (i,j,k) = count of d_gt(x, y-) < d_gt(x, y+) - // where x has PQ code i, y- PQ code j and y+ PQ code k - std::vector n_gt; - - - /// the cost is a triple loop on the nc * nc * nc matrix of entries. - /// - Taccu compute (const int * perm) const - { - Taccu accu = 0; - const Ttab *p = n_gt.data(); - for (int i = 0; i < nc; i++) { - int ip = perm [i]; - for (int j = 0; j < nc; j++) { - int jp = perm [j]; - for (int k = 0; k < nc; k++) { - int kp = perm [k]; - if (hamming_dis (ip, jp) < - hamming_dis (ip, kp)) { - accu += *p; // n_gt [ ( i * nc + j) * nc + k]; - } - p++; - } - } - } - return accu; - } - - - /** cost update if entries iw and jw of the permutation would be - * swapped. - * - * The computation is optimized by avoiding elements in the - * nc*nc*nc cube that are known not to change. For nc=256, this - * reduces the nb of cells to visit to about 6/256 th of the - * cells. Practical speedup is about 8x, and the code is quite - * complex :-/ - */ - Taccu compute_update (const int *perm, int iw, int jw) const - { - assert (iw != jw); - if (iw > jw) std::swap (iw, jw); - - Taccu accu = 0; - const Ttab * n_gt_i = n_gt.data(); - for (int i = 0; i < nc; i++) { - int ip0 = perm [i]; - int ip = perm [i == iw ? jw : i == jw ? iw : i]; - - //accu += update_i (perm, iw, jw, ip0, ip, n_gt_i); - - accu += update_i_cross (perm, iw, jw, - ip0, ip, n_gt_i); - - if (ip != ip0) - accu += update_i_plane (perm, iw, jw, - ip0, ip, n_gt_i); - - n_gt_i += nc * nc; - } - - return accu; - } - - - Taccu update_i (const int *perm, int iw, int jw, - int ip0, int ip, const Ttab * n_gt_i) const - { - Taccu accu = 0; - const Ttab *n_gt_ij = n_gt_i; - for (int j = 0; j < nc; j++) { - int jp0 = perm[j]; - int jp = perm [j == iw ? jw : j == jw ? iw : j]; - for (int k = 0; k < nc; k++) { - int kp0 = perm [k]; - int kp = perm [k == iw ? jw : k == jw ? iw : k]; - int ng = n_gt_ij [k]; - if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) { - accu += ng; - } - if (hamming_dis (ip0, jp0) < hamming_dis (ip0, kp0)) { - accu -= ng; - } - } - n_gt_ij += nc; - } - return accu; - } - - // 2 inner loops for the case ip0 != ip - Taccu update_i_plane (const int *perm, int iw, int jw, - int ip0, int ip, const Ttab * n_gt_i) const - { - Taccu accu = 0; - const Ttab *n_gt_ij = n_gt_i; - - for (int j = 0; j < nc; j++) { - if (j != iw && j != jw) { - int jp = perm[j]; - for (int k = 0; k < nc; k++) { - if (k != iw && k != jw) { - int kp = perm [k]; - Ttab ng = n_gt_ij [k]; - if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) { - accu += ng; - } - if (hamming_dis (ip0, jp) < hamming_dis (ip0, kp)) { - accu -= ng; - } - } - } - } - n_gt_ij += nc; - } - return accu; - } - - /// used for the 8 cells were the 3 indices are swapped - inline Taccu update_k (const int *perm, int iw, int jw, - int ip0, int ip, int jp0, int jp, - int k, - const Ttab * n_gt_ij) const - { - Taccu accu = 0; - int kp0 = perm [k]; - int kp = perm [k == iw ? jw : k == jw ? iw : k]; - Ttab ng = n_gt_ij [k]; - if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) { - accu += ng; - } - if (hamming_dis (ip0, jp0) < hamming_dis (ip0, kp0)) { - accu -= ng; - } - return accu; - } - - /// compute update on a line of k's, where i and j are swapped - Taccu update_j_line (const int *perm, int iw, int jw, - int ip0, int ip, int jp0, int jp, - const Ttab * n_gt_ij) const - { - Taccu accu = 0; - for (int k = 0; k < nc; k++) { - if (k == iw || k == jw) continue; - int kp = perm [k]; - Ttab ng = n_gt_ij [k]; - if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) { - accu += ng; - } - if (hamming_dis (ip0, jp0) < hamming_dis (ip0, kp)) { - accu -= ng; - } - } - return accu; - } - - - /// considers the 2 pairs of crossing lines j=iw or jw and k = iw or kw - Taccu update_i_cross (const int *perm, int iw, int jw, - int ip0, int ip, const Ttab * n_gt_i) const - { - Taccu accu = 0; - const Ttab *n_gt_ij = n_gt_i; - - for (int j = 0; j < nc; j++) { - int jp0 = perm[j]; - int jp = perm [j == iw ? jw : j == jw ? iw : j]; - - accu += update_k (perm, iw, jw, ip0, ip, jp0, jp, iw, n_gt_ij); - accu += update_k (perm, iw, jw, ip0, ip, jp0, jp, jw, n_gt_ij); - - if (jp != jp0) - accu += update_j_line (perm, iw, jw, ip0, ip, jp0, jp, n_gt_ij); - - n_gt_ij += nc; - } - return accu; - } - - - /// PermutationObjective implementeation (just negates the scores - /// for minimization) - - double compute_cost(const int* perm) const override { - return -compute(perm); - } - - double cost_update(const int* perm, int iw, int jw) const override { - double ret = -compute_update(perm, iw, jw); - return ret; - } - - ~Score3Computer() override {} -}; - - - - - -struct IndirectSort { - const float *tab; - bool operator () (int a, int b) {return tab[a] < tab[b]; } -}; - - - -struct RankingScore2: Score3Computer { - int nbits; - int nq, nb; - const uint32_t *qcodes, *bcodes; - const float *gt_distances; - - RankingScore2 (int nbits, int nq, int nb, - const uint32_t *qcodes, const uint32_t *bcodes, - const float *gt_distances): - nbits(nbits), nq(nq), nb(nb), qcodes(qcodes), - bcodes(bcodes), gt_distances(gt_distances) - { - n = nc = 1 << nbits; - n_gt.resize (nc * nc * nc); - init_n_gt (); - } - - - double rank_weight (int r) - { - return 1.0 / (r + 1); - } - - /// count nb of i, j in a x b st. i < j - /// a and b should be sorted on input - /// they are the ranks of j and k respectively. - /// specific version for diff-of-rank weighting, cannot optimized - /// with a cumulative table - double accum_gt_weight_diff (const std::vector & a, - const std::vector & b) - { - int nb = b.size(), na = a.size(); - - double accu = 0; - int j = 0; - for (int i = 0; i < na; i++) { - int ai = a[i]; - while (j < nb && ai >= b[j]) j++; - - double accu_i = 0; - for (int k = j; k < b.size(); k++) - accu_i += rank_weight (b[k] - ai); - - accu += rank_weight (ai) * accu_i; - - } - return accu; - } - - void init_n_gt () - { - for (int q = 0; q < nq; q++) { - const float *gtd = gt_distances + q * nb; - const uint32_t *cb = bcodes;// all same codes - float * n_gt_q = & n_gt [qcodes[q] * nc * nc]; - - printf("init gt for q=%d/%d \r", q, nq); fflush(stdout); - - std::vector rankv (nb); - int * ranks = rankv.data(); - - // elements in each code bin, ordered by rank within each bin - std::vector > tab (nc); - - { // build rank table - IndirectSort s = {gtd}; - for (int j = 0; j < nb; j++) ranks[j] = j; - std::sort (ranks, ranks + nb, s); - } - - for (int rank = 0; rank < nb; rank++) { - int i = ranks [rank]; - tab [cb[i]].push_back (rank); - } - - - // this is very expensive. Any suggestion for improvement - // welcome. - for (int i = 0; i < nc; i++) { - std::vector & di = tab[i]; - for (int j = 0; j < nc; j++) { - std::vector & dj = tab[j]; - n_gt_q [i * nc + j] += accum_gt_weight_diff (di, dj); - - } - } - - } - - } - -}; - - -/***************************************** - * PolysemousTraining - ******************************************/ - - - -PolysemousTraining::PolysemousTraining () -{ - optimization_type = OT_ReproduceDistances_affine; - ntrain_permutation = 0; - dis_weight_factor = log(2); -} - - - -void PolysemousTraining::optimize_reproduce_distances ( - ProductQuantizer &pq) const -{ - - int dsub = pq.dsub; - - int n = pq.ksub; - int nbits = pq.nbits; - -#pragma omp parallel for - for (int m = 0; m < pq.M; m++) { - std::vector dis_table; - - // printf ("Optimizing quantizer %d\n", m); - - float * centroids = pq.get_centroids (m, 0); - - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - dis_table.push_back (fvec_L2sqr (centroids + i * dsub, - centroids + j * dsub, - dsub)); - } - } - - std::vector perm (n); - ReproduceWithHammingObjective obj ( - nbits, dis_table, - dis_weight_factor); - - - SimulatedAnnealingOptimizer optim (&obj, *this); - - if (log_pattern.size()) { - char fname[256]; - snprintf (fname, 256, log_pattern.c_str(), m); - printf ("opening log file %s\n", fname); - optim.logfile = fopen (fname, "w"); - FAISS_THROW_IF_NOT_MSG (optim.logfile, "could not open logfile"); - } - double final_cost = optim.run_optimization (perm.data()); - - if (verbose > 0) { - printf ("SimulatedAnnealingOptimizer for m=%d: %g -> %g\n", - m, optim.init_cost, final_cost); - } - - if (log_pattern.size()) fclose (optim.logfile); - - std::vector centroids_copy; - for (int i = 0; i < dsub * n; i++) - centroids_copy.push_back (centroids[i]); - - for (int i = 0; i < n; i++) - memcpy (centroids + perm[i] * dsub, - centroids_copy.data() + i * dsub, - dsub * sizeof(centroids[0])); - - } - -} - - -void PolysemousTraining::optimize_ranking ( - ProductQuantizer &pq, size_t n, const float *x) const -{ - - int dsub = pq.dsub; - - int nbits = pq.nbits; - - std::vector all_codes (pq.code_size * n); - - pq.compute_codes (x, all_codes.data(), n); - - FAISS_THROW_IF_NOT (pq.nbits == 8); - - if (n == 0) - pq.compute_sdc_table (); - -#pragma omp parallel for - for (int m = 0; m < pq.M; m++) { - size_t nq, nb; - std::vector codes; // query codes, then db codes - std::vector gt_distances; // nq * nb matrix of distances - - if (n > 0) { - std::vector xtrain (n * dsub); - for (int i = 0; i < n; i++) - memcpy (xtrain.data() + i * dsub, - x + i * pq.d + m * dsub, - sizeof(float) * dsub); - - codes.resize (n); - for (int i = 0; i < n; i++) - codes [i] = all_codes [i * pq.code_size + m]; - - nq = n / 4; nb = n - nq; - const float *xq = xtrain.data(); - const float *xb = xq + nq * dsub; - - gt_distances.resize (nq * nb); - - pairwise_L2sqr (dsub, - nq, xq, - nb, xb, - gt_distances.data()); - } else { - nq = nb = pq.ksub; - codes.resize (2 * nq); - for (int i = 0; i < nq; i++) - codes[i] = codes [i + nq] = i; - - gt_distances.resize (nq * nb); - - memcpy (gt_distances.data (), - pq.sdc_table.data () + m * nq * nb, - sizeof (float) * nq * nb); - } - - double t0 = getmillisecs (); - - PermutationObjective *obj = new RankingScore2 ( - nbits, nq, nb, - codes.data(), codes.data() + nq, - gt_distances.data ()); - ScopeDeleter1 del (obj); - - if (verbose > 0) { - printf(" m=%d, nq=%ld, nb=%ld, intialize RankingScore " - "in %.3f ms\n", - m, nq, nb, getmillisecs () - t0); - } - - SimulatedAnnealingOptimizer optim (obj, *this); - - if (log_pattern.size()) { - char fname[256]; - snprintf (fname, 256, log_pattern.c_str(), m); - printf ("opening log file %s\n", fname); - optim.logfile = fopen (fname, "w"); - FAISS_THROW_IF_NOT_FMT (optim.logfile, - "could not open logfile %s", fname); - } - - std::vector perm (pq.ksub); - - double final_cost = optim.run_optimization (perm.data()); - printf ("SimulatedAnnealingOptimizer for m=%d: %g -> %g\n", - m, optim.init_cost, final_cost); - - if (log_pattern.size()) fclose (optim.logfile); - - float * centroids = pq.get_centroids (m, 0); - - std::vector centroids_copy; - for (int i = 0; i < dsub * pq.ksub; i++) - centroids_copy.push_back (centroids[i]); - - for (int i = 0; i < pq.ksub; i++) - memcpy (centroids + perm[i] * dsub, - centroids_copy.data() + i * dsub, - dsub * sizeof(centroids[0])); - - } - -} - - - -void PolysemousTraining::optimize_pq_for_hamming (ProductQuantizer &pq, - size_t n, const float *x) const -{ - if (optimization_type == OT_None) { - - } else if (optimization_type == OT_ReproduceDistances_affine) { - optimize_reproduce_distances (pq); - } else { - optimize_ranking (pq, n, x); - } - - pq.compute_sdc_table (); - -} - - -} // namespace faiss diff --git a/PolysemousTraining.h b/PolysemousTraining.h deleted file mode 100644 index ada8512941..0000000000 --- a/PolysemousTraining.h +++ /dev/null @@ -1,158 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#ifndef FAISS_POLYSEMOUS_TRAINING_INCLUDED -#define FAISS_POLYSEMOUS_TRAINING_INCLUDED - - -#include "ProductQuantizer.h" - - -namespace faiss { - - -/// parameters used for the simulated annealing method -struct SimulatedAnnealingParameters { - - // optimization parameters - double init_temperature; // init probaility of accepting a bad swap - double temperature_decay; // at each iteration the temp is multiplied by this - int n_iter; // nb of iterations - int n_redo; // nb of runs of the simulation - int seed; // random seed - int verbose; - bool only_bit_flips; // restrict permutation changes to bit flips - bool init_random; // intialize with a random permutation (not identity) - - // set reasonable defaults - SimulatedAnnealingParameters (); - -}; - - -/// abstract class for the loss function -struct PermutationObjective { - - int n; - - virtual double compute_cost (const int *perm) const = 0; - - // what would the cost update be if iw and jw were swapped? - // default implementation just computes both and computes the difference - virtual double cost_update (const int *perm, int iw, int jw) const; - - virtual ~PermutationObjective () {} -}; - - -struct ReproduceDistancesObjective : PermutationObjective { - - double dis_weight_factor; - - static double sqr (double x) { return x * x; } - - // weihgting of distances: it is more important to reproduce small - // distances well - double dis_weight (double x) const; - - std::vector source_dis; ///< "real" corrected distances (size n^2) - const double * target_dis; ///< wanted distances (size n^2) - std::vector weights; ///< weights for each distance (size n^2) - - double get_source_dis (int i, int j) const; - - // cost = quadratic difference between actual distance and Hamming distance - double compute_cost(const int* perm) const override; - - // what would the cost update be if iw and jw were swapped? - // computed in O(n) instead of O(n^2) for the full re-computation - double cost_update(const int* perm, int iw, int jw) const override; - - ReproduceDistancesObjective ( - int n, - const double *source_dis_in, - const double *target_dis_in, - double dis_weight_factor); - - static void compute_mean_stdev (const double *tab, size_t n2, - double *mean_out, double *stddev_out); - - void set_affine_target_dis (const double *source_dis_in); - - ~ReproduceDistancesObjective() override {} -}; - -struct RandomGenerator; - -/// Simulated annealing optimization algorithm for permutations. - struct SimulatedAnnealingOptimizer: SimulatedAnnealingParameters { - - PermutationObjective *obj; - int n; ///< size of the permutation - FILE *logfile; /// logs values of the cost function - - SimulatedAnnealingOptimizer (PermutationObjective *obj, - const SimulatedAnnealingParameters &p); - RandomGenerator *rnd; - - /// remember intial cost of optimization - double init_cost; - - // main entry point. Perform the optimization loop, starting from - // and modifying permutation in-place - double optimize (int *perm); - - // run the optimization and return the best result in best_perm - double run_optimization (int * best_perm); - - virtual ~SimulatedAnnealingOptimizer (); -}; - - - - -/// optimizes the order of indices in a ProductQuantizer -struct PolysemousTraining: SimulatedAnnealingParameters { - - enum Optimization_type_t { - OT_None, - OT_ReproduceDistances_affine, ///< default - OT_Ranking_weighted_diff /// same as _2, but use rank of y+ - rank of y- - }; - Optimization_type_t optimization_type; - - // use 1/4 of the training points for the optimization, with - // max. ntrain_permutation. If ntrain_permutation == 0: train on - // centroids - int ntrain_permutation; - double dis_weight_factor; // decay of exp that weights distance loss - - // filename pattern for the logging of iterations - std::string log_pattern; - - // sets default values - PolysemousTraining (); - - /// reorder the centroids so that the Hamming distace becomes a - /// good approximation of the SDC distance (called by train) - void optimize_pq_for_hamming (ProductQuantizer & pq, - size_t n, const float *x) const; - - /// called by optimize_pq_for_hamming - void optimize_ranking (ProductQuantizer &pq, size_t n, const float *x) const; - /// called by optimize_pq_for_hamming - void optimize_reproduce_distances (ProductQuantizer &pq) const; - -}; - - -} // namespace faiss - - -#endif diff --git a/ProductQuantizer.cpp b/ProductQuantizer.cpp deleted file mode 100644 index 2b709fe3d8..0000000000 --- a/ProductQuantizer.cpp +++ /dev/null @@ -1,876 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#include "ProductQuantizer.h" - - -#include -#include -#include -#include - -#include - -#include "FaissAssert.h" -#include "VectorTransform.h" -#include "IndexFlat.h" -#include "utils.h" - - -extern "C" { - -/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */ - -int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER * - n, FINTEGER *k, const float *alpha, const float *a, - FINTEGER *lda, const float *b, FINTEGER * - ldb, float *beta, float *c, FINTEGER *ldc); - -} - - -namespace faiss { - - -/* compute an estimator using look-up tables for typical values of M */ -template -void pq_estimators_from_tables_Mmul4 (int M, const CT * codes, - size_t ncodes, - const float * __restrict dis_table, - size_t ksub, - size_t k, - float * heap_dis, - int64_t * heap_ids) -{ - - for (size_t j = 0; j < ncodes; j++) { - float dis = 0; - const float *dt = dis_table; - - for (size_t m = 0; m < M; m+=4) { - float dism = 0; - dism = dt[*codes++]; dt += ksub; - dism += dt[*codes++]; dt += ksub; - dism += dt[*codes++]; dt += ksub; - dism += dt[*codes++]; dt += ksub; - dis += dism; - } - - if (C::cmp (heap_dis[0], dis)) { - heap_pop (k, heap_dis, heap_ids); - heap_push (k, heap_dis, heap_ids, dis, j); - } - } -} - - -template -void pq_estimators_from_tables_M4 (const CT * codes, - size_t ncodes, - const float * __restrict dis_table, - size_t ksub, - size_t k, - float * heap_dis, - int64_t * heap_ids) -{ - - for (size_t j = 0; j < ncodes; j++) { - float dis = 0; - const float *dt = dis_table; - dis = dt[*codes++]; dt += ksub; - dis += dt[*codes++]; dt += ksub; - dis += dt[*codes++]; dt += ksub; - dis += dt[*codes++]; - - if (C::cmp (heap_dis[0], dis)) { - heap_pop (k, heap_dis, heap_ids); - heap_push (k, heap_dis, heap_ids, dis, j); - } - } -} - - -template -static inline void pq_estimators_from_tables (const ProductQuantizer& pq, - const CT * codes, - size_t ncodes, - const float * dis_table, - size_t k, - float * heap_dis, - int64_t * heap_ids) -{ - - if (pq.M == 4) { - - pq_estimators_from_tables_M4 (codes, ncodes, - dis_table, pq.ksub, k, - heap_dis, heap_ids); - return; - } - - if (pq.M % 4 == 0) { - pq_estimators_from_tables_Mmul4 (pq.M, codes, ncodes, - dis_table, pq.ksub, k, - heap_dis, heap_ids); - return; - } - - /* Default is relatively slow */ - const size_t M = pq.M; - const size_t ksub = pq.ksub; - for (size_t j = 0; j < ncodes; j++) { - float dis = 0; - const float * __restrict dt = dis_table; - for (int m = 0; m < M; m++) { - dis += dt[*codes++]; - dt += ksub; - } - if (C::cmp (heap_dis[0], dis)) { - heap_pop (k, heap_dis, heap_ids); - heap_push (k, heap_dis, heap_ids, dis, j); - } - } -} - -template -static inline void pq_estimators_from_tables_generic(const ProductQuantizer& pq, - size_t nbits, - const uint8_t *codes, - size_t ncodes, - const float *dis_table, - size_t k, - float *heap_dis, - int64_t *heap_ids) -{ - const size_t M = pq.M; - const size_t ksub = pq.ksub; - for (size_t j = 0; j < ncodes; ++j) { - faiss::ProductQuantizer::PQDecoderGeneric decoder( - codes + j * pq.code_size, nbits - ); - float dis = 0; - const float * __restrict dt = dis_table; - for (size_t m = 0; m < M; m++) { - uint64_t c = decoder.decode(); - dis += dt[c]; - dt += ksub; - } - - if (C::cmp(heap_dis[0], dis)) { - heap_pop(k, heap_dis, heap_ids); - heap_push(k, heap_dis, heap_ids, dis, j); - } - } -} - -/********************************************* - * PQ implementation - *********************************************/ - - - -ProductQuantizer::ProductQuantizer (size_t d, size_t M, size_t nbits): - d(d), M(M), nbits(nbits), assign_index(nullptr) -{ - set_derived_values (); -} - -ProductQuantizer::ProductQuantizer () - : ProductQuantizer(0, 1, 0) {} - -void ProductQuantizer::set_derived_values () { - // quite a few derived values - FAISS_THROW_IF_NOT (d % M == 0); - dsub = d / M; - code_size = (nbits * M + 7) / 8; - ksub = 1 << nbits; - centroids.resize (d * ksub); - verbose = false; - train_type = Train_default; -} - -void ProductQuantizer::set_params (const float * centroids_, int m) -{ - memcpy (get_centroids(m, 0), centroids_, - ksub * dsub * sizeof (centroids_[0])); -} - - -static void init_hypercube (int d, int nbits, - int n, const float * x, - float *centroids) -{ - - std::vector mean (d); - for (int i = 0; i < n; i++) - for (int j = 0; j < d; j++) - mean [j] += x[i * d + j]; - - float maxm = 0; - for (int j = 0; j < d; j++) { - mean [j] /= n; - if (fabs(mean[j]) > maxm) maxm = fabs(mean[j]); - } - - for (int i = 0; i < (1 << nbits); i++) { - float * cent = centroids + i * d; - for (int j = 0; j < nbits; j++) - cent[j] = mean [j] + (((i >> j) & 1) ? 1 : -1) * maxm; - for (int j = nbits; j < d; j++) - cent[j] = mean [j]; - } - - -} - -static void init_hypercube_pca (int d, int nbits, - int n, const float * x, - float *centroids) -{ - PCAMatrix pca (d, nbits); - pca.train (n, x); - - - for (int i = 0; i < (1 << nbits); i++) { - float * cent = centroids + i * d; - for (int j = 0; j < d; j++) { - cent[j] = pca.mean[j]; - float f = 1.0; - for (int k = 0; k < nbits; k++) - cent[j] += f * - sqrt (pca.eigenvalues [k]) * - (((i >> k) & 1) ? 1 : -1) * - pca.PCAMat [j + k * d]; - } - } - -} - -void ProductQuantizer::train (int n, const float * x) -{ - if (train_type != Train_shared) { - train_type_t final_train_type; - final_train_type = train_type; - if (train_type == Train_hypercube || - train_type == Train_hypercube_pca) { - if (dsub < nbits) { - final_train_type = Train_default; - printf ("cannot train hypercube: nbits=%ld > log2(d=%ld)\n", - nbits, dsub); - } - } - - float * xslice = new float[n * dsub]; - ScopeDeleter del (xslice); - for (int m = 0; m < M; m++) { - for (int j = 0; j < n; j++) - memcpy (xslice + j * dsub, - x + j * d + m * dsub, - dsub * sizeof(float)); - - Clustering clus (dsub, ksub, cp); - - // we have some initialization for the centroids - if (final_train_type != Train_default) { - clus.centroids.resize (dsub * ksub); - } - - switch (final_train_type) { - case Train_hypercube: - init_hypercube (dsub, nbits, n, xslice, - clus.centroids.data ()); - break; - case Train_hypercube_pca: - init_hypercube_pca (dsub, nbits, n, xslice, - clus.centroids.data ()); - break; - case Train_hot_start: - memcpy (clus.centroids.data(), - get_centroids (m, 0), - dsub * ksub * sizeof (float)); - break; - default: ; - } - - if(verbose) { - clus.verbose = true; - printf ("Training PQ slice %d/%zd\n", m, M); - } - IndexFlatL2 index (dsub); - clus.train (n, xslice, assign_index ? *assign_index : index); - set_params (clus.centroids.data(), m); - } - - - } else { - - Clustering clus (dsub, ksub, cp); - - if(verbose) { - clus.verbose = true; - printf ("Training all PQ slices at once\n"); - } - - IndexFlatL2 index (dsub); - - clus.train (n * M, x, assign_index ? *assign_index : index); - for (int m = 0; m < M; m++) { - set_params (clus.centroids.data(), m); - } - - } -} - -template -void compute_code(const ProductQuantizer& pq, const float *x, uint8_t *code) { - float distances [pq.ksub]; - PQEncoder encoder(code, pq.nbits); - for (size_t m = 0; m < pq.M; m++) { - float mindis = 1e20; - uint64_t idxm = 0; - const float * xsub = x + m * pq.dsub; - - fvec_L2sqr_ny(distances, xsub, pq.get_centroids(m, 0), pq.dsub, pq.ksub); - - /* Find best centroid */ - for (size_t i = 0; i < pq.ksub; i++) { - float dis = distances[i]; - if (dis < mindis) { - mindis = dis; - idxm = i; - } - } - - encoder.encode(idxm); - } -} - -void ProductQuantizer::compute_code(const float * x, uint8_t * code) const { - switch (nbits) { - case 8: - faiss::compute_code(*this, x, code); - break; - - case 16: - faiss::compute_code(*this, x, code); - break; - - default: - faiss::compute_code(*this, x, code); - break; - } -} - -template -void decode(const ProductQuantizer& pq, const uint8_t *code, float *x) -{ - PQDecoder decoder(code, pq.nbits); - for (size_t m = 0; m < pq.M; m++) { - uint64_t c = decoder.decode(); - memcpy(x + m * pq.dsub, pq.get_centroids(m, c), sizeof(float) * pq.dsub); - } -} - -void ProductQuantizer::decode (const uint8_t *code, float *x) const -{ - switch (nbits) { - case 8: - faiss::decode(*this, code, x); - break; - - case 16: - faiss::decode(*this, code, x); - break; - - default: - faiss::decode(*this, code, x); - break; - } -} - - -void ProductQuantizer::decode (const uint8_t *code, float *x, size_t n) const -{ - for (size_t i = 0; i < n; i++) { - this->decode (code + code_size * i, x + d * i); - } -} - - -void ProductQuantizer::compute_code_from_distance_table (const float *tab, - uint8_t *code) const -{ - PQEncoderGeneric encoder(code, nbits); - for (size_t m = 0; m < M; m++) { - float mindis = 1e20; - uint64_t idxm = 0; - - /* Find best centroid */ - for (size_t j = 0; j < ksub; j++) { - float dis = *tab++; - if (dis < mindis) { - mindis = dis; - idxm = j; - } - } - - encoder.encode(idxm); - } -} - -void ProductQuantizer::compute_codes_with_assign_index ( - const float * x, - uint8_t * codes, - size_t n) -{ - FAISS_THROW_IF_NOT (assign_index && assign_index->d == dsub); - - for (size_t m = 0; m < M; m++) { - assign_index->reset (); - assign_index->add (ksub, get_centroids (m, 0)); - size_t bs = 65536; - float * xslice = new float[bs * dsub]; - ScopeDeleter del (xslice); - idx_t *assign = new idx_t[bs]; - ScopeDeleter del2 (assign); - - for (size_t i0 = 0; i0 < n; i0 += bs) { - size_t i1 = std::min(i0 + bs, n); - - for (size_t i = i0; i < i1; i++) { - memcpy (xslice + (i - i0) * dsub, - x + i * d + m * dsub, - dsub * sizeof(float)); - } - - assign_index->assign (i1 - i0, xslice, assign); - - if (nbits == 8) { - uint8_t *c = codes + code_size * i0 + m; - for (size_t i = i0; i < i1; i++) { - *c = assign[i - i0]; - c += M; - } - } else if (nbits == 16) { - uint16_t *c = (uint16_t*)(codes + code_size * i0 + m * 2); - for (size_t i = i0; i < i1; i++) { - *c = assign[i - i0]; - c += M; - } - } else { - for (size_t i = i0; i < i1; ++i) { - uint8_t *c = codes + code_size * i + ((m * nbits) / 8); - uint8_t offset = (m * nbits) % 8; - uint64_t ass = assign[i - i0]; - - PQEncoderGeneric encoder(c, nbits, offset); - encoder.encode(ass); - } - } - - } - } - -} - -void ProductQuantizer::compute_codes (const float * x, - uint8_t * codes, - size_t n) const -{ - // process by blocks to avoid using too much RAM - size_t bs = 256 * 1024; - if (n > bs) { - for (size_t i0 = 0; i0 < n; i0 += bs) { - size_t i1 = std::min(i0 + bs, n); - compute_codes (x + d * i0, codes + code_size * i0, i1 - i0); - } - return; - } - - if (dsub < 16) { // simple direct computation - -#pragma omp parallel for - for (size_t i = 0; i < n; i++) - compute_code (x + i * d, codes + i * code_size); - - } else { // worthwile to use BLAS - float *dis_tables = new float [n * ksub * M]; - ScopeDeleter del (dis_tables); - compute_distance_tables (n, x, dis_tables); - -#pragma omp parallel for - for (size_t i = 0; i < n; i++) { - uint8_t * code = codes + i * code_size; - const float * tab = dis_tables + i * ksub * M; - compute_code_from_distance_table (tab, code); - } - } -} - - -void ProductQuantizer::compute_distance_table (const float * x, - float * dis_table) const -{ - size_t m; - - for (m = 0; m < M; m++) { - fvec_L2sqr_ny (dis_table + m * ksub, - x + m * dsub, - get_centroids(m, 0), - dsub, - ksub); - } -} - -void ProductQuantizer::compute_inner_prod_table (const float * x, - float * dis_table) const -{ - size_t m; - - for (m = 0; m < M; m++) { - fvec_inner_products_ny (dis_table + m * ksub, - x + m * dsub, - get_centroids(m, 0), - dsub, - ksub); - } -} - - -void ProductQuantizer::compute_distance_tables ( - size_t nx, - const float * x, - float * dis_tables) const -{ - - if (dsub < 16) { - -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) { - compute_distance_table (x + i * d, dis_tables + i * ksub * M); - } - - } else { // use BLAS - - for (int m = 0; m < M; m++) { - pairwise_L2sqr (dsub, - nx, x + dsub * m, - ksub, centroids.data() + m * dsub * ksub, - dis_tables + ksub * m, - d, dsub, ksub * M); - } - } -} - -void ProductQuantizer::compute_inner_prod_tables ( - size_t nx, - const float * x, - float * dis_tables) const -{ - - if (dsub < 16) { - -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) { - compute_inner_prod_table (x + i * d, dis_tables + i * ksub * M); - } - - } else { // use BLAS - - // compute distance tables - for (int m = 0; m < M; m++) { - FINTEGER ldc = ksub * M, nxi = nx, ksubi = ksub, - dsubi = dsub, di = d; - float one = 1.0, zero = 0; - - sgemm_ ("Transposed", "Not transposed", - &ksubi, &nxi, &dsubi, - &one, ¢roids [m * dsub * ksub], &dsubi, - x + dsub * m, &di, - &zero, dis_tables + ksub * m, &ldc); - } - - } -} - -template -static void pq_knn_search_with_tables ( - const ProductQuantizer& pq, - size_t nbits, - const float *dis_tables, - const uint8_t * codes, - const size_t ncodes, - HeapArray * res, - bool init_finalize_heap) -{ - size_t k = res->k, nx = res->nh; - size_t ksub = pq.ksub, M = pq.M; - - -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) { - /* query preparation for asymmetric search: compute look-up tables */ - const float* dis_table = dis_tables + i * ksub * M; - - /* Compute distances and keep smallest values */ - int64_t * __restrict heap_ids = res->ids + i * k; - float * __restrict heap_dis = res->val + i * k; - - if (init_finalize_heap) { - heap_heapify (k, heap_dis, heap_ids); - } - - switch (nbits) { - case 8: - pq_estimators_from_tables (pq, - codes, ncodes, - dis_table, - k, heap_dis, heap_ids); - break; - - case 16: - pq_estimators_from_tables (pq, - (uint16_t*)codes, ncodes, - dis_table, - k, heap_dis, heap_ids); - break; - - default: - pq_estimators_from_tables_generic (pq, - nbits, - codes, ncodes, - dis_table, - k, heap_dis, heap_ids); - break; - } - - if (init_finalize_heap) { - heap_reorder (k, heap_dis, heap_ids); - } - } -} - -void ProductQuantizer::search (const float * __restrict x, - size_t nx, - const uint8_t * codes, - const size_t ncodes, - float_maxheap_array_t * res, - bool init_finalize_heap) const -{ - FAISS_THROW_IF_NOT (nx == res->nh); - std::unique_ptr dis_tables(new float [nx * ksub * M]); - compute_distance_tables (nx, x, dis_tables.get()); - - pq_knn_search_with_tables> ( - *this, nbits, dis_tables.get(), codes, ncodes, res, init_finalize_heap); -} - -void ProductQuantizer::search_ip (const float * __restrict x, - size_t nx, - const uint8_t * codes, - const size_t ncodes, - float_minheap_array_t * res, - bool init_finalize_heap) const -{ - FAISS_THROW_IF_NOT (nx == res->nh); - std::unique_ptr dis_tables(new float [nx * ksub * M]); - compute_inner_prod_tables (nx, x, dis_tables.get()); - - pq_knn_search_with_tables > ( - *this, nbits, dis_tables.get(), codes, ncodes, res, init_finalize_heap); -} - - - -static float sqr (float x) { - return x * x; -} - -void ProductQuantizer::compute_sdc_table () -{ - sdc_table.resize (M * ksub * ksub); - - for (int m = 0; m < M; m++) { - - const float *cents = centroids.data() + m * ksub * dsub; - float * dis_tab = sdc_table.data() + m * ksub * ksub; - - // TODO optimize with BLAS - for (int i = 0; i < ksub; i++) { - const float *centi = cents + i * dsub; - for (int j = 0; j < ksub; j++) { - float accu = 0; - const float *centj = cents + j * dsub; - for (int k = 0; k < dsub; k++) - accu += sqr (centi[k] - centj[k]); - dis_tab [i + j * ksub] = accu; - } - } - } -} - -void ProductQuantizer::search_sdc (const uint8_t * qcodes, - size_t nq, - const uint8_t * bcodes, - const size_t nb, - float_maxheap_array_t * res, - bool init_finalize_heap) const -{ - FAISS_THROW_IF_NOT (sdc_table.size() == M * ksub * ksub); - FAISS_THROW_IF_NOT (nbits == 8); - size_t k = res->k; - - -#pragma omp parallel for - for (size_t i = 0; i < nq; i++) { - - /* Compute distances and keep smallest values */ - idx_t * heap_ids = res->ids + i * k; - float * heap_dis = res->val + i * k; - const uint8_t * qcode = qcodes + i * code_size; - - if (init_finalize_heap) - maxheap_heapify (k, heap_dis, heap_ids); - - const uint8_t * bcode = bcodes; - for (size_t j = 0; j < nb; j++) { - float dis = 0; - const float * tab = sdc_table.data(); - for (int m = 0; m < M; m++) { - dis += tab[bcode[m] + qcode[m] * ksub]; - tab += ksub * ksub; - } - if (dis < heap_dis[0]) { - maxheap_pop (k, heap_dis, heap_ids); - maxheap_push (k, heap_dis, heap_ids, dis, j); - } - bcode += code_size; - } - - if (init_finalize_heap) - maxheap_reorder (k, heap_dis, heap_ids); - } - -} - - -ProductQuantizer::PQEncoderGeneric::PQEncoderGeneric(uint8_t *code, int nbits, - uint8_t offset) - : code(code), offset(offset), nbits(nbits), reg(0) { - assert(nbits <= 64); - if (offset > 0) { - reg = (*code & ((1 << offset) - 1)); - } -} - -void ProductQuantizer::PQEncoderGeneric::encode(uint64_t x) { - reg |= (uint8_t)(x << offset); - x >>= (8 - offset); - if (offset + nbits >= 8) { - *code++ = reg; - - for (int i = 0; i < (nbits - (8 - offset)) / 8; ++i) { - *code++ = (uint8_t)x; - x >>= 8; - } - - offset += nbits; - offset &= 7; - reg = (uint8_t)x; - } else { - offset += nbits; - } -} - -ProductQuantizer::PQEncoderGeneric::~PQEncoderGeneric() { - if (offset > 0) { - *code = reg; - } -} - - -ProductQuantizer::PQEncoder8::PQEncoder8(uint8_t *code, int nbits) - : code(code) { - assert(8 == nbits); -} - -void ProductQuantizer::PQEncoder8::encode(uint64_t x) { - *code++ = (uint8_t)x; -} - - -ProductQuantizer::PQEncoder16::PQEncoder16(uint8_t *code, int nbits) - : code((uint16_t *)code) { - assert(16 == nbits); -} - -void ProductQuantizer::PQEncoder16::encode(uint64_t x) { - *code++ = (uint16_t)x; -} - - -ProductQuantizer::PQDecoderGeneric::PQDecoderGeneric(const uint8_t *code, - int nbits) - : code(code), - offset(0), - nbits(nbits), - mask((1ull << nbits) - 1), - reg(0) { - assert(nbits <= 64); -} - -uint64_t ProductQuantizer::PQDecoderGeneric::decode() { - if (offset == 0) { - reg = *code; - } - uint64_t c = (reg >> offset); - - if (offset + nbits >= 8) { - uint64_t e = 8 - offset; - ++code; - for (int i = 0; i < (nbits - (8 - offset)) / 8; ++i) { - c |= ((uint64_t)(*code++) << e); - e += 8; - } - - offset += nbits; - offset &= 7; - if (offset > 0) { - reg = *code; - c |= ((uint64_t)reg << e); - } - } else { - offset += nbits; - } - - return c & mask; -} - - -ProductQuantizer::PQDecoder8::PQDecoder8(const uint8_t *code, int nbits) - : code(code) { - assert(8 == nbits); -} - -uint64_t ProductQuantizer::PQDecoder8::decode() { - return (uint64_t)(*code++); -} - - -ProductQuantizer::PQDecoder16::PQDecoder16(const uint8_t *code, int nbits) - : code((uint16_t *)code) { - assert(16 == nbits); -} - -uint64_t ProductQuantizer::PQDecoder16::decode() { - return (uint64_t)(*code++); -} - - -} // namespace faiss diff --git a/ProductQuantizer.h b/ProductQuantizer.h deleted file mode 100644 index 0c3cc9eb5e..0000000000 --- a/ProductQuantizer.h +++ /dev/null @@ -1,242 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#ifndef FAISS_PRODUCT_QUANTIZER_H -#define FAISS_PRODUCT_QUANTIZER_H - -#include - -#include - -#include "Clustering.h" -#include "Heap.h" - -namespace faiss { - -/** Product Quantizer. Implemented only for METRIC_L2 */ -struct ProductQuantizer { - - using idx_t = Index::idx_t; - - size_t d; ///< size of the input vectors - size_t M; ///< number of subquantizers - size_t nbits; ///< number of bits per quantization index - - // values derived from the above - size_t dsub; ///< dimensionality of each subvector - size_t code_size; ///< byte per indexed vector - size_t ksub; ///< number of centroids for each subquantizer - bool verbose; ///< verbose during training? - - /// initialization - enum train_type_t { - Train_default, - Train_hot_start, ///< the centroids are already initialized - Train_shared, ///< share dictionary accross PQ segments - Train_hypercube, ///< intialize centroids with nbits-D hypercube - Train_hypercube_pca, ///< intialize centroids with nbits-D hypercube - }; - train_type_t train_type; - - ClusteringParameters cp; ///< parameters used during clustering - - /// if non-NULL, use this index for assignment (should be of size - /// d / M) - Index *assign_index; - - /// Centroid table, size M * ksub * dsub - std::vector centroids; - - /// return the centroids associated with subvector m - float * get_centroids (size_t m, size_t i) { - return ¢roids [(m * ksub + i) * dsub]; - } - const float * get_centroids (size_t m, size_t i) const { - return ¢roids [(m * ksub + i) * dsub]; - } - - // Train the product quantizer on a set of points. A clustering - // can be set on input to define non-default clustering parameters - void train (int n, const float *x); - - ProductQuantizer(size_t d, /* dimensionality of the input vectors */ - size_t M, /* number of subquantizers */ - size_t nbits); /* number of bit per subvector index */ - - ProductQuantizer (); - - /// compute derived values when d, M and nbits have been set - void set_derived_values (); - - /// Define the centroids for subquantizer m - void set_params (const float * centroids, int m); - - /// Quantize one vector with the product quantizer - void compute_code (const float * x, uint8_t * code) const ; - - /// same as compute_code for several vectors - void compute_codes (const float * x, - uint8_t * codes, - size_t n) const ; - - /// speed up code assignment using assign_index - /// (non-const because the index is changed) - void compute_codes_with_assign_index ( - const float * x, - uint8_t * codes, - size_t n); - - /// decode a vector from a given code (or n vectors if third argument) - void decode (const uint8_t *code, float *x) const; - void decode (const uint8_t *code, float *x, size_t n) const; - - /// If we happen to have the distance tables precomputed, this is - /// more efficient to compute the codes. - void compute_code_from_distance_table (const float *tab, - uint8_t *code) const; - - - /** Compute distance table for one vector. - * - * The distance table for x = [x_0 x_1 .. x_(M-1)] is a M * ksub - * matrix that contains - * - * dis_table (m, j) = || x_m - c_(m, j)||^2 - * for m = 0..M-1 and j = 0 .. ksub - 1 - * - * where c_(m, j) is the centroid no j of sub-quantizer m. - * - * @param x input vector size d - * @param dis_table output table, size M * ksub - */ - void compute_distance_table (const float * x, - float * dis_table) const; - - void compute_inner_prod_table (const float * x, - float * dis_table) const; - - - /** compute distance table for several vectors - * @param nx nb of input vectors - * @param x input vector size nx * d - * @param dis_table output table, size nx * M * ksub - */ - void compute_distance_tables (size_t nx, - const float * x, - float * dis_tables) const; - - void compute_inner_prod_tables (size_t nx, - const float * x, - float * dis_tables) const; - - - /** perform a search (L2 distance) - * @param x query vectors, size nx * d - * @param nx nb of queries - * @param codes database codes, size ncodes * code_size - * @param ncodes nb of nb vectors - * @param res heap array to store results (nh == nx) - * @param init_finalize_heap initialize heap (input) and sort (output)? - */ - void search (const float * x, - size_t nx, - const uint8_t * codes, - const size_t ncodes, - float_maxheap_array_t *res, - bool init_finalize_heap = true) const; - - /** same as search, but with inner product similarity */ - void search_ip (const float * x, - size_t nx, - const uint8_t * codes, - const size_t ncodes, - float_minheap_array_t *res, - bool init_finalize_heap = true) const; - - - /// Symmetric Distance Table - std::vector sdc_table; - - // intitialize the SDC table from the centroids - void compute_sdc_table (); - - void search_sdc (const uint8_t * qcodes, - size_t nq, - const uint8_t * bcodes, - const size_t ncodes, - float_maxheap_array_t * res, - bool init_finalize_heap = true) const; - - struct PQEncoderGeneric { - uint8_t *code; ///< code for this vector - uint8_t offset; - const int nbits; ///< number of bits per subquantizer index - - uint8_t reg; - - PQEncoderGeneric(uint8_t *code, int nbits, uint8_t offset = 0); - - void encode(uint64_t x); - - ~PQEncoderGeneric(); - }; - - - struct PQEncoder8 { - uint8_t *code; - - PQEncoder8(uint8_t *code, int nbits); - - void encode(uint64_t x); - }; - - struct PQEncoder16 { - uint16_t *code; - - PQEncoder16(uint8_t *code, int nbits); - - void encode(uint64_t x); - }; - - - struct PQDecoderGeneric { - const uint8_t *code; - uint8_t offset; - const int nbits; - const uint64_t mask; - uint8_t reg; - - PQDecoderGeneric(const uint8_t *code, int nbits); - - uint64_t decode(); - }; - - struct PQDecoder8 { - const uint8_t *code; - - PQDecoder8(const uint8_t *code, int nbits); - - uint64_t decode(); - }; - - struct PQDecoder16 { - const uint16_t *code; - - PQDecoder16(const uint8_t *code, int nbits); - - uint64_t decode(); - }; - -}; - - -} // namespace faiss - - -#endif diff --git a/ThreadedIndex-inl.h b/ThreadedIndex-inl.h deleted file mode 100644 index 7416fe2c1d..0000000000 --- a/ThreadedIndex-inl.h +++ /dev/null @@ -1,192 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include "FaissAssert.h" -#include -#include - -namespace faiss { - -template -ThreadedIndex::ThreadedIndex(bool threaded) - // 0 is default dimension - : ThreadedIndex(0, threaded) { -} - -template -ThreadedIndex::ThreadedIndex(int d, bool threaded) - : IndexT(d), - own_fields(false), - isThreaded_(threaded) { - } - -template -ThreadedIndex::~ThreadedIndex() { - for (auto& p : indices_) { - if (isThreaded_) { - // should have worker thread - FAISS_ASSERT((bool) p.second); - - // This will also flush all pending work - p.second->stop(); - p.second->waitForThreadExit(); - } else { - // should not have worker thread - FAISS_ASSERT(!(bool) p.second); - } - - if (own_fields) { - delete p.first; - } - } -} - -template -void ThreadedIndex::addIndex(IndexT* index) { - // We inherit the dimension from the first index added to us if we don't have - // a set dimension - if (indices_.empty() && this->d == 0) { - this->d = index->d; - } - - // The new index must match our set dimension - FAISS_THROW_IF_NOT_FMT(this->d == index->d, - "addIndex: dimension mismatch for " - "newly added index; expecting dim %d, " - "new index has dim %d", - this->d, index->d); - - if (!indices_.empty()) { - auto& existing = indices_.front().first; - - FAISS_THROW_IF_NOT_MSG(index->metric_type == existing->metric_type, - "addIndex: newly added index is " - "of different metric type than old index"); - - // Make sure this index is not duplicated - for (auto& p : indices_) { - FAISS_THROW_IF_NOT_MSG(p.first != index, - "addIndex: attempting to add index " - "that is already in the collection"); - } - } - - indices_.emplace_back( - std::make_pair( - index, - std::unique_ptr(isThreaded_ ? - new WorkerThread : nullptr))); - - onAfterAddIndex(index); -} - -template -void ThreadedIndex::removeIndex(IndexT* index) { - for (auto it = indices_.begin(); it != indices_.end(); ++it) { - if (it->first == index) { - // This is our index; stop the worker thread before removing it, - // to ensure that it has finished before function exit - if (isThreaded_) { - // should have worker thread - FAISS_ASSERT((bool) it->second); - it->second->stop(); - it->second->waitForThreadExit(); - } else { - // should not have worker thread - FAISS_ASSERT(!(bool) it->second); - } - - indices_.erase(it); - onAfterRemoveIndex(index); - - if (own_fields) { - delete index; - } - - return; - } - } - - // could not find our index - FAISS_THROW_MSG("IndexReplicas::removeIndex: index not found"); -} - -template -void ThreadedIndex::runOnIndex(std::function f) { - if (isThreaded_) { - std::vector> v; - - for (int i = 0; i < this->indices_.size(); ++i) { - auto& p = this->indices_[i]; - auto indexPtr = p.first; - v.emplace_back(p.second->add([f, i, indexPtr](){ f(i, indexPtr); })); - } - - waitAndHandleFutures(v); - } else { - // Multiple exceptions may be thrown; gather them as we encounter them, - // while letting everything else run to completion - std::vector> exceptions; - - for (int i = 0; i < this->indices_.size(); ++i) { - auto& p = this->indices_[i]; - try { - f(i, p.first); - } catch (...) { - exceptions.emplace_back(std::make_pair(i, std::current_exception())); - } - } - - handleExceptions(exceptions); - } -} - -template -void ThreadedIndex::runOnIndex( - std::function f) const { - const_cast*>(this)->runOnIndex( - [f](int i, IndexT* idx){ f(i, idx); }); -} - -template -void ThreadedIndex::reset() { - runOnIndex([](int, IndexT* index){ index->reset(); }); - this->ntotal = 0; - this->is_trained = false; -} - -template -void -ThreadedIndex::onAfterAddIndex(IndexT* index) { -} - -template -void -ThreadedIndex::onAfterRemoveIndex(IndexT* index) { -} - -template -void -ThreadedIndex::waitAndHandleFutures(std::vector>& v) { - // Blocking wait for completion for all of the indices, capturing any - // exceptions that are generated - std::vector> exceptions; - - for (int i = 0; i < v.size(); ++i) { - auto& fut = v[i]; - - try { - fut.get(); - } catch (...) { - exceptions.emplace_back(std::make_pair(i, std::current_exception())); - } - } - - handleExceptions(exceptions); -} - -} // namespace diff --git a/ThreadedIndex.h b/ThreadedIndex.h deleted file mode 100644 index 2e6632a72f..0000000000 --- a/ThreadedIndex.h +++ /dev/null @@ -1,80 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include "Index.h" -#include "IndexBinary.h" -#include "WorkerThread.h" -#include -#include - -namespace faiss { - -/// A holder of indices in a collection of threads -/// The interface to this class itself is not thread safe -template -class ThreadedIndex : public IndexT { - public: - explicit ThreadedIndex(bool threaded); - explicit ThreadedIndex(int d, bool threaded); - - ~ThreadedIndex() override; - - /// override an index that is managed by ourselves. - /// WARNING: once an index is added, it becomes unsafe to touch it from any - /// other thread than that on which is managing it, until we are shut - /// down. Use runOnIndex to perform work on it instead. - void addIndex(IndexT* index); - - /// Remove an index that is managed by ourselves. - /// This will flush all pending work on that index, and then shut - /// down its managing thread, and will remove the index. - void removeIndex(IndexT* index); - - /// Run a function on all indices, in the thread that the index is - /// managed in. - /// Function arguments are (index in collection, index pointer) - void runOnIndex(std::function f); - void runOnIndex(std::function f) const; - - /// faiss::Index API - /// All indices receive the same call - void reset() override; - - /// Returns the number of sub-indices - int count() const { return indices_.size(); } - - /// Returns the i-th sub-index - IndexT* at(int i) { return indices_[i].first; } - - /// Returns the i-th sub-index (const version) - const IndexT* at(int i) const { return indices_[i].first; } - - /// Whether or not we are responsible for deleting our contained indices - bool own_fields; - - protected: - /// Called just after an index is added - virtual void onAfterAddIndex(IndexT* index); - - /// Called just after an index is removed - virtual void onAfterRemoveIndex(IndexT* index); - -protected: - static void waitAndHandleFutures(std::vector>& v); - - /// Collection of Index instances, with their managing worker thread if any - std::vector>> indices_; - - /// Is this index multi-threaded? - bool isThreaded_; -}; - -} // namespace - -#include "ThreadedIndex-inl.h" diff --git a/WorkerThread.cpp b/WorkerThread.cpp deleted file mode 100644 index 6e9c5a5dc5..0000000000 --- a/WorkerThread.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - - -#include "WorkerThread.h" -#include "FaissAssert.h" -#include - -namespace faiss { - -namespace { - -// Captures any exceptions thrown by the lambda and returns them via the promise -void runCallback(std::function& fn, - std::promise& promise) { - try { - fn(); - promise.set_value(true); - } catch (...) { - promise.set_exception(std::current_exception()); - } -} - -} // namespace - -WorkerThread::WorkerThread() : - wantStop_(false) { - startThread(); - - // Make sure that the thread has started before continuing - add([](){}).get(); -} - -WorkerThread::~WorkerThread() { - stop(); - waitForThreadExit(); -} - -void -WorkerThread::startThread() { - thread_ = std::thread([this](){ threadMain(); }); -} - -void -WorkerThread::stop() { - std::lock_guard guard(mutex_); - - wantStop_ = true; - monitor_.notify_one(); -} - -std::future -WorkerThread::add(std::function f) { - std::lock_guard guard(mutex_); - - if (wantStop_) { - // The timer thread has been stopped, or we want to stop; we can't - // schedule anything else - std::promise p; - auto fut = p.get_future(); - - // did not execute - p.set_value(false); - return fut; - } - - auto pr = std::promise(); - auto fut = pr.get_future(); - - queue_.emplace_back(std::make_pair(std::move(f), std::move(pr))); - - // Wake up our thread - monitor_.notify_one(); - return fut; -} - -void -WorkerThread::threadMain() { - threadLoop(); - - // Call all pending tasks - FAISS_ASSERT(wantStop_); - - // flush all pending operations - for (auto& f : queue_) { - runCallback(f.first, f.second); - } -} - -void -WorkerThread::threadLoop() { - while (true) { - std::pair, std::promise> data; - - { - std::unique_lock lock(mutex_); - - while (!wantStop_ && queue_.empty()) { - monitor_.wait(lock); - } - - if (wantStop_) { - return; - } - - data = std::move(queue_.front()); - queue_.pop_front(); - } - - runCallback(data.first, data.second); - } -} - -void -WorkerThread::waitForThreadExit() { - try { - thread_.join(); - } catch (...) { - } -} - -} // namespace diff --git a/WorkerThread.h b/WorkerThread.h deleted file mode 100644 index 7ab21e9f90..0000000000 --- a/WorkerThread.h +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - - -#pragma once - -#include -#include -#include -#include - -namespace faiss { - -class WorkerThread { - public: - WorkerThread(); - - /// Stops and waits for the worker thread to exit, flushing all - /// pending lambdas - ~WorkerThread(); - - /// Request that the worker thread stop itself - void stop(); - - /// Blocking waits in the current thread for the worker thread to - /// stop - void waitForThreadExit(); - - /// Adds a lambda to run on the worker thread; returns a future that - /// can be used to block on its completion. - /// Future status is `true` if the lambda was run in the worker - /// thread; `false` if it was not run, because the worker thread is - /// exiting or has exited. - std::future add(std::function f); - - private: - void startThread(); - void threadMain(); - void threadLoop(); - - /// Thread that all queued lambdas are run on - std::thread thread_; - - /// Mutex for the queue and exit status - std::mutex mutex_; - - /// Monitor for the exit status and the queue - std::condition_variable monitor_; - - /// Whether or not we want the thread to exit - bool wantStop_; - - /// Queue of pending lambdas to call - std::deque, std::promise>> queue_; -}; - -} // namespace diff --git a/depend b/depend index 3d59c92978..36d44cc072 100644 --- a/depend +++ b/depend @@ -1,19 +1,15 @@ -utils.o: utils.cpp utils.h Heap.h AuxIndexStructures.h Index.h \ - FaissAssert.h FaissException.h -IndexIVFPQR.o: IndexIVFPQR.cpp faiss/IndexIVFPQR.h faiss/IndexIVFPQ.h \ - faiss/IndexIVF.h faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ - faiss/utils/Heap.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ - faiss/impl/PolysemousTraining.h faiss/utils/utils.h \ - faiss/utils/distances.h faiss/impl/FaissAssert.h \ - faiss/impl/FaissException.h -OnDiskInvertedLists.o: OnDiskInvertedLists.cpp \ - faiss/OnDiskInvertedLists.h faiss/IndexIVF.h faiss/Index.h \ - faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ - faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/utils/utils.h +IndexPreTransform.o: IndexPreTransform.cpp faiss/IndexPreTransform.h \ + faiss/Index.h faiss/VectorTransform.h faiss/utils/utils.h \ + faiss/utils/Heap.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h IndexFlat.o: IndexFlat.cpp faiss/IndexFlat.h faiss/Index.h \ faiss/utils/distances.h faiss/utils/Heap.h faiss/utils/extra_distances.h \ faiss/utils/utils.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ faiss/impl/AuxIndexStructures.h +IndexBinaryFlat.o: IndexBinaryFlat.cpp faiss/IndexBinaryFlat.h \ + faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/Index.h faiss/utils/hamming.h faiss/utils/Heap.h \ + faiss/utils/hamming-inl.h faiss/utils/utils.h \ + faiss/impl/AuxIndexStructures.h IndexIVFSpectralHash.o: IndexIVFSpectralHash.cpp \ faiss/IndexIVFSpectralHash.h faiss/IndexIVF.h faiss/Index.h \ faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ @@ -23,6 +19,53 @@ IndexIVFSpectralHash.o: IndexIVFSpectralHash.cpp \ InvertedLists.o: InvertedLists.cpp faiss/InvertedLists.h faiss/Index.h \ faiss/utils/utils.h faiss/utils/Heap.h faiss/impl/FaissAssert.h \ faiss/impl/FaissException.h +IndexLSH.o: IndexLSH.cpp faiss/IndexLSH.h faiss/Index.h \ + faiss/VectorTransform.h faiss/utils/utils.h faiss/utils/Heap.h \ + faiss/utils/hamming.h faiss/utils/hamming-inl.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h +IndexShards.o: IndexShards.cpp faiss/IndexShards.h faiss/Index.h \ + faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/impl/ThreadedIndex.h faiss/utils/WorkerThread.h \ + faiss/impl/ThreadedIndex-inl.h faiss/utils/Heap.h +IndexBinaryIVF.o: IndexBinaryIVF.cpp faiss/IndexBinaryIVF.h \ + faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/Index.h faiss/IndexIVF.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/utils/hamming.h faiss/utils/hamming-inl.h \ + faiss/utils/utils.h faiss/impl/AuxIndexStructures.h faiss/IndexFlat.h +IndexHNSW.o: IndexHNSW.cpp faiss/IndexHNSW.h faiss/impl/HNSW.h \ + faiss/Index.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/utils/random.h faiss/utils/Heap.h faiss/IndexFlat.h \ + faiss/IndexPQ.h faiss/impl/ProductQuantizer.h faiss/Clustering.h \ + faiss/impl/PolysemousTraining.h faiss/IndexScalarQuantizer.h \ + faiss/IndexIVF.h faiss/InvertedLists.h faiss/impl/ScalarQuantizer.h \ + faiss/impl/AuxIndexStructures.h faiss/utils/utils.h \ + faiss/utils/distances.h faiss/IndexIVFPQ.h faiss/Index2Layer.h +IndexBinaryFromFloat.o: IndexBinaryFromFloat.cpp \ + faiss/IndexBinaryFromFloat.h faiss/IndexBinary.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/Index.h \ + faiss/utils/utils.h faiss/utils/Heap.h +AutoTune.o: AutoTune.cpp faiss/AutoTune.h faiss/Index.h \ + faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/utils/utils.h faiss/utils/Heap.h faiss/utils/random.h \ + faiss/IndexFlat.h faiss/VectorTransform.h faiss/IndexPreTransform.h \ + faiss/IndexLSH.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ + faiss/Clustering.h faiss/impl/PolysemousTraining.h faiss/IndexIVF.h \ + faiss/InvertedLists.h faiss/IndexIVFPQ.h faiss/IndexIVFPQR.h \ + faiss/IndexIVFFlat.h faiss/MetaIndexes.h faiss/IndexShards.h \ + faiss/impl/ThreadedIndex.h faiss/utils/WorkerThread.h \ + faiss/impl/ThreadedIndex-inl.h faiss/IndexReplicas.h \ + faiss/IndexScalarQuantizer.h faiss/impl/ScalarQuantizer.h \ + faiss/impl/AuxIndexStructures.h faiss/IndexHNSW.h faiss/impl/HNSW.h \ + faiss/IndexBinaryFlat.h faiss/IndexBinaryHNSW.h faiss/IndexBinaryIVF.h +Clustering.o: Clustering.cpp faiss/Clustering.h faiss/Index.h \ + faiss/impl/AuxIndexStructures.h faiss/utils/utils.h faiss/utils/Heap.h \ + faiss/utils/random.h faiss/utils/distances.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/IndexFlat.h +MetaIndexes.o: MetaIndexes.cpp faiss/MetaIndexes.h faiss/Index.h \ + faiss/IndexShards.h faiss/IndexBinary.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/impl/ThreadedIndex.h \ + faiss/utils/WorkerThread.h faiss/impl/ThreadedIndex-inl.h \ + faiss/IndexReplicas.h faiss/utils/Heap.h faiss/impl/AuxIndexStructures.h index_factory.o: index_factory.cpp faiss/AutoTune.h faiss/Index.h \ faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ faiss/utils/utils.h faiss/utils/Heap.h faiss/utils/random.h \ @@ -38,21 +81,43 @@ index_factory.o: index_factory.cpp faiss/AutoTune.h faiss/Index.h \ faiss/IndexHNSW.h faiss/impl/HNSW.h faiss/IndexLattice.h \ faiss/impl/lattice_Zn.h faiss/IndexBinaryFlat.h faiss/IndexBinaryHNSW.h \ faiss/IndexBinaryIVF.h -IndexBinaryIVF.o: IndexBinaryIVF.cpp faiss/IndexBinaryIVF.h \ - faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ - faiss/Index.h faiss/IndexIVF.h faiss/InvertedLists.h faiss/Clustering.h \ - faiss/utils/Heap.h faiss/utils/hamming.h faiss/utils/hamming-inl.h \ - faiss/utils/utils.h faiss/impl/AuxIndexStructures.h faiss/IndexFlat.h -ProductQuantizer.o: ProductQuantizer.cpp ProductQuantizer.h Clustering.h \ - faiss/Index.h Heap.h FaissAssert.h FaissException.h VectorTransform.h \ - IndexFlat.h utils.h -Heap.o: Heap.cpp Heap.h VectorTransform.o: VectorTransform.cpp faiss/VectorTransform.h \ faiss/Index.h faiss/utils/distances.h faiss/utils/Heap.h \ faiss/utils/random.h faiss/utils/utils.h faiss/impl/FaissAssert.h \ faiss/impl/FaissException.h faiss/IndexPQ.h \ faiss/impl/ProductQuantizer.h faiss/Clustering.h \ faiss/impl/PolysemousTraining.h +IndexIVF.o: IndexIVF.cpp faiss/IndexIVF.h faiss/Index.h \ + faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/utils/utils.h faiss/utils/hamming.h faiss/utils/hamming-inl.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/IndexFlat.h \ + faiss/impl/AuxIndexStructures.h +IndexIVFPQ.o: IndexIVFPQ.cpp faiss/IndexIVFPQ.h faiss/IndexIVF.h \ + faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ + faiss/impl/PolysemousTraining.h faiss/utils/utils.h \ + faiss/utils/distances.h faiss/IndexFlat.h faiss/utils/hamming.h \ + faiss/utils/hamming-inl.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/impl/AuxIndexStructures.h +OnDiskInvertedLists.o: OnDiskInvertedLists.cpp \ + faiss/OnDiskInvertedLists.h faiss/IndexIVF.h faiss/Index.h \ + faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/utils/utils.h +IndexIVFPQR.o: IndexIVFPQR.cpp faiss/IndexIVFPQR.h faiss/IndexIVFPQ.h \ + faiss/IndexIVF.h faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ + faiss/impl/PolysemousTraining.h faiss/utils/utils.h \ + faiss/utils/distances.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h +MatrixStats.o: MatrixStats.cpp faiss/MatrixStats.h faiss/utils/utils.h \ + faiss/utils/Heap.h +IndexBinary.o: IndexBinary.cpp faiss/IndexBinary.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/Index.h +IndexPQ.o: IndexPQ.cpp faiss/IndexPQ.h faiss/Index.h \ + faiss/impl/ProductQuantizer.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/impl/PolysemousTraining.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/impl/AuxIndexStructures.h \ + faiss/utils/hamming.h faiss/utils/hamming-inl.h clone_index.o: clone_index.cpp faiss/clone_index.h \ faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/IndexFlat.h \ faiss/Index.h faiss/VectorTransform.h faiss/IndexPreTransform.h \ @@ -67,80 +132,33 @@ clone_index.o: clone_index.cpp faiss/clone_index.h \ faiss/impl/ScalarQuantizer.h faiss/impl/AuxIndexStructures.h \ faiss/IndexHNSW.h faiss/impl/HNSW.h faiss/utils/random.h \ faiss/utils/utils.h faiss/IndexLattice.h faiss/impl/lattice_Zn.h -Index.o: Index.cpp faiss/Index.h faiss/impl/AuxIndexStructures.h \ - faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ - faiss/utils/distances.h faiss/utils/Heap.h -AuxIndexStructures.o: AuxIndexStructures.cpp AuxIndexStructures.h Index.h \ - FaissAssert.h FaissException.h -IndexHNSW.o: IndexHNSW.cpp faiss/IndexHNSW.h faiss/impl/HNSW.h \ - faiss/Index.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ - faiss/utils/random.h faiss/utils/Heap.h faiss/IndexFlat.h \ - faiss/IndexPQ.h faiss/impl/ProductQuantizer.h faiss/Clustering.h \ - faiss/impl/PolysemousTraining.h faiss/IndexScalarQuantizer.h \ - faiss/IndexIVF.h faiss/InvertedLists.h faiss/impl/ScalarQuantizer.h \ - faiss/impl/AuxIndexStructures.h faiss/utils/utils.h \ - faiss/utils/distances.h faiss/IndexIVFPQ.h faiss/Index2Layer.h -IndexIVF.o: IndexIVF.cpp faiss/IndexIVF.h faiss/Index.h \ - faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ - faiss/utils/utils.h faiss/utils/hamming.h faiss/utils/hamming-inl.h \ - faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/IndexFlat.h \ +IndexIVFFlat.o: IndexIVFFlat.cpp faiss/IndexIVFFlat.h faiss/IndexIVF.h \ + faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/IndexFlat.h faiss/utils/distances.h \ + faiss/utils/utils.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ faiss/impl/AuxIndexStructures.h -FaissException.o: FaissException.cpp FaissException.h -MatrixStats.o: MatrixStats.cpp faiss/MatrixStats.h faiss/utils/utils.h \ - faiss/utils/Heap.h IndexReplicas.o: IndexReplicas.cpp faiss/IndexReplicas.h faiss/Index.h \ faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ faiss/impl/ThreadedIndex.h faiss/utils/WorkerThread.h \ faiss/impl/ThreadedIndex-inl.h -HNSW.o: HNSW.cpp HNSW.h Index.h FaissAssert.h FaissException.h utils.h \ - Heap.h AuxIndexStructures.h -IndexLattice.o: IndexLattice.cpp faiss/IndexLattice.h faiss/IndexIVF.h \ - faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ - faiss/utils/Heap.h faiss/impl/lattice_Zn.h faiss/utils/hamming.h \ - faiss/utils/hamming-inl.h faiss/impl/FaissAssert.h \ - faiss/impl/FaissException.h faiss/utils/distances.h -hamming.o: hamming.cpp hamming.h Heap.h FaissAssert.h FaissException.h -IndexBinaryFlat.o: IndexBinaryFlat.cpp faiss/IndexBinaryFlat.h \ - faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ - faiss/Index.h faiss/utils/hamming.h faiss/utils/Heap.h \ - faiss/utils/hamming-inl.h faiss/utils/utils.h \ - faiss/impl/AuxIndexStructures.h -IndexLSH.o: IndexLSH.cpp faiss/IndexLSH.h faiss/Index.h \ - faiss/VectorTransform.h faiss/utils/utils.h faiss/utils/Heap.h \ - faiss/utils/hamming.h faiss/utils/hamming-inl.h faiss/impl/FaissAssert.h \ - faiss/impl/FaissException.h -IndexShards.o: IndexShards.cpp faiss/IndexShards.h faiss/Index.h \ - faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ - faiss/impl/ThreadedIndex.h faiss/utils/WorkerThread.h \ - faiss/impl/ThreadedIndex-inl.h faiss/utils/Heap.h -IndexPreTransform.o: IndexPreTransform.cpp faiss/IndexPreTransform.h \ - faiss/Index.h faiss/VectorTransform.h faiss/utils/utils.h \ - faiss/utils/Heap.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h -PolysemousTraining.o: PolysemousTraining.cpp PolysemousTraining.h \ - ProductQuantizer.h Clustering.h faiss/Index.h Heap.h utils.h hamming.h \ - FaissAssert.h FaissException.h -MetaIndexes.o: MetaIndexes.cpp faiss/MetaIndexes.h faiss/Index.h \ - faiss/IndexShards.h faiss/IndexBinary.h faiss/impl/FaissAssert.h \ - faiss/impl/FaissException.h faiss/impl/ThreadedIndex.h \ - faiss/utils/WorkerThread.h faiss/impl/ThreadedIndex-inl.h \ - faiss/IndexReplicas.h faiss/utils/Heap.h faiss/impl/AuxIndexStructures.h -IndexIVFPQ.o: IndexIVFPQ.cpp faiss/IndexIVFPQ.h faiss/IndexIVF.h \ - faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ - faiss/utils/Heap.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ - faiss/impl/PolysemousTraining.h faiss/utils/utils.h \ - faiss/utils/distances.h faiss/IndexFlat.h faiss/utils/hamming.h \ - faiss/utils/hamming-inl.h faiss/impl/FaissAssert.h \ - faiss/impl/FaissException.h faiss/impl/AuxIndexStructures.h +IVFlib.o: IVFlib.cpp faiss/IVFlib.h faiss/IndexIVF.h faiss/Index.h \ + faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/IndexPreTransform.h faiss/VectorTransform.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h +Index.o: Index.cpp faiss/Index.h faiss/impl/AuxIndexStructures.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/utils/distances.h faiss/utils/Heap.h +IndexScalarQuantizer.o: IndexScalarQuantizer.cpp \ + faiss/IndexScalarQuantizer.h faiss/IndexIVF.h faiss/Index.h \ + faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/impl/ScalarQuantizer.h faiss/impl/AuxIndexStructures.h \ + faiss/utils/utils.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h IndexBinaryHNSW.o: IndexBinaryHNSW.cpp faiss/IndexBinaryHNSW.h \ faiss/impl/HNSW.h faiss/Index.h faiss/impl/FaissAssert.h \ faiss/impl/FaissException.h faiss/utils/random.h faiss/utils/Heap.h \ faiss/IndexBinaryFlat.h faiss/IndexBinary.h faiss/utils/utils.h \ faiss/utils/hamming.h faiss/utils/hamming-inl.h \ faiss/impl/AuxIndexStructures.h -IndexBinaryFromFloat.o: IndexBinaryFromFloat.cpp \ - faiss/IndexBinaryFromFloat.h faiss/IndexBinary.h \ - faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/Index.h \ - faiss/utils/utils.h faiss/utils/Heap.h Index2Layer.o: Index2Layer.cpp faiss/Index2Layer.h faiss/IndexPQ.h \ faiss/Index.h faiss/impl/ProductQuantizer.h faiss/Clustering.h \ faiss/utils/Heap.h faiss/impl/PolysemousTraining.h faiss/IndexIVF.h \ @@ -148,64 +166,11 @@ Index2Layer.o: Index2Layer.cpp faiss/Index2Layer.h faiss/IndexPQ.h \ faiss/impl/FaissException.h faiss/utils/utils.h \ faiss/impl/AuxIndexStructures.h faiss/IndexFlat.h \ faiss/utils/distances.h -WorkerThread.o: WorkerThread.cpp WorkerThread.h FaissAssert.h \ - FaissException.h -IndexPQ.o: IndexPQ.cpp faiss/IndexPQ.h faiss/Index.h \ - faiss/impl/ProductQuantizer.h faiss/Clustering.h faiss/utils/Heap.h \ - faiss/impl/PolysemousTraining.h faiss/impl/FaissAssert.h \ - faiss/impl/FaissException.h faiss/impl/AuxIndexStructures.h \ - faiss/utils/hamming.h faiss/utils/hamming-inl.h -IndexIVFFlat.o: IndexIVFFlat.cpp faiss/IndexIVFFlat.h faiss/IndexIVF.h \ +IndexLattice.o: IndexLattice.cpp faiss/IndexLattice.h faiss/IndexIVF.h \ faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ - faiss/utils/Heap.h faiss/IndexFlat.h faiss/utils/distances.h \ - faiss/utils/utils.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ - faiss/impl/AuxIndexStructures.h -IndexBinary.o: IndexBinary.cpp faiss/IndexBinary.h \ - faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/Index.h -IndexScalarQuantizer.o: IndexScalarQuantizer.cpp \ - faiss/IndexScalarQuantizer.h faiss/IndexIVF.h faiss/Index.h \ - faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ - faiss/impl/ScalarQuantizer.h faiss/impl/AuxIndexStructures.h \ - faiss/utils/utils.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h -utils_simd.o: utils_simd.cpp utils.h Heap.h -AutoTune.o: AutoTune.cpp faiss/AutoTune.h faiss/Index.h \ - faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ - faiss/utils/utils.h faiss/utils/Heap.h faiss/utils/random.h \ - faiss/IndexFlat.h faiss/VectorTransform.h faiss/IndexPreTransform.h \ - faiss/IndexLSH.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ - faiss/Clustering.h faiss/impl/PolysemousTraining.h faiss/IndexIVF.h \ - faiss/InvertedLists.h faiss/IndexIVFPQ.h faiss/IndexIVFPQR.h \ - faiss/IndexIVFFlat.h faiss/MetaIndexes.h faiss/IndexShards.h \ - faiss/impl/ThreadedIndex.h faiss/utils/WorkerThread.h \ - faiss/impl/ThreadedIndex-inl.h faiss/IndexReplicas.h \ - faiss/IndexScalarQuantizer.h faiss/impl/ScalarQuantizer.h \ - faiss/impl/AuxIndexStructures.h faiss/IndexHNSW.h faiss/impl/HNSW.h \ - faiss/IndexBinaryFlat.h faiss/IndexBinaryHNSW.h faiss/IndexBinaryIVF.h -Clustering.o: Clustering.cpp faiss/Clustering.h faiss/Index.h \ - faiss/impl/AuxIndexStructures.h faiss/utils/utils.h faiss/utils/Heap.h \ - faiss/utils/random.h faiss/utils/distances.h faiss/impl/FaissAssert.h \ - faiss/impl/FaissException.h faiss/IndexFlat.h -IVFlib.o: IVFlib.cpp faiss/IVFlib.h faiss/IndexIVF.h faiss/Index.h \ - faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ - faiss/IndexPreTransform.h faiss/VectorTransform.h \ - faiss/impl/FaissAssert.h faiss/impl/FaissException.h -index_io.o: index_io.cpp index_io.h FaissAssert.h FaissException.h \ - AuxIndexStructures.h Index.h IndexFlat.h faiss/Index.h VectorTransform.h \ - IndexLSH.h faiss/VectorTransform.h IndexPQ.h \ - faiss/impl/ProductQuantizer.h faiss/Clustering.h faiss/utils/Heap.h \ - faiss/impl/PolysemousTraining.h IndexIVF.h faiss/InvertedLists.h \ - IndexIVFPQ.h faiss/IndexIVF.h faiss/IndexPQ.h IndexIVFFlat.h \ - IndexIVFSpectralHash.h MetaIndexes.h faiss/IndexShards.h \ - faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/ThreadedIndex.h \ - faiss/utils/WorkerThread.h faiss/impl/ThreadedIndex-inl.h \ - faiss/IndexReplicas.h IndexScalarQuantizer.h \ - faiss/impl/ScalarQuantizer.h faiss/impl/AuxIndexStructures.h IndexHNSW.h \ - faiss/impl/HNSW.h faiss/utils/random.h faiss/IndexFlat.h \ - faiss/IndexScalarQuantizer.h faiss/utils/utils.h OnDiskInvertedLists.h \ - IndexBinaryFlat.h IndexBinaryFromFloat.h IndexBinaryHNSW.h \ - faiss/IndexBinaryFlat.h IndexBinaryIVF.h -distances.o: distances.cpp distances.h Index.h Heap.h utils.h \ - FaissAssert.h FaissException.h AuxIndexStructures.h + faiss/utils/Heap.h faiss/impl/lattice_Zn.h faiss/utils/hamming.h \ + faiss/utils/hamming-inl.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/utils/distances.h GpuCloner.o: gpu/GpuCloner.cpp faiss/gpu/GpuCloner.h faiss/Index.h \ faiss/clone_index.h faiss/gpu/GpuClonerOptions.h \ faiss/gpu/GpuIndicesOptions.h faiss/gpu/GpuIndex.h \ @@ -364,15 +329,6 @@ GpuDistance.o: gpu/GpuDistance.cu faiss/gpu/GpuDistance.h faiss/Index.h \ faiss/gpu/utils/Float16.cuh faiss/gpu/utils/ConversionOperators.cuh \ faiss/gpu/utils/CopyUtils.cuh faiss/gpu/utils/HostTensor.cuh \ faiss/gpu/utils/HostTensor-inl.cuh -InvertedListAppend.o: gpu/impl/InvertedListAppend.cu \ - gpu/impl/InvertedListAppend.cuh gpu/impl/../GpuIndicesOptions.h \ - gpu/impl/../utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ - faiss/gpu/GpuFaissAssert.h faiss/impl/FaissAssert.h \ - faiss/impl/FaissException.h faiss/gpu/utils/DeviceUtils.h \ - gpu/impl/../../FaissAssert.h gpu/impl/../utils/Float16.cuh \ - faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ - faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/MemorySpace.h \ - faiss/gpu/utils/DeviceTensor-inl.cuh gpu/impl/../utils/StaticUtils.h Distance.o: gpu/impl/Distance.cu faiss/gpu/impl/Distance.cuh \ faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ diff --git a/distances.cpp b/distances.cpp deleted file mode 100644 index adf23e0e88..0000000000 --- a/distances.cpp +++ /dev/null @@ -1,336 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#include "distances.h" - -#include -#include - - -#include "utils.h" -#include "FaissAssert.h" -#include "AuxIndexStructures.h" - -namespace faiss { - -/*************************************************************************** - * Distance functions (other than L2 and IP) - ***************************************************************************/ - -struct VectorDistanceL2 { - size_t d; - - float operator () (const float *x, const float *y) const { - return fvec_L2sqr (x, y, d); - } -}; - -struct VectorDistanceL1 { - size_t d; - - float operator () (const float *x, const float *y) const { - return fvec_L1 (x, y, d); - } -}; - -struct VectorDistanceLinf { - size_t d; - - float operator () (const float *x, const float *y) const { - return fvec_Linf (x, y, d); - /* - float vmax = 0; - for (size_t i = 0; i < d; i++) { - float diff = fabs (x[i] - y[i]); - if (diff > vmax) vmax = diff; - } - return vmax;*/ - } -}; - -struct VectorDistanceLp { - size_t d; - const float p; - - float operator () (const float *x, const float *y) const { - float accu = 0; - for (size_t i = 0; i < d; i++) { - float diff = fabs (x[i] - y[i]); - accu += powf (diff, p); - } - return accu; - } -}; - -struct VectorDistanceCanberra { - size_t d; - - float operator () (const float *x, const float *y) const { - float accu = 0; - for (size_t i = 0; i < d; i++) { - float xi = x[i], yi = y[i]; - accu += fabs (xi - yi) / (fabs(xi) + fabs(yi)); - } - return accu; - } -}; - -struct VectorDistanceBrayCurtis { - size_t d; - - float operator () (const float *x, const float *y) const { - float accu_num = 0, accu_den = 0; - for (size_t i = 0; i < d; i++) { - float xi = x[i], yi = y[i]; - accu_num += fabs (xi - yi); - accu_den += fabs (xi + yi); - } - return accu_num / accu_den; - } -}; - -struct VectorDistanceJensenShannon { - size_t d; - - float operator () (const float *x, const float *y) const { - float accu = 0; - - for (size_t i = 0; i < d; i++) { - float xi = x[i], yi = y[i]; - float mi = 0.5 * (xi + yi); - float kl1 = - xi * log(mi / xi); - float kl2 = - yi * log(mi / yi); - accu += kl1 + kl2; - } - return 0.5 * accu; - } -}; - - - - - - - - - - -namespace { - -template -void pairwise_extra_distances_template ( - VD vd, - int64_t nq, const float *xq, - int64_t nb, const float *xb, - float *dis, - int64_t ldq, int64_t ldb, int64_t ldd) -{ - -#pragma omp parallel for if(nq > 10) - for (int64_t i = 0; i < nq; i++) { - const float *xqi = xq + i * ldq; - const float *xbj = xb; - float *disi = dis + ldd * i; - - for (int64_t j = 0; j < nb; j++) { - disi[j] = vd (xqi, xbj); - xbj += ldb; - } - } -} - - -template -void knn_extra_metrics_template ( - VD vd, - const float * x, - const float * y, - size_t nx, size_t ny, - float_maxheap_array_t * res) -{ - size_t k = res->k; - size_t d = vd.d; - size_t check_period = InterruptCallback::get_period_hint (ny * d); - check_period *= omp_get_max_threads(); - - for (size_t i0 = 0; i0 < nx; i0 += check_period) { - size_t i1 = std::min(i0 + check_period, nx); - -#pragma omp parallel for - for (size_t i = i0; i < i1; i++) { - const float * x_i = x + i * d; - const float * y_j = y; - size_t j; - float * simi = res->get_val(i); - int64_t * idxi = res->get_ids (i); - - maxheap_heapify (k, simi, idxi); - for (j = 0; j < ny; j++) { - float disij = vd (x_i, y_j); - - if (disij < simi[0]) { - maxheap_pop (k, simi, idxi); - maxheap_push (k, simi, idxi, disij, j); - } - y_j += d; - } - maxheap_reorder (k, simi, idxi); - } - InterruptCallback::check (); - } - -} - - -template -struct ExtraDistanceComputer : DistanceComputer { - VD vd; - Index::idx_t nb; - const float *q; - const float *b; - - float operator () (idx_t i) override { - return vd (q, b + i * vd.d); - } - - float symmetric_dis(idx_t i, idx_t j) override { - return vd (b + j * vd.d, b + i * vd.d); - } - - ExtraDistanceComputer(const VD & vd, const float *xb, - size_t nb, const float *q = nullptr) - : vd(vd), nb(nb), q(q), b(xb) {} - - void set_query(const float *x) override { - q = x; - } -}; - - - - - - - - - - - - - - - - -} // anonymous namespace - -void pairwise_extra_distances ( - int64_t d, - int64_t nq, const float *xq, - int64_t nb, const float *xb, - MetricType mt, float metric_arg, - float *dis, - int64_t ldq, int64_t ldb, int64_t ldd) -{ - if (nq == 0 || nb == 0) return; - if (ldq == -1) ldq = d; - if (ldb == -1) ldb = d; - if (ldd == -1) ldd = nb; - - switch(mt) { -#define HANDLE_VAR(kw) \ - case METRIC_ ## kw: { \ - VectorDistance ## kw vd({(size_t)d}); \ - pairwise_extra_distances_template (vd, nq, xq, nb, xb, \ - dis, ldq, ldb, ldd); \ - break; \ - } - HANDLE_VAR(L2); - HANDLE_VAR(L1); - HANDLE_VAR(Linf); - HANDLE_VAR(Canberra); - HANDLE_VAR(BrayCurtis); - HANDLE_VAR(JensenShannon); -#undef HANDLE_VAR - case METRIC_Lp: { - VectorDistanceLp vd({(size_t)d, metric_arg}); - pairwise_extra_distances_template (vd, nq, xq, nb, xb, - dis, ldq, ldb, ldd); - break; - } - default: - FAISS_THROW_MSG ("metric type not implemented"); - } - -} - -void knn_extra_metrics ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - MetricType mt, float metric_arg, - float_maxheap_array_t * res) -{ - - switch(mt) { -#define HANDLE_VAR(kw) \ - case METRIC_ ## kw: { \ - VectorDistance ## kw vd({(size_t)d}); \ - knn_extra_metrics_template (vd, x, y, nx, ny, res); \ - break; \ - } - HANDLE_VAR(L2); - HANDLE_VAR(L1); - HANDLE_VAR(Linf); - HANDLE_VAR(Canberra); - HANDLE_VAR(BrayCurtis); - HANDLE_VAR(JensenShannon); -#undef HANDLE_VAR - case METRIC_Lp: { - VectorDistanceLp vd({(size_t)d, metric_arg}); - knn_extra_metrics_template (vd, x, y, nx, ny, res); - break; - } - default: - FAISS_THROW_MSG ("metric type not implemented"); - } - -} - -DistanceComputer *get_extra_distance_computer ( - size_t d, - MetricType mt, float metric_arg, - size_t nb, const float *xb) -{ - - switch(mt) { -#define HANDLE_VAR(kw) \ - case METRIC_ ## kw: { \ - VectorDistance ## kw vd({(size_t)d}); \ - return new ExtraDistanceComputer(vd, xb, nb); \ - } - HANDLE_VAR(L2); - HANDLE_VAR(L1); - HANDLE_VAR(Linf); - HANDLE_VAR(Canberra); - HANDLE_VAR(BrayCurtis); - HANDLE_VAR(JensenShannon); -#undef HANDLE_VAR - case METRIC_Lp: { - VectorDistanceLp vd({(size_t)d, metric_arg}); - return new ExtraDistanceComputer (vd, xb, nb); - break; - } - default: - FAISS_THROW_MSG ("metric type not implemented"); - } - -} - - -} // namespace faiss diff --git a/distances.h b/distances.h deleted file mode 100644 index 9432b3e78d..0000000000 --- a/distances.h +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#ifndef FAISS_distances_h -#define FAISS_distances_h - -/** In this file are the implementations of extra metrics beyond L2 - * and inner product */ - -#include - -#include "Index.h" - -#include "Heap.h" - - - -namespace faiss { - - -void pairwise_extra_distances ( - int64_t d, - int64_t nq, const float *xq, - int64_t nb, const float *xb, - MetricType mt, float metric_arg, - float *dis, - int64_t ldq = -1, int64_t ldb = -1, int64_t ldd = -1); - - -void knn_extra_metrics ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - MetricType mt, float metric_arg, - float_maxheap_array_t * res); - - -/** get a DistanceComputer that refers to this type of distance and - * indexes a flat array of size nb */ -DistanceComputer *get_extra_distance_computer ( - size_t d, - MetricType mt, float metric_arg, - size_t nb, const float *xb); - -} - - -#endif diff --git a/gpu/impl/InvertedListAppend.cu b/gpu/impl/InvertedListAppend.cu deleted file mode 100644 index 36d6ecb137..0000000000 --- a/gpu/impl/InvertedListAppend.cu +++ /dev/null @@ -1,271 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - - -#include "InvertedListAppend.cuh" -#include "../../FaissAssert.h" -#include "../utils/Float16.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Tensor.cuh" -#include "../utils/StaticUtils.h" - -namespace faiss { namespace gpu { - -__global__ void -runUpdateListPointers(Tensor listIds, - Tensor newListLength, - Tensor newCodePointers, - Tensor newIndexPointers, - int* listLengths, - void** listCodes, - void** listIndices) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - - if (index >= listIds.getSize(0)) { - return; - } - - int listId = listIds[index]; - listLengths[listId] = newListLength[index]; - listCodes[listId] = newCodePointers[index]; - listIndices[listId] = newIndexPointers[index]; -} - -void -runUpdateListPointers(Tensor& listIds, - Tensor& newListLength, - Tensor& newCodePointers, - Tensor& newIndexPointers, - thrust::device_vector& listLengths, - thrust::device_vector& listCodes, - thrust::device_vector& listIndices, - cudaStream_t stream) { - int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice()); - int numBlocks = utils::divUp(listIds.getSize(0), numThreads); - - dim3 grid(numBlocks); - dim3 block(numThreads); - - runUpdateListPointers<<>>( - listIds, newListLength, newCodePointers, newIndexPointers, - listLengths.data().get(), - listCodes.data().get(), - listIndices.data().get()); - - CUDA_TEST_ERROR(); -} - -template -__global__ void -ivfpqInvertedListAppend(Tensor listIds, - Tensor listOffset, - Tensor encodings, - Tensor indices, - void** listCodes, - void** listIndices) { - int encodingToAdd = blockIdx.x * blockDim.x + threadIdx.x; - - if (encodingToAdd >= listIds.getSize(0)) { - return; - } - - int listId = listIds[encodingToAdd]; - int offset = listOffset[encodingToAdd]; - - // Add vector could be invalid (contains NaNs etc) - if (listId == -1 || offset == -1) { - return; - } - - auto encoding = encodings[encodingToAdd]; - long index = indices[encodingToAdd]; - - if (Opt == INDICES_32_BIT) { - // FIXME: there could be overflow here, but where should we check this? - ((int*) listIndices[listId])[offset] = (int) index; - } else if (Opt == INDICES_64_BIT) { - ((long*) listIndices[listId])[offset] = (long) index; - } else { - // INDICES_CPU or INDICES_IVF; no indices are being stored - } - - unsigned char* codeStart = - ((unsigned char*) listCodes[listId]) + offset * encodings.getSize(1); - - // FIXME: slow - for (int i = 0; i < encodings.getSize(1); ++i) { - codeStart[i] = (unsigned char) encoding[i]; - } -} - -void -runIVFPQInvertedListAppend(Tensor& listIds, - Tensor& listOffset, - Tensor& encodings, - Tensor& indices, - thrust::device_vector& listCodes, - thrust::device_vector& listIndices, - IndicesOptions indicesOptions, - cudaStream_t stream) { - int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice()); - int numBlocks = utils::divUp(listIds.getSize(0), numThreads); - - dim3 grid(numBlocks); - dim3 block(numThreads); - -#define RUN_APPEND(IND) \ - do { \ - ivfpqInvertedListAppend<<>>( \ - listIds, listOffset, encodings, indices, \ - listCodes.data().get(), \ - listIndices.data().get()); \ - } while (0) - - if ((indicesOptions == INDICES_CPU) || (indicesOptions == INDICES_IVF)) { - // no need to maintain indices on the GPU - RUN_APPEND(INDICES_IVF); - } else if (indicesOptions == INDICES_32_BIT) { - RUN_APPEND(INDICES_32_BIT); - } else if (indicesOptions == INDICES_64_BIT) { - RUN_APPEND(INDICES_64_BIT); - } else { - // unknown index storage type - FAISS_ASSERT(false); - } - - CUDA_TEST_ERROR(); - -#undef RUN_APPEND -} - -template -__global__ void -ivfFlatInvertedListAppend(Tensor listIds, - Tensor listOffset, - Tensor vecs, - Tensor indices, - void** listData, - void** listIndices) { - int vec = blockIdx.x; - - int listId = listIds[vec]; - int offset = listOffset[vec]; - - // Add vector could be invalid (contains NaNs etc) - if (listId == -1 || offset == -1) { - return; - } - - if (threadIdx.x == 0) { - long index = indices[vec]; - - if (Opt == INDICES_32_BIT) { - // FIXME: there could be overflow here, but where should we check this? - ((int*) listIndices[listId])[offset] = (int) index; - } else if (Opt == INDICES_64_BIT) { - ((long*) listIndices[listId])[offset] = (long) index; - } else { - // INDICES_CPU or INDICES_IVF; no indices are being stored - } - } - -#ifdef FAISS_USE_FLOAT16 - // FIXME: should use half2 for better memory b/w - if (Float16) { - half* vecStart = ((half*) listData[listId]) + offset * vecs.getSize(1); - - if (Exact) { - vecStart[threadIdx.x] = __float2half(vecs[vec][threadIdx.x]); - } else { - for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) { - vecStart[i] = __float2half(vecs[vec][i]); - } - } - } -#else - static_assert(!Float16, "float16 unsupported"); -#endif - - if (!Float16) { - float* vecStart = ((float*) listData[listId]) + offset * vecs.getSize(1); - - if (Exact) { - vecStart[threadIdx.x] = vecs[vec][threadIdx.x]; - } else { - for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) { - vecStart[i] = vecs[vec][i]; - } - } - } -} - -void -runIVFFlatInvertedListAppend(Tensor& listIds, - Tensor& listOffset, - Tensor& vecs, - Tensor& indices, - bool useFloat16, - thrust::device_vector& listData, - thrust::device_vector& listIndices, - IndicesOptions indicesOptions, - cudaStream_t stream) { - int maxThreads = getMaxThreadsCurrentDevice(); - bool exact = vecs.getSize(1) <= maxThreads; - - // Each block will handle appending a single vector - dim3 grid(vecs.getSize(0)); - dim3 block(std::min(vecs.getSize(1), maxThreads)); - -#define RUN_APPEND_OPT(OPT, EXACT, FLOAT16) \ - do { \ - ivfFlatInvertedListAppend \ - <<>>( \ - listIds, listOffset, vecs, indices, \ - listData.data().get(), \ - listIndices.data().get()); \ - } while (0) \ - -#define RUN_APPEND(EXACT, FLOAT16) \ - do { \ - if ((indicesOptions == INDICES_CPU) || (indicesOptions == INDICES_IVF)) { \ - /* no indices are maintained on the GPU */ \ - RUN_APPEND_OPT(INDICES_IVF, EXACT, FLOAT16); \ - } else if (indicesOptions == INDICES_32_BIT) { \ - RUN_APPEND_OPT(INDICES_32_BIT, EXACT, FLOAT16); \ - } else if (indicesOptions == INDICES_64_BIT) { \ - RUN_APPEND_OPT(INDICES_64_BIT, EXACT, FLOAT16); \ - } else { \ - FAISS_ASSERT(false); \ - } \ - } while (0); - - if (useFloat16) { -#ifdef FAISS_USE_FLOAT16 - if (exact) { - RUN_APPEND(true, true); - } else { - RUN_APPEND(false, true); - } -#else - // no float16 support - FAISS_ASSERT(false); -#endif - } else { - if (exact) { - RUN_APPEND(true, false); - } else { - RUN_APPEND(false, false); - } - } - - CUDA_TEST_ERROR(); - -#undef RUN_APPEND -#undef RUN_APPEND_OPT -} - -} } // namespace diff --git a/gpu/impl/InvertedListAppend.cuh b/gpu/impl/InvertedListAppend.cuh deleted file mode 100644 index e26ed70ef8..0000000000 --- a/gpu/impl/InvertedListAppend.cuh +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - - -#pragma once - -#include "../GpuIndicesOptions.h" -#include "../utils/Tensor.cuh" -#include - -namespace faiss { namespace gpu { - -/// Update device-side list pointers in a batch -void runUpdateListPointers(Tensor& listIds, - Tensor& newListLength, - Tensor& newCodePointers, - Tensor& newIndexPointers, - thrust::device_vector& listLengths, - thrust::device_vector& listCodes, - thrust::device_vector& listIndices, - cudaStream_t stream); - -/// Actually append the new codes / vector indices to the individual lists - -/// IVFPQ -void runIVFPQInvertedListAppend(Tensor& listIds, - Tensor& listOffset, - Tensor& encodings, - Tensor& indices, - thrust::device_vector& listCodes, - thrust::device_vector& listIndices, - IndicesOptions indicesOptions, - cudaStream_t stream); - -/// IVF flat storage -void runIVFFlatInvertedListAppend(Tensor& listIds, - Tensor& listOffset, - Tensor& vecs, - Tensor& indices, - bool useFloat16, - thrust::device_vector& listData, - thrust::device_vector& listIndices, - IndicesOptions indicesOptions, - cudaStream_t stream); - -} } // namespace diff --git a/hamming.cpp b/hamming.cpp deleted file mode 100644 index fca9ef5cc7..0000000000 --- a/hamming.cpp +++ /dev/null @@ -1,776 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -/* - * Implementation of Hamming related functions (distances, smallest distance - * selection with regular heap|radix and probabilistic heap|radix. - * - * IMPLEMENTATION NOTES - * Bitvectors are generally assumed to be multiples of 64 bits. - * - * hamdis_t is used for distances because at this time - * it is not clear how we will need to balance - * - flexibility in vector size (unclear more than 2^16 or even 2^8 bitvectors) - * - memory usage - * - cache-misses when dealing with large volumes of data (lower bits is better) - * - * The hamdis_t should optimally be compatibe with one of the Torch Storage - * (Byte,Short,Long) and therefore should be signed for 2-bytes and 4-bytes -*/ - -#include "hamming.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "Heap.h" -#include "FaissAssert.h" - -static const size_t BLOCKSIZE_QUERY = 8192; - - -namespace faiss { - -size_t hamming_batch_size = 65536; - -static const uint8_t hamdis_tab_ham_bytes[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - - -/* Elementary Hamming distance computation: unoptimized */ -template -T hamming (const uint8_t *bs1, - const uint8_t *bs2) -{ - const size_t nbytes = nbits / 8; - size_t i; - T h = 0; - for (i = 0; i < nbytes; i++) - h += (T) hamdis_tab_ham_bytes[bs1[i]^bs2[i]]; - return h; -} - - -/* Hamming distances for multiples of 64 bits */ -template -hamdis_t hamming (const uint64_t * bs1, const uint64_t * bs2) -{ - const size_t nwords = nbits / 64; - size_t i; - hamdis_t h = 0; - for (i = 0; i < nwords; i++) - h += popcount64 (bs1[i] ^ bs2[i]); - return h; -} - - - -/* specialized (optimized) functions */ -template <> -hamdis_t hamming<64> (const uint64_t * pa, const uint64_t * pb) -{ - return popcount64 (pa[0] ^ pb[0]); -} - - -template <> -hamdis_t hamming<128> (const uint64_t *pa, const uint64_t *pb) -{ - return popcount64 (pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]); -} - - -template <> -hamdis_t hamming<256> (const uint64_t * pa, const uint64_t * pb) -{ - return popcount64 (pa[0] ^ pb[0]) - + popcount64 (pa[1] ^ pb[1]) - + popcount64 (pa[2] ^ pb[2]) - + popcount64 (pa[3] ^ pb[3]); -} - - -/* Hamming distances for multiple of 64 bits */ -hamdis_t hamming ( - const uint64_t * bs1, - const uint64_t * bs2, - size_t nwords) -{ - size_t i; - hamdis_t h = 0; - for (i = 0; i < nwords; i++) - h += popcount64 (bs1[i] ^ bs2[i]); - return h; -} - - - -template -void hammings ( - const uint64_t * bs1, - const uint64_t * bs2, - size_t n1, size_t n2, - hamdis_t * dis) - -{ - size_t i, j; - const size_t nwords = nbits / 64; - for (i = 0; i < n1; i++) { - const uint64_t * __restrict bs1_ = bs1 + i * nwords; - hamdis_t * __restrict dis_ = dis + i * n2; - for (j = 0; j < n2; j++) - dis_[j] = hamming(bs1_, bs2 + j * nwords); - } -} - - - -void hammings ( - const uint64_t * bs1, - const uint64_t * bs2, - size_t n1, - size_t n2, - size_t nwords, - hamdis_t * __restrict dis) -{ - size_t i, j; - n1 *= nwords; - n2 *= nwords; - for (i = 0; i < n1; i+=nwords) { - const uint64_t * bs1_ = bs1+i; - for (j = 0; j < n2; j+=nwords) - dis[j] = hamming (bs1_, bs2+j, nwords); - } -} - - - - -/* Count number of matches given a max threshold */ -template -void hamming_count_thres ( - const uint64_t * bs1, - const uint64_t * bs2, - size_t n1, - size_t n2, - hamdis_t ht, - size_t * nptr) -{ - const size_t nwords = nbits / 64; - size_t i, j, posm = 0; - const uint64_t * bs2_ = bs2; - - for (i = 0; i < n1; i++) { - bs2 = bs2_; - for (j = 0; j < n2; j++) { - /* collect the match only if this satisfies the threshold */ - if (hamming (bs1, bs2) <= ht) - posm++; - bs2 += nwords; - } - bs1 += nwords; /* next signature */ - } - *nptr = posm; -} - - -template -void crosshamming_count_thres ( - const uint64_t * dbs, - size_t n, - int ht, - size_t * nptr) -{ - const size_t nwords = nbits / 64; - size_t i, j, posm = 0; - const uint64_t * bs1 = dbs; - for (i = 0; i < n; i++) { - const uint64_t * bs2 = bs1 + 2; - for (j = i + 1; j < n; j++) { - /* collect the match only if this satisfies the threshold */ - if (hamming (bs1, bs2) <= ht) - posm++; - bs2 += nwords; - } - bs1 += nwords; - } - *nptr = posm; -} - - -template -size_t match_hamming_thres ( - const uint64_t * bs1, - const uint64_t * bs2, - size_t n1, - size_t n2, - int ht, - int64_t * idx, - hamdis_t * hams) -{ - const size_t nwords = nbits / 64; - size_t i, j, posm = 0; - hamdis_t h; - const uint64_t * bs2_ = bs2; - for (i = 0; i < n1; i++) { - bs2 = bs2_; - for (j = 0; j < n2; j++) { - /* Here perform the real work of computing the distance */ - h = hamming (bs1, bs2); - - /* collect the match only if this satisfies the threshold */ - if (h <= ht) { - /* Enough space to store another match ? */ - *idx = i; idx++; - *idx = j; idx++; - *hams = h; - hams++; - posm++; - } - bs2+=nwords; /* next signature */ - } - bs1+=nwords; - } - return posm; -} - - -/* Return closest neighbors w.r.t Hamming distance, using a heap. */ -template -static -void hammings_knn_hc ( - int bytes_per_code, - int_maxheap_array_t * ha, - const uint8_t * bs1, - const uint8_t * bs2, - size_t n2, - bool order = true, - bool init_heap = true) -{ - size_t k = ha->k; - if (init_heap) ha->heapify (); - - const size_t block_size = hamming_batch_size; - for (size_t j0 = 0; j0 < n2; j0 += block_size) { - const size_t j1 = std::min(j0 + block_size, n2); -#pragma omp parallel for - for (size_t i = 0; i < ha->nh; i++) { - HammingComputer hc (bs1 + i * bytes_per_code, bytes_per_code); - - const uint8_t * bs2_ = bs2 + j0 * bytes_per_code; - hamdis_t dis; - hamdis_t * __restrict bh_val_ = ha->val + i * k; - int64_t * __restrict bh_ids_ = ha->ids + i * k; - size_t j; - for (j = j0; j < j1; j++, bs2_+= bytes_per_code) { - dis = hc.hamming (bs2_); - if (dis < bh_val_[0]) { - faiss::maxheap_pop (k, bh_val_, bh_ids_); - faiss::maxheap_push (k, bh_val_, bh_ids_, dis, j); - } - } - } - } - if (order) ha->reorder (); - } - -/* Return closest neighbors w.r.t Hamming distance, using max count. */ -template -static -void hammings_knn_mc ( - int bytes_per_code, - const uint8_t *a, - const uint8_t *b, - size_t na, - size_t nb, - size_t k, - int32_t *distances, - int64_t *labels) -{ - const int nBuckets = bytes_per_code * 8 + 1; - std::vector all_counters(na * nBuckets, 0); - std::unique_ptr all_ids_per_dis(new int64_t[na * nBuckets * k]); - - std::vector> cs; - for (size_t i = 0; i < na; ++i) { - cs.push_back(HCounterState( - all_counters.data() + i * nBuckets, - all_ids_per_dis.get() + i * nBuckets * k, - a + i * bytes_per_code, - 8 * bytes_per_code, - k - )); - } - - const size_t block_size = hamming_batch_size; - for (size_t j0 = 0; j0 < nb; j0 += block_size) { - const size_t j1 = std::min(j0 + block_size, nb); -#pragma omp parallel for - for (size_t i = 0; i < na; ++i) { - for (size_t j = j0; j < j1; ++j) { - cs[i].update_counter(b + j * bytes_per_code, j); - } - } - } - - for (size_t i = 0; i < na; ++i) { - HCounterState& csi = cs[i]; - - int nres = 0; - for (int b = 0; b < nBuckets && nres < k; b++) { - for (int l = 0; l < csi.counters[b] && nres < k; l++) { - labels[i * k + nres] = csi.ids_per_dis[b * k + l]; - distances[i * k + nres] = b; - nres++; - } - } - while (nres < k) { - labels[i * k + nres] = -1; - distances[i * k + nres] = std::numeric_limits::max(); - ++nres; - } - } -} - - - -// works faster than the template version -static -void hammings_knn_hc_1 ( - int_maxheap_array_t * ha, - const uint64_t * bs1, - const uint64_t * bs2, - size_t n2, - bool order = true, - bool init_heap = true) -{ - const size_t nwords = 1; - size_t k = ha->k; - - - if (init_heap) { - ha->heapify (); - } - -#pragma omp parallel for - for (size_t i = 0; i < ha->nh; i++) { - const uint64_t bs1_ = bs1 [i]; - const uint64_t * bs2_ = bs2; - hamdis_t dis; - hamdis_t * bh_val_ = ha->val + i * k; - hamdis_t bh_val_0 = bh_val_[0]; - int64_t * bh_ids_ = ha->ids + i * k; - size_t j; - for (j = 0; j < n2; j++, bs2_+= nwords) { - dis = popcount64 (bs1_ ^ *bs2_); - if (dis < bh_val_0) { - faiss::maxheap_pop (k, bh_val_, bh_ids_); - faiss::maxheap_push (k, bh_val_, bh_ids_, dis, j); - bh_val_0 = bh_val_[0]; - } - } - } - if (order) { - ha->reorder (); - } -} - - - - -/* Functions to maps vectors to bits. Assume proper allocation done beforehand, - meaning that b should be be able to receive as many bits as x may produce. */ - -/* - * dimension 0 corresponds to the least significant bit of b[0], or - * equivalently to the lsb of the first byte that is stored. - */ -void fvec2bitvec (const float * x, uint8_t * b, size_t d) -{ - for (int i = 0; i < d; i += 8) { - uint8_t w = 0; - uint8_t mask = 1; - int nj = i + 8 <= d ? 8 : d - i; - for (int j = 0; j < nj; j++) { - if (x[i + j] >= 0) - w |= mask; - mask <<= 1; - } - *b = w; - b++; - } -} - - - -/* Same but for n vectors. - Ensure that the ouptut b is byte-aligned (pad with 0s). */ -void fvecs2bitvecs (const float * x, uint8_t * b, size_t d, size_t n) -{ - const int64_t ncodes = ((d + 7) / 8); -#pragma omp parallel for - for (size_t i = 0; i < n; i++) - fvec2bitvec (x + i * d, b + i * ncodes, d); -} - - -/* Reverse bit (NOT a optimized function, only used for print purpose) */ -static uint64_t uint64_reverse_bits (uint64_t b) -{ - int i; - uint64_t revb = 0; - for (i = 0; i < 64; i++) { - revb <<= 1; - revb |= b & 1; - b >>= 1; - } - return revb; -} - - -/* print the bit vector */ -void bitvec_print (const uint8_t * b, size_t d) -{ - size_t i, j; - for (i = 0; i < d; ) { - uint64_t brev = uint64_reverse_bits (* (uint64_t *) b); - for (j = 0; j < 64 && i < d; j++, i++) { - printf ("%d", (int) (brev & 1)); - brev >>= 1; - } - b += 8; - printf (" "); - } -} - - - - - -/*----------------------------------------*/ -/* Hamming distance computation and k-nn */ - - -#define C64(x) ((uint64_t *)x) - - -/* Compute a set of Hamming distances */ -void hammings ( - const uint8_t * a, - const uint8_t * b, - size_t na, size_t nb, - size_t ncodes, - hamdis_t * __restrict dis) -{ - FAISS_THROW_IF_NOT (ncodes % 8 == 0); - switch (ncodes) { - case 8: - faiss::hammings <64> (C64(a), C64(b), na, nb, dis); return; - case 16: - faiss::hammings <128> (C64(a), C64(b), na, nb, dis); return; - case 32: - faiss::hammings <256> (C64(a), C64(b), na, nb, dis); return; - case 64: - faiss::hammings <512> (C64(a), C64(b), na, nb, dis); return; - default: - faiss::hammings (C64(a), C64(b), na, nb, ncodes * 8, dis); return; - } -} - -void hammings_knn( - int_maxheap_array_t *ha, - const uint8_t *a, - const uint8_t *b, - size_t nb, - size_t ncodes, - int order) -{ - hammings_knn_hc(ha, a, b, nb, ncodes, order); -} -void hammings_knn_hc ( - int_maxheap_array_t * ha, - const uint8_t * a, - const uint8_t * b, - size_t nb, - size_t ncodes, - int order) -{ - switch (ncodes) { - case 4: - hammings_knn_hc - (4, ha, a, b, nb, order, true); - break; - case 8: - hammings_knn_hc_1 (ha, C64(a), C64(b), nb, order, true); - // hammings_knn_hc - // (8, ha, a, b, nb, order, true); - break; - case 16: - hammings_knn_hc - (16, ha, a, b, nb, order, true); - break; - case 32: - hammings_knn_hc - (32, ha, a, b, nb, order, true); - break; - default: - if(ncodes % 8 == 0) { - hammings_knn_hc - (ncodes, ha, a, b, nb, order, true); - } else { - hammings_knn_hc - (ncodes, ha, a, b, nb, order, true); - - } - } -} - -void hammings_knn_mc( - const uint8_t * a, - const uint8_t * b, - size_t na, - size_t nb, - size_t k, - size_t ncodes, - int32_t *distances, - int64_t *labels) -{ - switch (ncodes) { - case 4: - hammings_knn_mc( - 4, a, b, na, nb, k, distances, labels - ); - break; - case 8: - // TODO(hoss): Write analog to hammings_knn_hc_1 - // hammings_knn_hc_1 (ha, C64(a), C64(b), nb, order, true); - hammings_knn_mc( - 8, a, b, na, nb, k, distances, labels - ); - break; - case 16: - hammings_knn_mc( - 16, a, b, na, nb, k, distances, labels - ); - break; - case 32: - hammings_knn_mc( - 32, a, b, na, nb, k, distances, labels - ); - break; - default: - if(ncodes % 8 == 0) { - hammings_knn_mc( - ncodes, a, b, na, nb, k, distances, labels - ); - } else { - hammings_knn_mc( - ncodes, a, b, na, nb, k, distances, labels - ); - } - } -} - - - - -/* Count number of matches given a max threshold */ -void hamming_count_thres ( - const uint8_t * bs1, - const uint8_t * bs2, - size_t n1, - size_t n2, - hamdis_t ht, - size_t ncodes, - size_t * nptr) -{ - switch (ncodes) { - case 8: - faiss::hamming_count_thres <64> (C64(bs1), C64(bs2), - n1, n2, ht, nptr); - return; - case 16: - faiss::hamming_count_thres <128> (C64(bs1), C64(bs2), - n1, n2, ht, nptr); - return; - case 32: - faiss::hamming_count_thres <256> (C64(bs1), C64(bs2), - n1, n2, ht, nptr); - return; - case 64: - faiss::hamming_count_thres <512> (C64(bs1), C64(bs2), - n1, n2, ht, nptr); - return; - default: - FAISS_THROW_FMT ("not implemented for %zu bits", ncodes); - } -} - - -/* Count number of cross-matches given a threshold */ -void crosshamming_count_thres ( - const uint8_t * dbs, - size_t n, - hamdis_t ht, - size_t ncodes, - size_t * nptr) -{ - switch (ncodes) { - case 8: - faiss::crosshamming_count_thres <64> (C64(dbs), n, ht, nptr); - return; - case 16: - faiss::crosshamming_count_thres <128> (C64(dbs), n, ht, nptr); - return; - case 32: - faiss::crosshamming_count_thres <256> (C64(dbs), n, ht, nptr); - return; - case 64: - faiss::crosshamming_count_thres <512> (C64(dbs), n, ht, nptr); - return; - default: - FAISS_THROW_FMT ("not implemented for %zu bits", ncodes); - } -} - - -/* Returns all matches given a threshold */ -size_t match_hamming_thres ( - const uint8_t * bs1, - const uint8_t * bs2, - size_t n1, - size_t n2, - hamdis_t ht, - size_t ncodes, - int64_t * idx, - hamdis_t * dis) -{ - switch (ncodes) { - case 8: - return faiss::match_hamming_thres <64> (C64(bs1), C64(bs2), - n1, n2, ht, idx, dis); - case 16: - return faiss::match_hamming_thres <128> (C64(bs1), C64(bs2), - n1, n2, ht, idx, dis); - case 32: - return faiss::match_hamming_thres <256> (C64(bs1), C64(bs2), - n1, n2, ht, idx, dis); - case 64: - return faiss::match_hamming_thres <512> (C64(bs1), C64(bs2), - n1, n2, ht, idx, dis); - default: - FAISS_THROW_FMT ("not implemented for %zu bits", ncodes); - return 0; - } -} - - -#undef C64 - - - -/************************************* - * generalized Hamming distances - ************************************/ - - - -template -static void hamming_dis_inner_loop ( - const uint8_t *ca, - const uint8_t *cb, - size_t nb, - size_t code_size, - int k, - hamdis_t * bh_val_, - int64_t * bh_ids_) -{ - - HammingComputer hc (ca, code_size); - - for (size_t j = 0; j < nb; j++) { - int ndiff = hc.hamming (cb); - cb += code_size; - if (ndiff < bh_val_[0]) { - maxheap_pop (k, bh_val_, bh_ids_); - maxheap_push (k, bh_val_, bh_ids_, ndiff, j); - } - } -} - -void generalized_hammings_knn_hc ( - int_maxheap_array_t * ha, - const uint8_t * a, - const uint8_t * b, - size_t nb, - size_t code_size, - int ordered) -{ - int na = ha->nh; - int k = ha->k; - - if (ordered) - ha->heapify (); - -#pragma omp parallel for - for (int i = 0; i < na; i++) { - const uint8_t *ca = a + i * code_size; - const uint8_t *cb = b; - - hamdis_t * bh_val_ = ha->val + i * k; - int64_t * bh_ids_ = ha->ids + i * k; - - switch (code_size) { - case 8: - hamming_dis_inner_loop - (ca, cb, nb, 8, k, bh_val_, bh_ids_); - break; - case 16: - hamming_dis_inner_loop - (ca, cb, nb, 16, k, bh_val_, bh_ids_); - break; - case 32: - hamming_dis_inner_loop - (ca, cb, nb, 32, k, bh_val_, bh_ids_); - break; - default: - hamming_dis_inner_loop - (ca, cb, nb, code_size, k, bh_val_, bh_ids_); - break; - } - } - - if (ordered) - ha->reorder (); - -} - - -} // namespace faiss diff --git a/hamming.h b/hamming.h deleted file mode 100644 index e5ef13c9b5..0000000000 --- a/hamming.h +++ /dev/null @@ -1,572 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -/* - * Hamming distances. The binary vector dimensionality should be a - * multiple of 8, as the elementary operations operate on bytes. If - * you need other sizes, just pad with 0s (this is done by function - * fvecs2bitvecs). - * - * User-defined type hamdis_t is used for distances because at this time - * it is still uncler clear how we will need to balance - * - flexibility in vector size (may need 16- or even 8-bit vectors) - * - memory usage - * - cache-misses when dealing with large volumes of data (fewer bits is better) - * - */ - -#ifndef FAISS_hamming_h -#define FAISS_hamming_h - - -#include - -#include "Heap.h" - - -/* The Hamming distance type */ -typedef int32_t hamdis_t; - -namespace faiss { - - -extern size_t hamming_batch_size; - -inline int popcount64(uint64_t x) { - return __builtin_popcountl(x); -} - - -/** Compute a set of Hamming distances between na and nb binary vectors - * - * @param a size na * nbytespercode - * @param b size nb * nbytespercode - * @param nbytespercode should be multiple of 8 - * @param dis output distances, size na * nb - */ -void hammings ( - const uint8_t * a, - const uint8_t * b, - size_t na, size_t nb, - size_t nbytespercode, - hamdis_t * dis); - -void bitvec_print (const uint8_t * b, size_t d); - - -/* Functions for casting vectors of regular types to compact bits. - They assume proper allocation done beforehand, meaning that b - should be be able to receive as many bits as x may produce. */ - -/* Makes an array of bits from the signs of a float array. The length - of the output array b is rounded up to byte size (allocate - accordingly) */ -void fvecs2bitvecs ( - const float * x, - uint8_t * b, - size_t d, - size_t n); - - -void fvec2bitvec (const float * x, uint8_t * b, size_t d); - - - -/** Return the k smallest Hamming distances for a set of binary query vectors, - * using a max heap. - * @param a queries, size ha->nh * ncodes - * @param b database, size nb * ncodes - * @param nb number of database vectors - * @param ncodes size of the binary codes (bytes) - * @param ordered if != 0: order the results by decreasing distance - * (may be bottleneck for k/n > 0.01) */ -void hammings_knn_hc ( - int_maxheap_array_t * ha, - const uint8_t * a, - const uint8_t * b, - size_t nb, - size_t ncodes, - int ordered); - -/* Legacy alias to hammings_knn_hc. */ -void hammings_knn ( - int_maxheap_array_t * ha, - const uint8_t * a, - const uint8_t * b, - size_t nb, - size_t ncodes, - int ordered); - -/** Return the k smallest Hamming distances for a set of binary query vectors, - * using counting max. - * @param a queries, size na * ncodes - * @param b database, size nb * ncodes - * @param na number of query vectors - * @param nb number of database vectors - * @param k number of vectors/distances to return - * @param ncodes size of the binary codes (bytes) - * @param distances output distances from each query vector to its k nearest - * neighbors - * @param labels output ids of the k nearest neighbors to each query vector - */ -void hammings_knn_mc ( - const uint8_t * a, - const uint8_t * b, - size_t na, - size_t nb, - size_t k, - size_t ncodes, - int32_t *distances, - int64_t *labels); - -/* Counting the number of matches or of cross-matches (without returning them) - For use with function that assume pre-allocated memory */ -void hamming_count_thres ( - const uint8_t * bs1, - const uint8_t * bs2, - size_t n1, - size_t n2, - hamdis_t ht, - size_t ncodes, - size_t * nptr); - -/* Return all Hamming distances/index passing a thres. Pre-allocation of output - is required. Use hamming_count_thres to determine the proper size. */ -size_t match_hamming_thres ( - const uint8_t * bs1, - const uint8_t * bs2, - size_t n1, - size_t n2, - hamdis_t ht, - size_t ncodes, - int64_t * idx, - hamdis_t * dis); - -/* Cross-matching in a set of vectors */ -void crosshamming_count_thres ( - const uint8_t * dbs, - size_t n, - hamdis_t ht, - size_t ncodes, - size_t * nptr); - - -/* compute the Hamming distances between two codewords of nwords*64 bits */ -hamdis_t hamming ( - const uint64_t * bs1, - const uint64_t * bs2, - size_t nwords); - - - - -/****************************************************************** - * The HammingComputer series of classes compares a single code of - * size 4 to 32 to incoming codes. They are intended for use as a - * template class where it would be inefficient to switch on the code - * size in the inner loop. Hopefully the compiler will inline the - * hamming() functions and put the a0, a1, ... in registers. - ******************************************************************/ - - -struct HammingComputer4 { - uint32_t a0; - - HammingComputer4 () {} - - HammingComputer4 (const uint8_t *a, int code_size) { - set (a, code_size); - } - - void set (const uint8_t *a, int code_size) { - assert (code_size == 4); - a0 = *(uint32_t *)a; - } - - inline int hamming (const uint8_t *b) const { - return popcount64 (*(uint32_t *)b ^ a0); - } - -}; - -struct HammingComputer8 { - uint64_t a0; - - HammingComputer8 () {} - - HammingComputer8 (const uint8_t *a, int code_size) { - set (a, code_size); - } - - void set (const uint8_t *a, int code_size) { - assert (code_size == 8); - a0 = *(uint64_t *)a; - } - - inline int hamming (const uint8_t *b) const { - return popcount64 (*(uint64_t *)b ^ a0); - } - -}; - - -struct HammingComputer16 { - uint64_t a0, a1; - - HammingComputer16 () {} - - HammingComputer16 (const uint8_t *a8, int code_size) { - set (a8, code_size); - } - - void set (const uint8_t *a8, int code_size) { - assert (code_size == 16); - const uint64_t *a = (uint64_t *)a8; - a0 = a[0]; a1 = a[1]; - } - - inline int hamming (const uint8_t *b8) const { - const uint64_t *b = (uint64_t *)b8; - return popcount64 (b[0] ^ a0) + popcount64 (b[1] ^ a1); - } - -}; - -// when applied to an array, 1/2 of the 64-bit accesses are unaligned. -// This incurs a penalty of ~10% wrt. fully aligned accesses. -struct HammingComputer20 { - uint64_t a0, a1; - uint32_t a2; - - HammingComputer20 () {} - - HammingComputer20 (const uint8_t *a8, int code_size) { - set (a8, code_size); - } - - void set (const uint8_t *a8, int code_size) { - assert (code_size == 20); - const uint64_t *a = (uint64_t *)a8; - a0 = a[0]; a1 = a[1]; a2 = a[2]; - } - - inline int hamming (const uint8_t *b8) const { - const uint64_t *b = (uint64_t *)b8; - return popcount64 (b[0] ^ a0) + popcount64 (b[1] ^ a1) + - popcount64 (*(uint32_t*)(b + 2) ^ a2); - } -}; - -struct HammingComputer32 { - uint64_t a0, a1, a2, a3; - - HammingComputer32 () {} - - HammingComputer32 (const uint8_t *a8, int code_size) { - set (a8, code_size); - } - - void set (const uint8_t *a8, int code_size) { - assert (code_size == 32); - const uint64_t *a = (uint64_t *)a8; - a0 = a[0]; a1 = a[1]; a2 = a[2]; a3 = a[3]; - } - - inline int hamming (const uint8_t *b8) const { - const uint64_t *b = (uint64_t *)b8; - return popcount64 (b[0] ^ a0) + popcount64 (b[1] ^ a1) + - popcount64 (b[2] ^ a2) + popcount64 (b[3] ^ a3); - } - -}; - -struct HammingComputer64 { - uint64_t a0, a1, a2, a3, a4, a5, a6, a7; - - HammingComputer64 () {} - - HammingComputer64 (const uint8_t *a8, int code_size) { - set (a8, code_size); - } - - void set (const uint8_t *a8, int code_size) { - assert (code_size == 64); - const uint64_t *a = (uint64_t *)a8; - a0 = a[0]; a1 = a[1]; a2 = a[2]; a3 = a[3]; - a4 = a[4]; a5 = a[5]; a6 = a[6]; a7 = a[7]; - } - - inline int hamming (const uint8_t *b8) const { - const uint64_t *b = (uint64_t *)b8; - return popcount64 (b[0] ^ a0) + popcount64 (b[1] ^ a1) + - popcount64 (b[2] ^ a2) + popcount64 (b[3] ^ a3) + - popcount64 (b[4] ^ a4) + popcount64 (b[5] ^ a5) + - popcount64 (b[6] ^ a6) + popcount64 (b[7] ^ a7); - } - -}; - -// very inefficient... -struct HammingComputerDefault { - const uint8_t *a; - int n; - - HammingComputerDefault () {} - - HammingComputerDefault (const uint8_t *a8, int code_size) { - set (a8, code_size); - } - - void set (const uint8_t *a8, int code_size) { - a = a8; - n = code_size; - } - - int hamming (const uint8_t *b8) const { - int accu = 0; - for (int i = 0; i < n; i++) - accu += popcount64 (a[i] ^ b8[i]); - return accu; - } - -}; - - -struct HammingComputerM8 { - const uint64_t *a; - int n; - - HammingComputerM8 () {} - - HammingComputerM8 (const uint8_t *a8, int code_size) { - set (a8, code_size); - } - - void set (const uint8_t *a8, int code_size) { - assert (code_size % 8 == 0); - a = (uint64_t *)a8; - n = code_size / 8; - } - - int hamming (const uint8_t *b8) const { - const uint64_t *b = (uint64_t *)b8; - int accu = 0; - for (int i = 0; i < n; i++) - accu += popcount64 (a[i] ^ b[i]); - return accu; - } - -}; - -// even more inefficient! -struct HammingComputerM4 { - const uint32_t *a; - int n; - - HammingComputerM4 () {} - - HammingComputerM4 (const uint8_t *a4, int code_size) { - set (a4, code_size); - } - - void set (const uint8_t *a4, int code_size) { - assert (code_size % 4 == 0); - a = (uint32_t *)a4; - n = code_size / 4; - } - - int hamming (const uint8_t *b8) const { - const uint32_t *b = (uint32_t *)b8; - int accu = 0; - for (int i = 0; i < n; i++) - accu += popcount64 (a[i] ^ b[i]); - return accu; - } - -}; - -/*************************************************************************** - * Equivalence with a template class when code size is known at compile time - **************************************************************************/ - -// default template -template -struct HammingComputer: HammingComputerM8 { - HammingComputer (const uint8_t *a, int code_size): - HammingComputerM8(a, code_size) {} -}; - -#define SPECIALIZED_HC(CODE_SIZE) \ - template<> struct HammingComputer: \ - HammingComputer ## CODE_SIZE { \ - HammingComputer (const uint8_t *a): \ - HammingComputer ## CODE_SIZE(a, CODE_SIZE) {} \ - } - -SPECIALIZED_HC(4); -SPECIALIZED_HC(8); -SPECIALIZED_HC(16); -SPECIALIZED_HC(20); -SPECIALIZED_HC(32); -SPECIALIZED_HC(64); - -#undef SPECIALIZED_HC - - -/*************************************************************************** - * generalized Hamming = number of bytes that are different between - * two codes. - ***************************************************************************/ - - -inline int generalized_hamming_64 (uint64_t a) { - a |= a >> 1; - a |= a >> 2; - a |= a >> 4; - a &= 0x0101010101010101UL; - return popcount64 (a); -} - - -struct GenHammingComputer8 { - uint64_t a0; - - GenHammingComputer8 (const uint8_t *a, int code_size) { - assert (code_size == 8); - a0 = *(uint64_t *)a; - } - - inline int hamming (const uint8_t *b) const { - return generalized_hamming_64 (*(uint64_t *)b ^ a0); - } - -}; - - -struct GenHammingComputer16 { - uint64_t a0, a1; - GenHammingComputer16 (const uint8_t *a8, int code_size) { - assert (code_size == 16); - const uint64_t *a = (uint64_t *)a8; - a0 = a[0]; a1 = a[1]; - } - - inline int hamming (const uint8_t *b8) const { - const uint64_t *b = (uint64_t *)b8; - return generalized_hamming_64 (b[0] ^ a0) + - generalized_hamming_64 (b[1] ^ a1); - } - -}; - -struct GenHammingComputer32 { - uint64_t a0, a1, a2, a3; - - GenHammingComputer32 (const uint8_t *a8, int code_size) { - assert (code_size == 32); - const uint64_t *a = (uint64_t *)a8; - a0 = a[0]; a1 = a[1]; a2 = a[2]; a3 = a[3]; - } - - inline int hamming (const uint8_t *b8) const { - const uint64_t *b = (uint64_t *)b8; - return generalized_hamming_64 (b[0] ^ a0) + - generalized_hamming_64 (b[1] ^ a1) + - generalized_hamming_64 (b[2] ^ a2) + - generalized_hamming_64 (b[3] ^ a3); - } - -}; - -struct GenHammingComputerM8 { - const uint64_t *a; - int n; - - GenHammingComputerM8 (const uint8_t *a8, int code_size) { - assert (code_size % 8 == 0); - a = (uint64_t *)a8; - n = code_size / 8; - } - - int hamming (const uint8_t *b8) const { - const uint64_t *b = (uint64_t *)b8; - int accu = 0; - for (int i = 0; i < n; i++) - accu += generalized_hamming_64 (a[i] ^ b[i]); - return accu; - } - -}; - - -/** generalized Hamming distances (= count number of code bytes that - are the same) */ -void generalized_hammings_knn_hc ( - int_maxheap_array_t * ha, - const uint8_t * a, - const uint8_t * b, - size_t nb, - size_t code_size, - int ordered = true); - - - -/** This class maintains a list of best distances seen so far. - * - * Since the distances are in a limited range (0 to nbit), the - * object maintains one list per possible distance, and fills - * in only the n-first lists, such that the sum of sizes of the - * n lists is below k. - */ -template -struct HCounterState { - int *counters; - int64_t *ids_per_dis; - - HammingComputer hc; - int thres; - int count_lt; - int count_eq; - int k; - - HCounterState(int *counters, int64_t *ids_per_dis, - const uint8_t *x, int d, int k) - : counters(counters), - ids_per_dis(ids_per_dis), - hc(x, d / 8), - thres(d + 1), - count_lt(0), - count_eq(0), - k(k) {} - - void update_counter(const uint8_t *y, size_t j) { - int32_t dis = hc.hamming(y); - - if (dis <= thres) { - if (dis < thres) { - ids_per_dis[dis * k + counters[dis]++] = j; - ++count_lt; - while (count_lt == k && thres > 0) { - --thres; - count_eq = counters[thres]; - count_lt -= count_eq; - } - } else if (count_eq < k) { - ids_per_dis[dis * k + count_eq++] = j; - counters[dis] = count_eq; - } - } - } -}; - - -} // namespace faiss - - -#endif /* FAISS_hamming_h */ diff --git a/index_io.cpp b/index_io.cpp deleted file mode 100644 index 7bd55aa8c7..0000000000 --- a/index_io.cpp +++ /dev/null @@ -1,1389 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#include "index_io.h" - -#include -#include - -#include -#include -#include -#include - -#include "FaissAssert.h" -#include "AuxIndexStructures.h" - -#include "IndexFlat.h" -#include "VectorTransform.h" -#include "IndexLSH.h" -#include "IndexPQ.h" -#include "IndexIVF.h" -#include "IndexIVFPQ.h" -#include "IndexIVFFlat.h" -#include "IndexIVFSpectralHash.h" -#include "MetaIndexes.h" -#include "IndexScalarQuantizer.h" -#include "IndexHNSW.h" -#include "OnDiskInvertedLists.h" -#include "IndexBinaryFlat.h" -#include "IndexBinaryFromFloat.h" -#include "IndexBinaryHNSW.h" -#include "IndexBinaryIVF.h" - - - -/************************************************************* - * The I/O format is the content of the class. For objects that are - * inherited, like Index, a 4-character-code (fourcc) indicates which - * child class this is an instance of. - * - * In this case, the fields of the parent class are written first, - * then the ones for the child classes. Note that this requires - * classes to be serialized to have a constructor without parameters, - * so that the fields can be filled in later. The default constructor - * should set reasonable defaults for all fields. - * - * The fourccs are assigned arbitrarily. When the class changed (added - * or deprecated fields), the fourcc can be replaced. New code should - * be able to read the old fourcc and fill in new classes. - * - * TODO: serialization to strings for use in Python pickle or Torch - * serialization. - * - * TODO: in this file, the read functions that encouter errors may - * leak memory. - **************************************************************/ - - - -namespace faiss { - -static uint32_t fourcc (const char sx[4]) { - assert(4 == strlen(sx)); - const unsigned char *x = (unsigned char*)sx; - return x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24; -} - -/************************************************************* - * I/O macros - * - * we use macros so that we have a line number to report in abort - * (). This makes debugging a lot easier. The IOReader or IOWriter is - * always called f and thus is not passed in as a macro parameter. - **************************************************************/ - - -#define WRITEANDCHECK(ptr, n) { \ - size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \ - FAISS_THROW_IF_NOT_FMT(ret == (n), \ - "write error in %s: %ld != %ld (%s)", \ - f->name.c_str(), ret, size_t(n), strerror(errno)); \ - } - -#define READANDCHECK(ptr, n) { \ - size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \ - FAISS_THROW_IF_NOT_FMT(ret == (n), \ - "read error in %s: %ld != %ld (%s)", \ - f->name.c_str(), ret, size_t(n), strerror(errno)); \ - } - -#define WRITE1(x) WRITEANDCHECK(&(x), 1) -#define READ1(x) READANDCHECK(&(x), 1) - -#define WRITEVECTOR(vec) { \ - size_t size = (vec).size (); \ - WRITEANDCHECK (&size, 1); \ - WRITEANDCHECK ((vec).data (), size); \ - } - -// will fail if we write 256G of data at once... -#define READVECTOR(vec) { \ - long size; \ - READANDCHECK (&size, 1); \ - FAISS_THROW_IF_NOT (size >= 0 && size < (1L << 40)); \ - (vec).resize (size); \ - READANDCHECK ((vec).data (), size); \ - } - -struct ScopeFileCloser { - FILE *f; - ScopeFileCloser (FILE *f): f (f) {} - ~ScopeFileCloser () {fclose (f); } -}; - - -namespace { - -struct FileIOReader: IOReader { - FILE *f = nullptr; - bool need_close = false; - - FileIOReader(FILE *rf): f(rf) {} - - FileIOReader(const char * fname) - { - name = fname; - f = fopen(fname, "rb"); - FAISS_THROW_IF_NOT_FMT ( - f, "could not open %s for reading: %s", - fname, strerror(errno)); - need_close = true; - } - - ~FileIOReader() override { - if (need_close) { - int ret = fclose(f); - if (ret != 0) {// we cannot raise and exception in the destructor - fprintf(stderr, "file %s close error: %s", - name.c_str(), strerror(errno)); - } - } - } - - size_t operator()( - void *ptr, size_t size, size_t nitems) override { - return fread(ptr, size, nitems, f); - } - - int fileno() override { - return ::fileno (f); - } - -}; - -struct FileIOWriter: IOWriter { - FILE *f = nullptr; - bool need_close = false; - - FileIOWriter(FILE *wf): f(wf) {} - - FileIOWriter(const char * fname) - { - name = fname; - f = fopen(fname, "wb"); - FAISS_THROW_IF_NOT_FMT ( - f, "could not open %s for writing: %s", - fname, strerror(errno)); - need_close = true; - } - - ~FileIOWriter() override { - if (need_close) { - int ret = fclose(f); - if (ret != 0) { - // we cannot raise and exception in the destructor - fprintf(stderr, "file %s close error: %s", - name.c_str(), strerror(errno)); - } - } - } - - size_t operator()( - const void *ptr, size_t size, size_t nitems) override { - return fwrite(ptr, size, nitems, f); - } - int fileno() override { - return ::fileno (f); - } - -}; - - -} // namespace - - -/************************************************************* - * Write - **************************************************************/ -static void write_index_header (const Index *idx, IOWriter *f) { - WRITE1 (idx->d); - WRITE1 (idx->ntotal); - Index::idx_t dummy = 1 << 20; - WRITE1 (dummy); - WRITE1 (dummy); - WRITE1 (idx->is_trained); - WRITE1 (idx->metric_type); - if (idx->metric_type > 1) { - WRITE1 (idx->metric_arg); - } -} - -void write_VectorTransform (const VectorTransform *vt, IOWriter *f) { - if (const LinearTransform * lt = - dynamic_cast < const LinearTransform *> (vt)) { - if (dynamic_cast(lt)) { - uint32_t h = fourcc ("rrot"); - WRITE1 (h); - } else if (const PCAMatrix * pca = - dynamic_cast(lt)) { - uint32_t h = fourcc ("PcAm"); - WRITE1 (h); - WRITE1 (pca->eigen_power); - WRITE1 (pca->random_rotation); - WRITE1 (pca->balanced_bins); - WRITEVECTOR (pca->mean); - WRITEVECTOR (pca->eigenvalues); - WRITEVECTOR (pca->PCAMat); - } else { - // generic LinearTransform (includes OPQ) - uint32_t h = fourcc ("LTra"); - WRITE1 (h); - } - WRITE1 (lt->have_bias); - WRITEVECTOR (lt->A); - WRITEVECTOR (lt->b); - } else if (const RemapDimensionsTransform *rdt = - dynamic_cast(vt)) { - uint32_t h = fourcc ("RmDT"); - WRITE1 (h); - WRITEVECTOR (rdt->map); - } else if (const NormalizationTransform *nt = - dynamic_cast(vt)) { - uint32_t h = fourcc ("VNrm"); - WRITE1 (h); - WRITE1 (nt->norm); - } else if (const CenteringTransform *ct = - dynamic_cast(vt)) { - uint32_t h = fourcc ("VCnt"); - WRITE1 (h); - WRITEVECTOR (ct->mean); - } else { - FAISS_THROW_MSG ("cannot serialize this"); - } - // common fields - WRITE1 (vt->d_in); - WRITE1 (vt->d_out); - WRITE1 (vt->is_trained); -} - -void write_ProductQuantizer (const ProductQuantizer *pq, IOWriter *f) { - WRITE1 (pq->d); - WRITE1 (pq->M); - WRITE1 (pq->nbits); - WRITEVECTOR (pq->centroids); -} - -static void write_ScalarQuantizer ( - const ScalarQuantizer *ivsc, IOWriter *f) { - WRITE1 (ivsc->qtype); - WRITE1 (ivsc->rangestat); - WRITE1 (ivsc->rangestat_arg); - WRITE1 (ivsc->d); - WRITE1 (ivsc->code_size); - WRITEVECTOR (ivsc->trained); -} - -void write_InvertedLists (const InvertedLists *ils, IOWriter *f) { - if (ils == nullptr) { - uint32_t h = fourcc ("il00"); - WRITE1 (h); - } else if (const auto & ails = - dynamic_cast(ils)) { - uint32_t h = fourcc ("ilar"); - WRITE1 (h); - WRITE1 (ails->nlist); - WRITE1 (ails->code_size); - // here we store either as a full or a sparse data buffer - size_t n_non0 = 0; - for (size_t i = 0; i < ails->nlist; i++) { - if (ails->ids[i].size() > 0) - n_non0++; - } - if (n_non0 > ails->nlist / 2) { - uint32_t list_type = fourcc("full"); - WRITE1 (list_type); - std::vector sizes; - for (size_t i = 0; i < ails->nlist; i++) { - sizes.push_back (ails->ids[i].size()); - } - WRITEVECTOR (sizes); - } else { - int list_type = fourcc("sprs"); // sparse - WRITE1 (list_type); - std::vector sizes; - for (size_t i = 0; i < ails->nlist; i++) { - size_t n = ails->ids[i].size(); - if (n > 0) { - sizes.push_back (i); - sizes.push_back (n); - } - } - WRITEVECTOR (sizes); - } - // make a single contiguous data buffer (useful for mmapping) - for (size_t i = 0; i < ails->nlist; i++) { - size_t n = ails->ids[i].size(); - if (n > 0) { - WRITEANDCHECK (ails->codes[i].data(), n * ails->code_size); - WRITEANDCHECK (ails->ids[i].data(), n); - } - } - } else if (const auto & od = - dynamic_cast(ils)) { - uint32_t h = fourcc ("ilod"); - WRITE1 (h); - WRITE1 (ils->nlist); - WRITE1 (ils->code_size); - // this is a POD object - WRITEVECTOR (od->lists); - - { - std::vector v( - od->slots.begin(), od->slots.end()); - WRITEVECTOR(v); - } - { - std::vector x(od->filename.begin(), od->filename.end()); - WRITEVECTOR(x); - } - WRITE1(od->totsize); - - } else { - fprintf(stderr, "WARN! write_InvertedLists: unsupported invlist type, " - "saving null invlist\n"); - uint32_t h = fourcc ("il00"); - WRITE1 (h); - } -} - - -void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) { - FileIOWriter writer(fname); - write_ProductQuantizer (pq, &writer); -} - -static void write_HNSW (const HNSW *hnsw, IOWriter *f) { - - WRITEVECTOR (hnsw->assign_probas); - WRITEVECTOR (hnsw->cum_nneighbor_per_level); - WRITEVECTOR (hnsw->levels); - WRITEVECTOR (hnsw->offsets); - WRITEVECTOR (hnsw->neighbors); - - WRITE1 (hnsw->entry_point); - WRITE1 (hnsw->max_level); - WRITE1 (hnsw->efConstruction); - WRITE1 (hnsw->efSearch); - WRITE1 (hnsw->upper_beam); -} - -static void write_ivf_header (const IndexIVF *ivf, IOWriter *f) { - write_index_header (ivf, f); - WRITE1 (ivf->nlist); - WRITE1 (ivf->nprobe); - write_index (ivf->quantizer, f); - WRITE1 (ivf->maintain_direct_map); - WRITEVECTOR (ivf->direct_map); -} - -void write_index (const Index *idx, IOWriter *f) { - if (const IndexFlat * idxf = dynamic_cast (idx)) { - uint32_t h = fourcc ( - idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" : - idxf->metric_type == METRIC_L2 ? "IxF2" : nullptr); - WRITE1 (h); - write_index_header (idx, f); - WRITEVECTOR (idxf->xb); - } else if(const IndexLSH * idxl = dynamic_cast (idx)) { - uint32_t h = fourcc ("IxHe"); - WRITE1 (h); - write_index_header (idx, f); - WRITE1 (idxl->nbits); - WRITE1 (idxl->rotate_data); - WRITE1 (idxl->train_thresholds); - WRITEVECTOR (idxl->thresholds); - WRITE1 (idxl->bytes_per_vec); - write_VectorTransform (&idxl->rrot, f); - WRITEVECTOR (idxl->codes); - } else if(const IndexPQ * idxp = dynamic_cast (idx)) { - uint32_t h = fourcc ("IxPq"); - WRITE1 (h); - write_index_header (idx, f); - write_ProductQuantizer (&idxp->pq, f); - WRITEVECTOR (idxp->codes); - // search params -- maybe not useful to store? - WRITE1 (idxp->search_type); - WRITE1 (idxp->encode_signs); - WRITE1 (idxp->polysemous_ht); - } else if(const Index2Layer * idxp = - dynamic_cast (idx)) { - uint32_t h = fourcc ("Ix2L"); - WRITE1 (h); - write_index_header (idx, f); - write_index (idxp->q1.quantizer, f); - WRITE1 (idxp->q1.nlist); - WRITE1 (idxp->q1.quantizer_trains_alone); - write_ProductQuantizer (&idxp->pq, f); - WRITE1 (idxp->code_size_1); - WRITE1 (idxp->code_size_2); - WRITE1 (idxp->code_size); - WRITEVECTOR (idxp->codes); - } else if(const IndexScalarQuantizer * idxs = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IxSQ"); - WRITE1 (h); - write_index_header (idx, f); - write_ScalarQuantizer (&idxs->sq, f); - WRITEVECTOR (idxs->codes); - } else if(const IndexIVFFlatDedup * ivfl = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IwFd"); - WRITE1 (h); - write_ivf_header (ivfl, f); - { - std::vector tab (2 * ivfl->instances.size()); - long i = 0; - for (auto it = ivfl->instances.begin(); - it != ivfl->instances.end(); ++it) { - tab[i++] = it->first; - tab[i++] = it->second; - } - WRITEVECTOR (tab); - } - write_InvertedLists (ivfl->invlists, f); - } else if(const IndexIVFFlat * ivfl = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IwFl"); - WRITE1 (h); - write_ivf_header (ivfl, f); - write_InvertedLists (ivfl->invlists, f); - } else if(const IndexIVFScalarQuantizer * ivsc = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IwSq"); - WRITE1 (h); - write_ivf_header (ivsc, f); - write_ScalarQuantizer (&ivsc->sq, f); - WRITE1 (ivsc->code_size); - WRITE1 (ivsc->by_residual); - write_InvertedLists (ivsc->invlists, f); - } else if(const IndexIVFSpectralHash *ivsp = - dynamic_cast(idx)) { - uint32_t h = fourcc ("IwSh"); - WRITE1 (h); - write_ivf_header (ivsp, f); - write_VectorTransform (ivsp->vt, f); - WRITE1 (ivsp->nbit); - WRITE1 (ivsp->period); - WRITE1 (ivsp->threshold_type); - WRITEVECTOR (ivsp->trained); - write_InvertedLists (ivsp->invlists, f); - } else if(const IndexIVFPQ * ivpq = - dynamic_cast (idx)) { - const IndexIVFPQR * ivfpqr = dynamic_cast (idx); - - uint32_t h = fourcc (ivfpqr ? "IwQR" : "IwPQ"); - WRITE1 (h); - write_ivf_header (ivpq, f); - WRITE1 (ivpq->by_residual); - WRITE1 (ivpq->code_size); - write_ProductQuantizer (&ivpq->pq, f); - write_InvertedLists (ivpq->invlists, f); - if (ivfpqr) { - write_ProductQuantizer (&ivfpqr->refine_pq, f); - WRITEVECTOR (ivfpqr->refine_codes); - WRITE1 (ivfpqr->k_factor); - } - - } else if(const IndexPreTransform * ixpt = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IxPT"); - WRITE1 (h); - write_index_header (ixpt, f); - int nt = ixpt->chain.size(); - WRITE1 (nt); - for (int i = 0; i < nt; i++) - write_VectorTransform (ixpt->chain[i], f); - write_index (ixpt->index, f); - } else if(const MultiIndexQuantizer * imiq = - dynamic_cast (idx)) { - uint32_t h = fourcc ("Imiq"); - WRITE1 (h); - write_index_header (imiq, f); - write_ProductQuantizer (&imiq->pq, f); - } else if(const IndexRefineFlat * idxrf = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IxRF"); - WRITE1 (h); - write_index_header (idxrf, f); - write_index (idxrf->base_index, f); - write_index (&idxrf->refine_index, f); - WRITE1 (idxrf->k_factor); - } else if(const IndexIDMap * idxmap = - dynamic_cast (idx)) { - uint32_t h = - dynamic_cast (idx) ? fourcc ("IxM2") : - fourcc ("IxMp"); - // no need to store additional info for IndexIDMap2 - WRITE1 (h); - write_index_header (idxmap, f); - write_index (idxmap->index, f); - WRITEVECTOR (idxmap->id_map); - } else if(const IndexHNSW * idxhnsw = - dynamic_cast (idx)) { - uint32_t h = - dynamic_cast(idx) ? fourcc("IHNf") : - dynamic_cast(idx) ? fourcc("IHNp") : - dynamic_cast(idx) ? fourcc("IHNs") : - dynamic_cast(idx) ? fourcc("IHN2") : - 0; - FAISS_THROW_IF_NOT (h != 0); - WRITE1 (h); - write_index_header (idxhnsw, f); - write_HNSW (&idxhnsw->hnsw, f); - write_index (idxhnsw->storage, f); - } else { - FAISS_THROW_MSG ("don't know how to serialize this type of index"); - } -} - -void write_index (const Index *idx, FILE *f) { - FileIOWriter writer(f); - write_index (idx, &writer); -} - -void write_index (const Index *idx, const char *fname) { - FileIOWriter writer(fname); - write_index (idx, &writer); -} - -void write_VectorTransform (const VectorTransform *vt, const char *fname) { - FileIOWriter writer(fname); - write_VectorTransform (vt, &writer); -} - -/************************************************************* - * Read - **************************************************************/ - -static void read_index_header (Index *idx, IOReader *f) { - READ1 (idx->d); - READ1 (idx->ntotal); - Index::idx_t dummy; - READ1 (dummy); - READ1 (dummy); - READ1 (idx->is_trained); - READ1 (idx->metric_type); - if (idx->metric_type > 1) { - READ1 (idx->metric_arg); - } - idx->verbose = false; -} - -VectorTransform* read_VectorTransform (IOReader *f) { - uint32_t h; - READ1 (h); - VectorTransform *vt = nullptr; - - if (h == fourcc ("rrot") || h == fourcc ("PCAm") || - h == fourcc ("LTra") || h == fourcc ("PcAm")) { - LinearTransform *lt = nullptr; - if (h == fourcc ("rrot")) { - lt = new RandomRotationMatrix (); - } else if (h == fourcc ("PCAm") || - h == fourcc ("PcAm")) { - PCAMatrix * pca = new PCAMatrix (); - READ1 (pca->eigen_power); - READ1 (pca->random_rotation); - if (h == fourcc ("PcAm")) - READ1 (pca->balanced_bins); - READVECTOR (pca->mean); - READVECTOR (pca->eigenvalues); - READVECTOR (pca->PCAMat); - lt = pca; - } else if (h == fourcc ("LTra")) { - lt = new LinearTransform (); - } - READ1 (lt->have_bias); - READVECTOR (lt->A); - READVECTOR (lt->b); - FAISS_THROW_IF_NOT (lt->A.size() >= lt->d_in * lt->d_out); - FAISS_THROW_IF_NOT (!lt->have_bias || lt->b.size() >= lt->d_out); - lt->set_is_orthonormal(); - vt = lt; - } else if (h == fourcc ("RmDT")) { - RemapDimensionsTransform *rdt = new RemapDimensionsTransform (); - READVECTOR (rdt->map); - vt = rdt; - } else if (h == fourcc ("VNrm")) { - NormalizationTransform *nt = new NormalizationTransform (); - READ1 (nt->norm); - vt = nt; - } else if (h == fourcc ("VCnt")) { - CenteringTransform *ct = new CenteringTransform (); - READVECTOR (ct->mean); - vt = ct; - } else { - FAISS_THROW_MSG("fourcc not recognized"); - } - READ1 (vt->d_in); - READ1 (vt->d_out); - READ1 (vt->is_trained); - return vt; -} - - -static void read_ArrayInvertedLists_sizes ( - IOReader *f, std::vector & sizes) -{ - uint32_t list_type; - READ1(list_type); - if (list_type == fourcc("full")) { - size_t os = sizes.size(); - READVECTOR (sizes); - FAISS_THROW_IF_NOT (os == sizes.size()); - } else if (list_type == fourcc("sprs")) { - std::vector idsizes; - READVECTOR (idsizes); - for (size_t j = 0; j < idsizes.size(); j += 2) { - FAISS_THROW_IF_NOT (idsizes[j] < sizes.size()); - sizes[idsizes[j]] = idsizes[j + 1]; - } - } else { - FAISS_THROW_MSG ("invalid list_type"); - } -} - -InvertedLists *read_InvertedLists (IOReader *f, int io_flags) { - uint32_t h; - READ1 (h); - if (h == fourcc ("il00")) { - fprintf(stderr, "read_InvertedLists:" - " WARN! inverted lists not stored with IVF object\n"); - return nullptr; - } else if (h == fourcc ("ilar") && !(io_flags & IO_FLAG_MMAP)) { - auto ails = new ArrayInvertedLists (0, 0); - READ1 (ails->nlist); - READ1 (ails->code_size); - ails->ids.resize (ails->nlist); - ails->codes.resize (ails->nlist); - std::vector sizes (ails->nlist); - read_ArrayInvertedLists_sizes (f, sizes); - for (size_t i = 0; i < ails->nlist; i++) { - ails->ids[i].resize (sizes[i]); - ails->codes[i].resize (sizes[i] * ails->code_size); - } - for (size_t i = 0; i < ails->nlist; i++) { - size_t n = ails->ids[i].size(); - if (n > 0) { - READANDCHECK (ails->codes[i].data(), n * ails->code_size); - READANDCHECK (ails->ids[i].data(), n); - } - } - return ails; - } else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_MMAP)) { - // then we load it as an OnDiskInvertedLists - - FileIOReader *reader = dynamic_cast(f); - FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects"); - FILE *fdesc = reader->f; - - auto ails = new OnDiskInvertedLists (); - READ1 (ails->nlist); - READ1 (ails->code_size); - ails->read_only = true; - ails->lists.resize (ails->nlist); - std::vector sizes (ails->nlist); - read_ArrayInvertedLists_sizes (f, sizes); - size_t o0 = ftell(fdesc), o = o0; - { // do the mmap - struct stat buf; - int ret = fstat (fileno(fdesc), &buf); - FAISS_THROW_IF_NOT_FMT (ret == 0, - "fstat failed: %s", strerror(errno)); - ails->totsize = buf.st_size; - ails->ptr = (uint8_t*)mmap (nullptr, ails->totsize, - PROT_READ, MAP_SHARED, - fileno(fdesc), 0); - FAISS_THROW_IF_NOT_FMT (ails->ptr != MAP_FAILED, - "could not mmap: %s", - strerror(errno)); - } - - for (size_t i = 0; i < ails->nlist; i++) { - OnDiskInvertedLists::List & l = ails->lists[i]; - l.size = l.capacity = sizes[i]; - l.offset = o; - o += l.size * (sizeof(OnDiskInvertedLists::idx_t) + - ails->code_size); - } - FAISS_THROW_IF_NOT(o <= ails->totsize); - // resume normal reading of file - fseek (fdesc, o, SEEK_SET); - return ails; - } else if (h == fourcc ("ilod")) { - OnDiskInvertedLists *od = new OnDiskInvertedLists(); - od->read_only = io_flags & IO_FLAG_READ_ONLY; - READ1 (od->nlist); - READ1 (od->code_size); - // this is a POD object - READVECTOR (od->lists); - { - std::vector v; - READVECTOR(v); - od->slots.assign(v.begin(), v.end()); - } - { - std::vector x; - READVECTOR(x); - od->filename.assign(x.begin(), x.end()); - - if (io_flags & IO_FLAG_ONDISK_SAME_DIR) { - FileIOReader *reader = dynamic_cast(f); - FAISS_THROW_IF_NOT_MSG ( - reader, "IO_FLAG_ONDISK_SAME_DIR only supported " - "when reading from file"); - std::string indexname = reader->name; - std::string dirname = "./"; - size_t slash = indexname.find_last_of('/'); - if (slash != std::string::npos) { - dirname = indexname.substr(0, slash + 1); - } - std::string filename = od->filename; - slash = filename.find_last_of('/'); - if (slash != std::string::npos) { - filename = filename.substr(slash + 1); - } - filename = dirname + filename; - printf("IO_FLAG_ONDISK_SAME_DIR: " - "updating ondisk filename from %s to %s\n", - od->filename.c_str(), filename.c_str()); - od->filename = filename; - } - - } - READ1(od->totsize); - od->do_mmap(); - return od; - } else { - FAISS_THROW_MSG ("read_InvertedLists: unsupported invlist type"); - } -} - -static void read_InvertedLists ( - IndexIVF *ivf, IOReader *f, int io_flags) { - InvertedLists *ils = read_InvertedLists (f, io_flags); - FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist && - ils->code_size == ivf->code_size)); - ivf->invlists = ils; - ivf->own_invlists = true; -} - -static void read_InvertedLists ( - IndexBinaryIVF *ivf, IOReader *f, int io_flags) { - InvertedLists *ils = read_InvertedLists (f, io_flags); - FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist && - ils->code_size == ivf->code_size)); - ivf->invlists = ils; - ivf->own_invlists = true; -} - -static void read_ProductQuantizer (ProductQuantizer *pq, IOReader *f) { - READ1 (pq->d); - READ1 (pq->M); - READ1 (pq->nbits); - pq->set_derived_values (); - READVECTOR (pq->centroids); -} - -static void read_ScalarQuantizer (ScalarQuantizer *ivsc, IOReader *f) { - READ1 (ivsc->qtype); - READ1 (ivsc->rangestat); - READ1 (ivsc->rangestat_arg); - READ1 (ivsc->d); - READ1 (ivsc->code_size); - READVECTOR (ivsc->trained); -} - - -static void read_HNSW (HNSW *hnsw, IOReader *f) { - READVECTOR (hnsw->assign_probas); - READVECTOR (hnsw->cum_nneighbor_per_level); - READVECTOR (hnsw->levels); - READVECTOR (hnsw->offsets); - READVECTOR (hnsw->neighbors); - - READ1 (hnsw->entry_point); - READ1 (hnsw->max_level); - READ1 (hnsw->efConstruction); - READ1 (hnsw->efSearch); - READ1 (hnsw->upper_beam); -} - -ProductQuantizer * read_ProductQuantizer (const char*fname) { - FileIOReader reader(fname); - return read_ProductQuantizer(&reader); -} - -ProductQuantizer * read_ProductQuantizer (IOReader *reader) { - ProductQuantizer *pq = new ProductQuantizer(); - ScopeDeleter1 del (pq); - - read_ProductQuantizer(pq, reader); - del.release (); - return pq; -} - -static void read_ivf_header ( - IndexIVF *ivf, IOReader *f, - std::vector > *ids = nullptr) -{ - read_index_header (ivf, f); - READ1 (ivf->nlist); - READ1 (ivf->nprobe); - ivf->quantizer = read_index (f); - ivf->own_fields = true; - if (ids) { // used in legacy "Iv" formats - ids->resize (ivf->nlist); - for (size_t i = 0; i < ivf->nlist; i++) - READVECTOR ((*ids)[i]); - } - READ1 (ivf->maintain_direct_map); - READVECTOR (ivf->direct_map); -} - -// used for legacy formats -static ArrayInvertedLists *set_array_invlist( - IndexIVF *ivf, std::vector > &ids) -{ - ArrayInvertedLists *ail = new ArrayInvertedLists ( - ivf->nlist, ivf->code_size); - std::swap (ail->ids, ids); - ivf->invlists = ail; - ivf->own_invlists = true; - return ail; -} - -static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags) -{ - bool legacy = h == fourcc ("IvQR") || h == fourcc ("IvPQ"); - - IndexIVFPQR *ivfpqr = - h == fourcc ("IvQR") || h == fourcc ("IwQR") ? - new IndexIVFPQR () : nullptr; - IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ (); - - std::vector > ids; - read_ivf_header (ivpq, f, legacy ? &ids : nullptr); - READ1 (ivpq->by_residual); - READ1 (ivpq->code_size); - read_ProductQuantizer (&ivpq->pq, f); - - if (legacy) { - ArrayInvertedLists *ail = set_array_invlist (ivpq, ids); - for (size_t i = 0; i < ail->nlist; i++) - READVECTOR (ail->codes[i]); - } else { - read_InvertedLists (ivpq, f, io_flags); - } - - if (ivpq->is_trained) { - // precomputed table not stored. It is cheaper to recompute it - ivpq->use_precomputed_table = 0; - if (ivpq->by_residual) - ivpq->precompute_table (); - if (ivfpqr) { - read_ProductQuantizer (&ivfpqr->refine_pq, f); - READVECTOR (ivfpqr->refine_codes); - READ1 (ivfpqr->k_factor); - } - } - return ivpq; -} - -int read_old_fmt_hack = 0; - -Index *read_index (IOReader *f, int io_flags) { - Index * idx = nullptr; - uint32_t h; - READ1 (h); - if (h == fourcc ("IxFI") || h == fourcc ("IxF2")) { - IndexFlat *idxf; - if (h == fourcc ("IxFI")) idxf = new IndexFlatIP (); - else idxf = new IndexFlatL2 (); - read_index_header (idxf, f); - READVECTOR (idxf->xb); - FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d); - // leak! - idx = idxf; - } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) { - IndexLSH * idxl = new IndexLSH (); - read_index_header (idxl, f); - READ1 (idxl->nbits); - READ1 (idxl->rotate_data); - READ1 (idxl->train_thresholds); - READVECTOR (idxl->thresholds); - READ1 (idxl->bytes_per_vec); - if (h == fourcc("IxHE")) { - FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0, - "can only read old format IndexLSH with " - "nbits multiple of 64 (got %d)", - (int) idxl->nbits); - // leak - idxl->bytes_per_vec *= 8; - } - { - RandomRotationMatrix *rrot = dynamic_cast - (read_VectorTransform (f)); - FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation"); - idxl->rrot = *rrot; - delete rrot; - } - READVECTOR (idxl->codes); - FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d && - idxl->rrot.d_out == idxl->nbits); - FAISS_THROW_IF_NOT ( - idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec); - idx = idxl; - } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") || - h == fourcc ("IxPq")) { - // IxPQ and IxPo were merged into the same IndexPQ object - IndexPQ * idxp =new IndexPQ (); - read_index_header (idxp, f); - read_ProductQuantizer (&idxp->pq, f); - READVECTOR (idxp->codes); - if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) { - READ1 (idxp->search_type); - READ1 (idxp->encode_signs); - READ1 (idxp->polysemous_ht); - } - // Old versoins of PQ all had metric_type set to INNER_PRODUCT - // when they were in fact using L2. Therefore, we force metric type - // to L2 when the old format is detected - if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) { - idxp->metric_type = METRIC_L2; - } - idx = idxp; - } else if (h == fourcc ("IvFl") || h == fourcc("IvFL")) { // legacy - IndexIVFFlat * ivfl = new IndexIVFFlat (); - std::vector > ids; - read_ivf_header (ivfl, f, &ids); - ivfl->code_size = ivfl->d * sizeof(float); - ArrayInvertedLists *ail = set_array_invlist (ivfl, ids); - - if (h == fourcc ("IvFL")) { - for (size_t i = 0; i < ivfl->nlist; i++) { - READVECTOR (ail->codes[i]); - } - } else { // old format - for (size_t i = 0; i < ivfl->nlist; i++) { - std::vector vec; - READVECTOR (vec); - ail->codes[i].resize(vec.size() * sizeof(float)); - memcpy(ail->codes[i].data(), vec.data(), - ail->codes[i].size()); - } - } - idx = ivfl; - } else if (h == fourcc ("IwFd")) { - IndexIVFFlatDedup * ivfl = new IndexIVFFlatDedup (); - read_ivf_header (ivfl, f); - ivfl->code_size = ivfl->d * sizeof(float); - { - std::vector tab; - READVECTOR (tab); - for (long i = 0; i < tab.size(); i += 2) { - std::pair - pair (tab[i], tab[i + 1]); - ivfl->instances.insert (pair); - } - } - read_InvertedLists (ivfl, f, io_flags); - idx = ivfl; - } else if (h == fourcc ("IwFl")) { - IndexIVFFlat * ivfl = new IndexIVFFlat (); - read_ivf_header (ivfl, f); - ivfl->code_size = ivfl->d * sizeof(float); - read_InvertedLists (ivfl, f, io_flags); - idx = ivfl; - } else if (h == fourcc ("IxSQ")) { - IndexScalarQuantizer * idxs = new IndexScalarQuantizer (); - read_index_header (idxs, f); - read_ScalarQuantizer (&idxs->sq, f); - READVECTOR (idxs->codes); - idxs->code_size = idxs->sq.code_size; - idx = idxs; - } else if(h == fourcc ("IvSQ")) { // legacy - IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer(); - std::vector > ids; - read_ivf_header (ivsc, f, &ids); - read_ScalarQuantizer (&ivsc->sq, f); - READ1 (ivsc->code_size); - ArrayInvertedLists *ail = set_array_invlist (ivsc, ids); - for(int i = 0; i < ivsc->nlist; i++) - READVECTOR (ail->codes[i]); - idx = ivsc; - } else if(h == fourcc ("IwSQ") || h == fourcc ("IwSq")) { - IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer(); - read_ivf_header (ivsc, f); - read_ScalarQuantizer (&ivsc->sq, f); - READ1 (ivsc->code_size); - if (h == fourcc ("IwSQ")) { - ivsc->by_residual = true; - } else { - READ1 (ivsc->by_residual); - } - read_InvertedLists (ivsc, f, io_flags); - idx = ivsc; - } else if(h == fourcc ("IwSh")) { - IndexIVFSpectralHash *ivsp = new IndexIVFSpectralHash (); - read_ivf_header (ivsp, f); - ivsp->vt = read_VectorTransform (f); - ivsp->own_fields = true; - READ1 (ivsp->nbit); - // not stored by write_ivf_header - ivsp->code_size = (ivsp->nbit + 7) / 8; - READ1 (ivsp->period); - READ1 (ivsp->threshold_type); - READVECTOR (ivsp->trained); - read_InvertedLists (ivsp, f, io_flags); - idx = ivsp; - } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") || - h == fourcc ("IwPQ") || h == fourcc ("IwQR")) { - - idx = read_ivfpq (f, h, io_flags); - - } else if(h == fourcc ("IxPT")) { - IndexPreTransform * ixpt = new IndexPreTransform(); - ixpt->own_fields = true; - read_index_header (ixpt, f); - int nt; - if (read_old_fmt_hack == 2) { - nt = 1; - } else { - READ1 (nt); - } - for (int i = 0; i < nt; i++) { - ixpt->chain.push_back (read_VectorTransform (f)); - } - ixpt->index = read_index (f, io_flags); - idx = ixpt; - } else if(h == fourcc ("Imiq")) { - MultiIndexQuantizer * imiq = new MultiIndexQuantizer (); - read_index_header (imiq, f); - read_ProductQuantizer (&imiq->pq, f); - idx = imiq; - } else if(h == fourcc ("IxRF")) { - IndexRefineFlat *idxrf = new IndexRefineFlat (); - read_index_header (idxrf, f); - idxrf->base_index = read_index(f, io_flags); - idxrf->own_fields = true; - IndexFlat *rf = dynamic_cast (read_index (f, io_flags)); - std::swap (*rf, idxrf->refine_index); - delete rf; - READ1 (idxrf->k_factor); - idx = idxrf; - } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) { - bool is_map2 = h == fourcc ("IxM2"); - IndexIDMap * idxmap = is_map2 ? new IndexIDMap2 () : new IndexIDMap (); - read_index_header (idxmap, f); - idxmap->index = read_index (f, io_flags); - idxmap->own_fields = true; - READVECTOR (idxmap->id_map); - if (is_map2) { - static_cast(idxmap)->construct_rev_map (); - } - idx = idxmap; - } else if (h == fourcc ("Ix2L")) { - Index2Layer * idxp = new Index2Layer (); - read_index_header (idxp, f); - idxp->q1.quantizer = read_index (f, io_flags); - READ1 (idxp->q1.nlist); - READ1 (idxp->q1.quantizer_trains_alone); - read_ProductQuantizer (&idxp->pq, f); - READ1 (idxp->code_size_1); - READ1 (idxp->code_size_2); - READ1 (idxp->code_size); - READVECTOR (idxp->codes); - idx = idxp; - } else if(h == fourcc("IHNf") || h == fourcc("IHNp") || - h == fourcc("IHNs") || h == fourcc("IHN2")) { - IndexHNSW *idxhnsw = nullptr; - if (h == fourcc("IHNf")) idxhnsw = new IndexHNSWFlat (); - if (h == fourcc("IHNp")) idxhnsw = new IndexHNSWPQ (); - if (h == fourcc("IHNs")) idxhnsw = new IndexHNSWSQ (); - if (h == fourcc("IHN2")) idxhnsw = new IndexHNSW2Level (); - read_index_header (idxhnsw, f); - read_HNSW (&idxhnsw->hnsw, f); - idxhnsw->storage = read_index (f, io_flags); - idxhnsw->own_fields = true; - if (h == fourcc("IHNp")) { - dynamic_cast(idxhnsw->storage)->pq.compute_sdc_table (); - } - idx = idxhnsw; - } else { - FAISS_THROW_FMT("Index type 0x%08x not supported\n", h); - idx = nullptr; - } - return idx; -} - - -Index *read_index (FILE * f, int io_flags) { - FileIOReader reader(f); - return read_index(&reader, io_flags); -} - -Index *read_index (const char *fname, int io_flags) { - FileIOReader reader(fname); - Index *idx = read_index (&reader, io_flags); - return idx; -} - -VectorTransform *read_VectorTransform (const char *fname) { - FileIOReader reader(fname); - VectorTransform *vt = read_VectorTransform (&reader); - return vt; -} - -/************************************************************* - * cloning functions - **************************************************************/ - - - -Index * clone_index (const Index *index) -{ - Cloner cl; - return cl.clone_Index (index); -} - -// assumes there is a copy constructor ready. Always try from most -// specific to most general -#define TRYCLONE(classname, obj) \ - if (const classname *clo = dynamic_cast(obj)) { \ - return new classname(*clo); \ - } else - -VectorTransform *Cloner::clone_VectorTransform (const VectorTransform *vt) -{ - TRYCLONE (RemapDimensionsTransform, vt) - TRYCLONE (OPQMatrix, vt) - TRYCLONE (PCAMatrix, vt) - TRYCLONE (RandomRotationMatrix, vt) - TRYCLONE (LinearTransform, vt) - { - FAISS_THROW_MSG("clone not supported for this type of VectorTransform"); - } - return nullptr; -} - -IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf) -{ - TRYCLONE (IndexIVFPQR, ivf) - TRYCLONE (IndexIVFPQ, ivf) - TRYCLONE (IndexIVFFlat, ivf) - TRYCLONE (IndexIVFScalarQuantizer, ivf) - { - FAISS_THROW_MSG("clone not supported for this type of IndexIVF"); - } - return nullptr; -} - -Index *Cloner::clone_Index (const Index *index) -{ - TRYCLONE (IndexPQ, index) - TRYCLONE (IndexLSH, index) - TRYCLONE (IndexFlatL2, index) - TRYCLONE (IndexFlatIP, index) - TRYCLONE (IndexFlat, index) - TRYCLONE (IndexScalarQuantizer, index) - TRYCLONE (MultiIndexQuantizer, index) - if (const IndexIVF * ivf = dynamic_cast(index)) { - IndexIVF *res = clone_IndexIVF (ivf); - if (ivf->invlists == nullptr) { - res->invlists = nullptr; - } else if (auto *ails = dynamic_cast - (ivf->invlists)) { - res->invlists = new ArrayInvertedLists(*ails); - res->own_invlists = true; - } else { - FAISS_THROW_MSG( "clone not supported for this type of inverted lists"); - } - res->own_fields = true; - res->quantizer = clone_Index (ivf->quantizer); - return res; - } else if (const IndexPreTransform * ipt = - dynamic_cast (index)) { - IndexPreTransform *res = new IndexPreTransform (); - res->d = ipt->d; - res->index = clone_Index (ipt->index); - for (int i = 0; i < ipt->chain.size(); i++) - res->chain.push_back (clone_VectorTransform (ipt->chain[i])); - res->own_fields = true; - return res; - } else if (const IndexIDMap *idmap = - dynamic_cast (index)) { - IndexIDMap *res = new IndexIDMap (*idmap); - res->own_fields = true; - res->index = clone_Index (idmap->index); - return res; - } else { - FAISS_THROW_MSG( "clone not supported for this type of Index"); - } - return nullptr; -} - - -static void write_index_binary_header (const IndexBinary *idx, IOWriter *f) { - WRITE1 (idx->d); - WRITE1 (idx->code_size); - WRITE1 (idx->ntotal); - WRITE1 (idx->is_trained); - WRITE1 (idx->metric_type); -} - -static void write_binary_ivf_header (const IndexBinaryIVF *ivf, IOWriter *f) { - write_index_binary_header (ivf, f); - WRITE1 (ivf->nlist); - WRITE1 (ivf->nprobe); - write_index_binary (ivf->quantizer, f); - WRITE1 (ivf->maintain_direct_map); - WRITEVECTOR (ivf->direct_map); -} - -void write_index_binary (const IndexBinary *idx, IOWriter *f) { - if (const IndexBinaryFlat *idxf = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IBxF"); - WRITE1 (h); - write_index_binary_header (idx, f); - WRITEVECTOR (idxf->xb); - } else if (const IndexBinaryIVF *ivf = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IBwF"); - WRITE1 (h); - write_binary_ivf_header (ivf, f); - write_InvertedLists (ivf->invlists, f); - } else if(const IndexBinaryFromFloat * idxff = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IBFf"); - WRITE1 (h); - write_index_binary_header (idxff, f); - write_index (idxff->index, f); - } else if (const IndexBinaryHNSW *idxhnsw = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IBHf"); - WRITE1 (h); - write_index_binary_header (idxhnsw, f); - write_HNSW (&idxhnsw->hnsw, f); - write_index_binary (idxhnsw->storage, f); - } else if(const IndexBinaryIDMap * idxmap = - dynamic_cast (idx)) { - uint32_t h = - dynamic_cast (idx) ? fourcc ("IBM2") : - fourcc ("IBMp"); - // no need to store additional info for IndexIDMap2 - WRITE1 (h); - write_index_binary_header (idxmap, f); - write_index_binary (idxmap->index, f); - WRITEVECTOR (idxmap->id_map); - } else { - FAISS_THROW_MSG ("don't know how to serialize this type of index"); - } -} - -void write_index_binary (const IndexBinary *idx, FILE *f) { - FileIOWriter writer(f); - write_index_binary(idx, &writer); -} - -void write_index_binary (const IndexBinary *idx, const char *fname) { - FileIOWriter writer(fname); - write_index_binary (idx, &writer); -} - -static void read_index_binary_header (IndexBinary *idx, IOReader *f) { - READ1 (idx->d); - READ1 (idx->code_size); - READ1 (idx->ntotal); - READ1 (idx->is_trained); - READ1 (idx->metric_type); - idx->verbose = false; -} - -static void read_binary_ivf_header ( - IndexBinaryIVF *ivf, IOReader *f, - std::vector > *ids = nullptr) -{ - read_index_binary_header (ivf, f); - READ1 (ivf->nlist); - READ1 (ivf->nprobe); - ivf->quantizer = read_index_binary (f); - ivf->own_fields = true; - if (ids) { // used in legacy "Iv" formats - ids->resize (ivf->nlist); - for (size_t i = 0; i < ivf->nlist; i++) - READVECTOR ((*ids)[i]); - } - READ1 (ivf->maintain_direct_map); - READVECTOR (ivf->direct_map); -} - -IndexBinary *read_index_binary (IOReader *f, int io_flags) { - IndexBinary * idx = nullptr; - uint32_t h; - READ1 (h); - if (h == fourcc ("IBxF")) { - IndexBinaryFlat *idxf = new IndexBinaryFlat (); - read_index_binary_header (idxf, f); - READVECTOR (idxf->xb); - FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->code_size); - // leak! - idx = idxf; - } else if (h == fourcc ("IBwF")) { - IndexBinaryIVF *ivf = new IndexBinaryIVF (); - read_binary_ivf_header (ivf, f); - read_InvertedLists (ivf, f, io_flags); - idx = ivf; - } else if (h == fourcc ("IBFf")) { - IndexBinaryFromFloat *idxff = new IndexBinaryFromFloat (); - read_index_binary_header (idxff, f); - idxff->own_fields = true; - idxff->index = read_index (f, io_flags); - idx = idxff; - } else if (h == fourcc ("IBHf")) { - IndexBinaryHNSW *idxhnsw = new IndexBinaryHNSW (); - read_index_binary_header (idxhnsw, f); - read_HNSW (&idxhnsw->hnsw, f); - idxhnsw->storage = read_index_binary (f, io_flags); - idxhnsw->own_fields = true; - idx = idxhnsw; - } else if(h == fourcc ("IBMp") || h == fourcc ("IBM2")) { - bool is_map2 = h == fourcc ("IBM2"); - IndexBinaryIDMap * idxmap = is_map2 ? - new IndexBinaryIDMap2 () : new IndexBinaryIDMap (); - read_index_binary_header (idxmap, f); - idxmap->index = read_index_binary (f, io_flags); - idxmap->own_fields = true; - READVECTOR (idxmap->id_map); - if (is_map2) { - static_cast(idxmap)->construct_rev_map (); - } - idx = idxmap; - } else { - FAISS_THROW_FMT("Index type 0x%08x not supported\n", h); - idx = nullptr; - } - return idx; -} - -IndexBinary *read_index_binary (FILE * f, int io_flags) { - FileIOReader reader(f); - return read_index_binary(&reader, io_flags); -} - -IndexBinary *read_index_binary (const char *fname, int io_flags) { - FileIOReader reader(fname); - IndexBinary *idx = read_index_binary (&reader, io_flags); - return idx; -} - - -} // namespace faiss diff --git a/utils.cpp b/utils.cpp deleted file mode 100644 index a96e7d5087..0000000000 --- a/utils.cpp +++ /dev/null @@ -1,1612 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#include "utils.h" - -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include -#include - -#include "AuxIndexStructures.h" -#include "FaissAssert.h" - - - -#ifndef FINTEGER -#define FINTEGER long -#endif - - -extern "C" { - -/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */ - -int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER * - n, FINTEGER *k, const float *alpha, const float *a, - FINTEGER *lda, const float *b, FINTEGER * - ldb, float *beta, float *c, FINTEGER *ldc); - -/* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */ - -int sgeqrf_ (FINTEGER *m, FINTEGER *n, float *a, FINTEGER *lda, - float *tau, float *work, FINTEGER *lwork, FINTEGER *info); - -int sorgqr_(FINTEGER *m, FINTEGER *n, FINTEGER *k, float *a, - FINTEGER *lda, float *tau, float *work, - FINTEGER *lwork, FINTEGER *info); - -int sgemv_(const char *trans, FINTEGER *m, FINTEGER *n, float *alpha, - const float *a, FINTEGER *lda, const float *x, FINTEGER *incx, - float *beta, float *y, FINTEGER *incy); - -} - - -/************************************************** - * Get some stats about the system - **************************************************/ - -namespace faiss { - -double getmillisecs () { - struct timeval tv; - gettimeofday (&tv, nullptr); - return tv.tv_sec * 1e3 + tv.tv_usec * 1e-3; -} - - -#ifdef __linux__ - -size_t get_mem_usage_kb () -{ - int pid = getpid (); - char fname[256]; - snprintf (fname, 256, "/proc/%d/status", pid); - FILE * f = fopen (fname, "r"); - FAISS_THROW_IF_NOT_MSG (f, "cannot open proc status file"); - size_t sz = 0; - for (;;) { - char buf [256]; - if (!fgets (buf, 256, f)) break; - if (sscanf (buf, "VmRSS: %ld kB", &sz) == 1) break; - } - fclose (f); - return sz; -} - -#elif __APPLE__ - -size_t get_mem_usage_kb () -{ - fprintf(stderr, "WARN: get_mem_usage_kb not implemented on the mac\n"); - return 0; -} - -#endif - - - -/************************************************** - * Random data generation functions - **************************************************/ - -RandomGenerator::RandomGenerator (int64_t seed) - : mt((unsigned int)seed) {} - -int RandomGenerator::rand_int () -{ - return mt() & 0x7fffffff; -} - -int64_t RandomGenerator::rand_int64 () -{ - return int64_t(rand_int()) | int64_t(rand_int()) << 31; -} - -int RandomGenerator::rand_int (int max) -{ - return mt() % max; -} - -float RandomGenerator::rand_float () -{ - return mt() / float(mt.max()); -} - -double RandomGenerator::rand_double () -{ - return mt() / double(mt.max()); -} - - -/*********************************************************************** - * Random functions in this C file only exist because Torch - * counterparts are slow and not multi-threaded. Typical use is for - * more than 1-100 billion values. */ - - -/* Generate a set of random floating point values such that x[i] in [0,1] - multi-threading. For this reason, we rely on re-entreant functions. */ -void float_rand (float * x, size_t n, int64_t seed) -{ - // only try to parallelize on large enough arrays - const size_t nblock = n < 1024 ? 1 : 1024; - - RandomGenerator rng0 (seed); - int a0 = rng0.rand_int (), b0 = rng0.rand_int (); - -#pragma omp parallel for - for (size_t j = 0; j < nblock; j++) { - - RandomGenerator rng (a0 + j * b0); - - const size_t istart = j * n / nblock; - const size_t iend = (j + 1) * n / nblock; - - for (size_t i = istart; i < iend; i++) - x[i] = rng.rand_float (); - } -} - - -void float_randn (float * x, size_t n, int64_t seed) -{ - // only try to parallelize on large enough arrays - const size_t nblock = n < 1024 ? 1 : 1024; - - RandomGenerator rng0 (seed); - int a0 = rng0.rand_int (), b0 = rng0.rand_int (); - -#pragma omp parallel for - for (size_t j = 0; j < nblock; j++) { - RandomGenerator rng (a0 + j * b0); - - double a = 0, b = 0, s = 0; - int state = 0; /* generate two number per "do-while" loop */ - - const size_t istart = j * n / nblock; - const size_t iend = (j + 1) * n / nblock; - - for (size_t i = istart; i < iend; i++) { - /* Marsaglia's method (see Knuth) */ - if (state == 0) { - do { - a = 2.0 * rng.rand_double () - 1; - b = 2.0 * rng.rand_double () - 1; - s = a * a + b * b; - } while (s >= 1.0); - x[i] = a * sqrt(-2.0 * log(s) / s); - } - else - x[i] = b * sqrt(-2.0 * log(s) / s); - state = 1 - state; - } - } -} - - -/* Integer versions */ -void int64_rand (int64_t * x, size_t n, int64_t seed) -{ - // only try to parallelize on large enough arrays - const size_t nblock = n < 1024 ? 1 : 1024; - - RandomGenerator rng0 (seed); - int a0 = rng0.rand_int (), b0 = rng0.rand_int (); - -#pragma omp parallel for - for (size_t j = 0; j < nblock; j++) { - - RandomGenerator rng (a0 + j * b0); - - const size_t istart = j * n / nblock; - const size_t iend = (j + 1) * n / nblock; - for (size_t i = istart; i < iend; i++) - x[i] = rng.rand_int64 (); - } -} - - - -void rand_perm (int *perm, size_t n, int64_t seed) -{ - for (size_t i = 0; i < n; i++) perm[i] = i; - - RandomGenerator rng (seed); - - for (size_t i = 0; i + 1 < n; i++) { - int i2 = i + rng.rand_int (n - i); - std::swap(perm[i], perm[i2]); - } -} - - - - -void byte_rand (uint8_t * x, size_t n, int64_t seed) -{ - // only try to parallelize on large enough arrays - const size_t nblock = n < 1024 ? 1 : 1024; - - RandomGenerator rng0 (seed); - int a0 = rng0.rand_int (), b0 = rng0.rand_int (); - -#pragma omp parallel for - for (size_t j = 0; j < nblock; j++) { - - RandomGenerator rng (a0 + j * b0); - - const size_t istart = j * n / nblock; - const size_t iend = (j + 1) * n / nblock; - - size_t i; - for (i = istart; i < iend; i++) - x[i] = rng.rand_int64 (); - } -} - - - -void reflection (const float * __restrict u, - float * __restrict x, - size_t n, size_t d, size_t nu) -{ - size_t i, j, l; - for (i = 0; i < n; i++) { - const float * up = u; - for (l = 0; l < nu; l++) { - float ip1 = 0, ip2 = 0; - - for (j = 0; j < d; j+=2) { - ip1 += up[j] * x[j]; - ip2 += up[j+1] * x[j+1]; - } - float ip = 2 * (ip1 + ip2); - - for (j = 0; j < d; j++) - x[j] -= ip * up[j]; - up += d; - } - x += d; - } -} - - -/* Reference implementation (slower) */ -void reflection_ref (const float * u, float * x, size_t n, size_t d, size_t nu) -{ - size_t i, j, l; - for (i = 0; i < n; i++) { - const float * up = u; - for (l = 0; l < nu; l++) { - double ip = 0; - - for (j = 0; j < d; j++) - ip += up[j] * x[j]; - ip *= 2; - - for (j = 0; j < d; j++) - x[j] -= ip * up[j]; - - up += d; - } - x += d; - } -} - - - - - -/*************************************************************************** - * Matrix/vector ops - ***************************************************************************/ - - - -/* Compute the inner product between a vector x and - a set of ny vectors y. - These functions are not intended to replace BLAS matrix-matrix, as they - would be significantly less efficient in this case. */ -void fvec_inner_products_ny (float * ip, - const float * x, - const float * y, - size_t d, size_t ny) -{ - // Not sure which one is fastest -#if 0 - { - FINTEGER di = d; - FINTEGER nyi = ny; - float one = 1.0, zero = 0.0; - FINTEGER onei = 1; - sgemv_ ("T", &di, &nyi, &one, y, &di, x, &onei, &zero, ip, &onei); - } -#endif - for (size_t i = 0; i < ny; i++) { - ip[i] = fvec_inner_product (x, y, d); - y += d; - } -} - - - - - -/* Compute the L2 norm of a set of nx vectors */ -void fvec_norms_L2 (float * __restrict nr, - const float * __restrict x, - size_t d, size_t nx) -{ - -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) { - nr[i] = sqrtf (fvec_norm_L2sqr (x + i * d, d)); - } -} - -void fvec_norms_L2sqr (float * __restrict nr, - const float * __restrict x, - size_t d, size_t nx) -{ -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) - nr[i] = fvec_norm_L2sqr (x + i * d, d); -} - - - -void fvec_renorm_L2 (size_t d, size_t nx, float * __restrict x) -{ -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) { - float * __restrict xi = x + i * d; - - float nr = fvec_norm_L2sqr (xi, d); - - if (nr > 0) { - size_t j; - const float inv_nr = 1.0 / sqrtf (nr); - for (j = 0; j < d; j++) - xi[j] *= inv_nr; - } - } -} - - - - - - - - - - - - -/*************************************************************************** - * KNN functions - ***************************************************************************/ - - - -/* Find the nearest neighbors for nx queries in a set of ny vectors */ -static void knn_inner_product_sse (const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_minheap_array_t * res) -{ - size_t k = res->k; - size_t check_period = InterruptCallback::get_period_hint (ny * d); - - check_period *= omp_get_max_threads(); - - for (size_t i0 = 0; i0 < nx; i0 += check_period) { - size_t i1 = std::min(i0 + check_period, nx); - -#pragma omp parallel for - for (size_t i = i0; i < i1; i++) { - const float * x_i = x + i * d; - const float * y_j = y; - - float * __restrict simi = res->get_val(i); - int64_t * __restrict idxi = res->get_ids (i); - - minheap_heapify (k, simi, idxi); - - for (size_t j = 0; j < ny; j++) { - float ip = fvec_inner_product (x_i, y_j, d); - - if (ip > simi[0]) { - minheap_pop (k, simi, idxi); - minheap_push (k, simi, idxi, ip, j); - } - y_j += d; - } - minheap_reorder (k, simi, idxi); - } - InterruptCallback::check (); - } - -} - -static void knn_L2sqr_sse ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res) -{ - size_t k = res->k; - - size_t check_period = InterruptCallback::get_period_hint (ny * d); - check_period *= omp_get_max_threads(); - - for (size_t i0 = 0; i0 < nx; i0 += check_period) { - size_t i1 = std::min(i0 + check_period, nx); - -#pragma omp parallel for - for (size_t i = i0; i < i1; i++) { - const float * x_i = x + i * d; - const float * y_j = y; - size_t j; - float * simi = res->get_val(i); - int64_t * idxi = res->get_ids (i); - - maxheap_heapify (k, simi, idxi); - for (j = 0; j < ny; j++) { - float disij = fvec_L2sqr (x_i, y_j, d); - - if (disij < simi[0]) { - maxheap_pop (k, simi, idxi); - maxheap_push (k, simi, idxi, disij, j); - } - y_j += d; - } - maxheap_reorder (k, simi, idxi); - } - InterruptCallback::check (); - } - -} - - -/** Find the nearest neighbors for nx queries in a set of ny vectors */ -static void knn_inner_product_blas ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_minheap_array_t * res) -{ - res->heapify (); - - // BLAS does not like empty matrices - if (nx == 0 || ny == 0) return; - - /* block sizes */ - const size_t bs_x = 4096, bs_y = 1024; - // const size_t bs_x = 16, bs_y = 16; - std::unique_ptr ip_block(new float[bs_x * bs_y]); - - for (size_t i0 = 0; i0 < nx; i0 += bs_x) { - size_t i1 = i0 + bs_x; - if(i1 > nx) i1 = nx; - - for (size_t j0 = 0; j0 < ny; j0 += bs_y) { - size_t j1 = j0 + bs_y; - if (j1 > ny) j1 = ny; - /* compute the actual dot products */ - { - float one = 1, zero = 0; - FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d; - sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one, - y + j0 * d, &di, - x + i0 * d, &di, &zero, - ip_block.get(), &nyi); - } - - /* collect maxima */ - res->addn (j1 - j0, ip_block.get(), j0, i0, i1 - i0); - } - InterruptCallback::check (); - } - res->reorder (); -} - -// distance correction is an operator that can be applied to transform -// the distances -template -static void knn_L2sqr_blas (const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res, - const DistanceCorrection &corr) -{ - res->heapify (); - - // BLAS does not like empty matrices - if (nx == 0 || ny == 0) return; - - size_t k = res->k; - - /* block sizes */ - const size_t bs_x = 4096, bs_y = 1024; - // const size_t bs_x = 16, bs_y = 16; - float *ip_block = new float[bs_x * bs_y]; - float *x_norms = new float[nx]; - float *y_norms = new float[ny]; - ScopeDeleter del1(ip_block), del3(x_norms), del2(y_norms); - - fvec_norms_L2sqr (x_norms, x, d, nx); - fvec_norms_L2sqr (y_norms, y, d, ny); - - - for (size_t i0 = 0; i0 < nx; i0 += bs_x) { - size_t i1 = i0 + bs_x; - if(i1 > nx) i1 = nx; - - for (size_t j0 = 0; j0 < ny; j0 += bs_y) { - size_t j1 = j0 + bs_y; - if (j1 > ny) j1 = ny; - /* compute the actual dot products */ - { - float one = 1, zero = 0; - FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d; - sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one, - y + j0 * d, &di, - x + i0 * d, &di, &zero, - ip_block, &nyi); - } - - /* collect minima */ -#pragma omp parallel for - for (size_t i = i0; i < i1; i++) { - float * __restrict simi = res->get_val(i); - int64_t * __restrict idxi = res->get_ids (i); - const float *ip_line = ip_block + (i - i0) * (j1 - j0); - - for (size_t j = j0; j < j1; j++) { - float ip = *ip_line++; - float dis = x_norms[i] + y_norms[j] - 2 * ip; - - // negative values can occur for identical vectors - // due to roundoff errors - if (dis < 0) dis = 0; - - dis = corr (dis, i, j); - - if (dis < simi[0]) { - maxheap_pop (k, simi, idxi); - maxheap_push (k, simi, idxi, dis, j); - } - } - } - } - InterruptCallback::check (); - } - res->reorder (); - -} - - - - - - - - - -/******************************************************* - * KNN driver functions - *******************************************************/ - -int distance_compute_blas_threshold = 20; - -void knn_inner_product (const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_minheap_array_t * res) -{ - if (d % 4 == 0 && nx < distance_compute_blas_threshold) { - knn_inner_product_sse (x, y, d, nx, ny, res); - } else { - knn_inner_product_blas (x, y, d, nx, ny, res); - } -} - - - -struct NopDistanceCorrection { - float operator()(float dis, size_t /*qno*/, size_t /*bno*/) const { - return dis; - } -}; - -void knn_L2sqr (const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res) -{ - if (d % 4 == 0 && nx < distance_compute_blas_threshold) { - knn_L2sqr_sse (x, y, d, nx, ny, res); - } else { - NopDistanceCorrection nop; - knn_L2sqr_blas (x, y, d, nx, ny, res, nop); - } -} - -struct BaseShiftDistanceCorrection { - const float *base_shift; - float operator()(float dis, size_t /*qno*/, size_t bno) const { - return dis - base_shift[bno]; - } -}; - -void knn_L2sqr_base_shift ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res, - const float *base_shift) -{ - BaseShiftDistanceCorrection corr = {base_shift}; - knn_L2sqr_blas (x, y, d, nx, ny, res, corr); -} - - - -/*************************************************************************** - * compute a subset of distances - ***************************************************************************/ - -/* compute the inner product between x and a subset y of ny vectors, - whose indices are given by idy. */ -void fvec_inner_products_by_idx (float * __restrict ip, - const float * x, - const float * y, - const int64_t * __restrict ids, /* for y vecs */ - size_t d, size_t nx, size_t ny) -{ -#pragma omp parallel for - for (size_t j = 0; j < nx; j++) { - const int64_t * __restrict idsj = ids + j * ny; - const float * xj = x + j * d; - float * __restrict ipj = ip + j * ny; - for (size_t i = 0; i < ny; i++) { - if (idsj[i] < 0) - continue; - ipj[i] = fvec_inner_product (xj, y + d * idsj[i], d); - } - } -} - -/* compute the inner product between x and a subset y of ny vectors, - whose indices are given by idy. */ -void fvec_L2sqr_by_idx (float * __restrict dis, - const float * x, - const float * y, - const int64_t * __restrict ids, /* ids of y vecs */ - size_t d, size_t nx, size_t ny) -{ -#pragma omp parallel for - for (size_t j = 0; j < nx; j++) { - const int64_t * __restrict idsj = ids + j * ny; - const float * xj = x + j * d; - float * __restrict disj = dis + j * ny; - for (size_t i = 0; i < ny; i++) { - if (idsj[i] < 0) - continue; - disj[i] = fvec_L2sqr (xj, y + d * idsj[i], d); - } - } -} - - - - - -/* Find the nearest neighbors for nx queries in a set of ny vectors - indexed by ids. May be useful for re-ranking a pre-selected vector list */ -void knn_inner_products_by_idx (const float * x, - const float * y, - const int64_t * ids, - size_t d, size_t nx, size_t ny, - float_minheap_array_t * res) -{ - size_t k = res->k; - -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) { - const float * x_ = x + i * d; - const int64_t * idsi = ids + i * ny; - size_t j; - float * __restrict simi = res->get_val(i); - int64_t * __restrict idxi = res->get_ids (i); - minheap_heapify (k, simi, idxi); - - for (j = 0; j < ny; j++) { - if (idsi[j] < 0) break; - float ip = fvec_inner_product (x_, y + d * idsi[j], d); - - if (ip > simi[0]) { - minheap_pop (k, simi, idxi); - minheap_push (k, simi, idxi, ip, idsi[j]); - } - } - minheap_reorder (k, simi, idxi); - } - -} - -void knn_L2sqr_by_idx (const float * x, - const float * y, - const int64_t * __restrict ids, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res) -{ - size_t k = res->k; - -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) { - const float * x_ = x + i * d; - const int64_t * __restrict idsi = ids + i * ny; - float * __restrict simi = res->get_val(i); - int64_t * __restrict idxi = res->get_ids (i); - maxheap_heapify (res->k, simi, idxi); - for (size_t j = 0; j < ny; j++) { - float disij = fvec_L2sqr (x_, y + d * idsi[j], d); - - if (disij < simi[0]) { - maxheap_pop (k, simi, idxi); - maxheap_push (k, simi, idxi, disij, idsi[j]); - } - } - maxheap_reorder (res->k, simi, idxi); - } - -} - - - - - -/*************************************************************************** - * Range search - ***************************************************************************/ - -/** Find the nearest neighbors for nx queries in a set of ny vectors - * compute_l2 = compute pairwise squared L2 distance rather than inner prod - */ -template -static void range_search_blas ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float radius, - RangeSearchResult *result) -{ - - // BLAS does not like empty matrices - if (nx == 0 || ny == 0) return; - - /* block sizes */ - const size_t bs_x = 4096, bs_y = 1024; - // const size_t bs_x = 16, bs_y = 16; - float *ip_block = new float[bs_x * bs_y]; - ScopeDeleter del0(ip_block); - - float *x_norms = nullptr, *y_norms = nullptr; - ScopeDeleter del1, del2; - if (compute_l2) { - x_norms = new float[nx]; - del1.set (x_norms); - fvec_norms_L2sqr (x_norms, x, d, nx); - - y_norms = new float[ny]; - del2.set (y_norms); - fvec_norms_L2sqr (y_norms, y, d, ny); - } - - std::vector partial_results; - - for (size_t j0 = 0; j0 < ny; j0 += bs_y) { - size_t j1 = j0 + bs_y; - if (j1 > ny) j1 = ny; - RangeSearchPartialResult * pres = new RangeSearchPartialResult (result); - partial_results.push_back (pres); - - for (size_t i0 = 0; i0 < nx; i0 += bs_x) { - size_t i1 = i0 + bs_x; - if(i1 > nx) i1 = nx; - - /* compute the actual dot products */ - { - float one = 1, zero = 0; - FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d; - sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one, - y + j0 * d, &di, - x + i0 * d, &di, &zero, - ip_block, &nyi); - } - - - for (size_t i = i0; i < i1; i++) { - const float *ip_line = ip_block + (i - i0) * (j1 - j0); - - RangeQueryResult & qres = pres->new_result (i); - - for (size_t j = j0; j < j1; j++) { - float ip = *ip_line++; - if (compute_l2) { - float dis = x_norms[i] + y_norms[j] - 2 * ip; - if (dis < radius) { - qres.add (dis, j); - } - } else { - if (ip > radius) { - qres.add (ip, j); - } - } - } - } - } - InterruptCallback::check (); - } - - RangeSearchPartialResult::merge (partial_results); -} - - -template -static void range_search_sse (const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float radius, - RangeSearchResult *res) -{ - FAISS_THROW_IF_NOT (d % 4 == 0); - -#pragma omp parallel - { - RangeSearchPartialResult pres (res); - -#pragma omp for - for (size_t i = 0; i < nx; i++) { - const float * x_ = x + i * d; - const float * y_ = y; - size_t j; - - RangeQueryResult & qres = pres.new_result (i); - - for (j = 0; j < ny; j++) { - if (compute_l2) { - float disij = fvec_L2sqr (x_, y_, d); - if (disij < radius) { - qres.add (disij, j); - } - } else { - float ip = fvec_inner_product (x_, y_, d); - if (ip > radius) { - qres.add (ip, j); - } - } - y_ += d; - } - - } - pres.finalize (); - } - - // check just at the end because the use case is typically just - // when the nb of queries is low. - InterruptCallback::check(); -} - - - - - -void range_search_L2sqr ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float radius, - RangeSearchResult *res) -{ - - if (d % 4 == 0 && nx < distance_compute_blas_threshold) { - range_search_sse (x, y, d, nx, ny, radius, res); - } else { - range_search_blas (x, y, d, nx, ny, radius, res); - } -} - -void range_search_inner_product ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float radius, - RangeSearchResult *res) -{ - - if (d % 4 == 0 && nx < distance_compute_blas_threshold) { - range_search_sse (x, y, d, nx, ny, radius, res); - } else { - range_search_blas (x, y, d, nx, ny, radius, res); - } -} - - - -/*************************************************************************** - * Some matrix manipulation functions - ***************************************************************************/ - - -/* This function exists because the Torch counterpart is extremly slow - (not multi-threaded + unexpected overhead even in single thread). - It is here to implement the usual property |x-y|^2=|x|^2+|y|^2-2 */ -void inner_product_to_L2sqr (float * __restrict dis, - const float * nr1, - const float * nr2, - size_t n1, size_t n2) -{ - -#pragma omp parallel for - for (size_t j = 0 ; j < n1 ; j++) { - float * disj = dis + j * n2; - for (size_t i = 0 ; i < n2 ; i++) - disj[i] = nr1[j] + nr2[i] - 2 * disj[i]; - } -} - - -void matrix_qr (int m, int n, float *a) -{ - FAISS_THROW_IF_NOT (m >= n); - FINTEGER mi = m, ni = n, ki = mi < ni ? mi : ni; - std::vector tau (ki); - FINTEGER lwork = -1, info; - float work_size; - - sgeqrf_ (&mi, &ni, a, &mi, tau.data(), - &work_size, &lwork, &info); - lwork = size_t(work_size); - std::vector work (lwork); - - sgeqrf_ (&mi, &ni, a, &mi, - tau.data(), work.data(), &lwork, &info); - - sorgqr_ (&mi, &ni, &ki, a, &mi, tau.data(), - work.data(), &lwork, &info); - -} - - -void pairwise_L2sqr (int64_t d, - int64_t nq, const float *xq, - int64_t nb, const float *xb, - float *dis, - int64_t ldq, int64_t ldb, int64_t ldd) -{ - if (nq == 0 || nb == 0) return; - if (ldq == -1) ldq = d; - if (ldb == -1) ldb = d; - if (ldd == -1) ldd = nb; - - // store in beginning of distance matrix to avoid malloc - float *b_norms = dis; - -#pragma omp parallel for - for (int64_t i = 0; i < nb; i++) - b_norms [i] = fvec_norm_L2sqr (xb + i * ldb, d); - -#pragma omp parallel for - for (int64_t i = 1; i < nq; i++) { - float q_norm = fvec_norm_L2sqr (xq + i * ldq, d); - for (int64_t j = 0; j < nb; j++) - dis[i * ldd + j] = q_norm + b_norms [j]; - } - - { - float q_norm = fvec_norm_L2sqr (xq, d); - for (int64_t j = 0; j < nb; j++) - dis[j] += q_norm; - } - - { - FINTEGER nbi = nb, nqi = nq, di = d, ldqi = ldq, ldbi = ldb, lddi = ldd; - float one = 1.0, minus_2 = -2.0; - - sgemm_ ("Transposed", "Not transposed", - &nbi, &nqi, &di, - &minus_2, - xb, &ldbi, - xq, &ldqi, - &one, dis, &lddi); - } - -} - -/*************************************************************************** - * Kmeans subroutine - ***************************************************************************/ - -// a bit above machine epsilon for float16 - -#define EPS (1 / 1024.) - -/* For k-means, compute centroids given assignment of vectors to centroids */ -int km_update_centroids (const float * x, - float * centroids, - int64_t * assign, - size_t d, size_t k, size_t n, - size_t k_frozen) -{ - k -= k_frozen; - centroids += k_frozen * d; - - std::vector hassign(k); - memset (centroids, 0, sizeof(*centroids) * d * k); - -#pragma omp parallel - { - int nt = omp_get_num_threads(); - int rank = omp_get_thread_num(); - // this thread is taking care of centroids c0:c1 - size_t c0 = (k * rank) / nt; - size_t c1 = (k * (rank + 1)) / nt; - const float *xi = x; - size_t nacc = 0; - - for (size_t i = 0; i < n; i++) { - int64_t ci = assign[i]; - assert (ci >= 0 && ci < k + k_frozen); - ci -= k_frozen; - if (ci >= c0 && ci < c1) { - float * c = centroids + ci * d; - hassign[ci]++; - for (size_t j = 0; j < d; j++) - c[j] += xi[j]; - nacc++; - } - xi += d; - } - - } - -#pragma omp parallel for - for (size_t ci = 0; ci < k; ci++) { - float * c = centroids + ci * d; - float ni = (float) hassign[ci]; - if (ni != 0) { - for (size_t j = 0; j < d; j++) - c[j] /= ni; - } - } - - /* Take care of void clusters */ - size_t nsplit = 0; - RandomGenerator rng (1234); - for (size_t ci = 0; ci < k; ci++) { - if (hassign[ci] == 0) { /* need to redefine a centroid */ - size_t cj; - for (cj = 0; 1; cj = (cj + 1) % k) { - /* probability to pick this cluster for split */ - float p = (hassign[cj] - 1.0) / (float) (n - k); - float r = rng.rand_float (); - if (r < p) { - break; /* found our cluster to be split */ - } - } - memcpy (centroids+ci*d, centroids+cj*d, sizeof(*centroids) * d); - - /* small symmetric pertubation. Much better than */ - for (size_t j = 0; j < d; j++) { - if (j % 2 == 0) { - centroids[ci * d + j] *= 1 + EPS; - centroids[cj * d + j] *= 1 - EPS; - } else { - centroids[ci * d + j] *= 1 - EPS; - centroids[cj * d + j] *= 1 + EPS; - } - } - - /* assume even split of the cluster */ - hassign[ci] = hassign[cj] / 2; - hassign[cj] -= hassign[ci]; - nsplit++; - } - } - - return nsplit; -} - -#undef EPS - - - -/*************************************************************************** - * Result list routines - ***************************************************************************/ - - -void ranklist_handle_ties (int k, int64_t *idx, const float *dis) -{ - float prev_dis = -1e38; - int prev_i = -1; - for (int i = 0; i < k; i++) { - if (dis[i] != prev_dis) { - if (i > prev_i + 1) { - // sort between prev_i and i - 1 - std::sort (idx + prev_i, idx + i); - } - prev_i = i; - prev_dis = dis[i]; - } - } -} - -size_t merge_result_table_with (size_t n, size_t k, - int64_t *I0, float *D0, - const int64_t *I1, const float *D1, - bool keep_min, - int64_t translation) -{ - size_t n1 = 0; - -#pragma omp parallel reduction(+:n1) - { - std::vector tmpI (k); - std::vector tmpD (k); - -#pragma omp for - for (size_t i = 0; i < n; i++) { - int64_t *lI0 = I0 + i * k; - float *lD0 = D0 + i * k; - const int64_t *lI1 = I1 + i * k; - const float *lD1 = D1 + i * k; - size_t r0 = 0; - size_t r1 = 0; - - if (keep_min) { - for (size_t j = 0; j < k; j++) { - - if (lI0[r0] >= 0 && lD0[r0] < lD1[r1]) { - tmpD[j] = lD0[r0]; - tmpI[j] = lI0[r0]; - r0++; - } else if (lD1[r1] >= 0) { - tmpD[j] = lD1[r1]; - tmpI[j] = lI1[r1] + translation; - r1++; - } else { // both are NaNs - tmpD[j] = NAN; - tmpI[j] = -1; - } - } - } else { - for (size_t j = 0; j < k; j++) { - if (lI0[r0] >= 0 && lD0[r0] > lD1[r1]) { - tmpD[j] = lD0[r0]; - tmpI[j] = lI0[r0]; - r0++; - } else if (lD1[r1] >= 0) { - tmpD[j] = lD1[r1]; - tmpI[j] = lI1[r1] + translation; - r1++; - } else { // both are NaNs - tmpD[j] = NAN; - tmpI[j] = -1; - } - } - } - n1 += r1; - memcpy (lD0, tmpD.data(), sizeof (lD0[0]) * k); - memcpy (lI0, tmpI.data(), sizeof (lI0[0]) * k); - } - } - - return n1; -} - - - -size_t ranklist_intersection_size (size_t k1, const int64_t *v1, - size_t k2, const int64_t *v2_in) -{ - if (k2 > k1) return ranklist_intersection_size (k2, v2_in, k1, v1); - int64_t *v2 = new int64_t [k2]; - memcpy (v2, v2_in, sizeof (int64_t) * k2); - std::sort (v2, v2 + k2); - { // de-dup v2 - int64_t prev = -1; - size_t wp = 0; - for (size_t i = 0; i < k2; i++) { - if (v2 [i] != prev) { - v2[wp++] = prev = v2 [i]; - } - } - k2 = wp; - } - const int64_t seen_flag = 1L << 60; - size_t count = 0; - for (size_t i = 0; i < k1; i++) { - int64_t q = v1 [i]; - size_t i0 = 0, i1 = k2; - while (i0 + 1 < i1) { - size_t imed = (i1 + i0) / 2; - int64_t piv = v2 [imed] & ~seen_flag; - if (piv <= q) i0 = imed; - else i1 = imed; - } - if (v2 [i0] == q) { - count++; - v2 [i0] |= seen_flag; - } - } - delete [] v2; - - return count; -} - -double imbalance_factor (int k, const int *hist) { - double tot = 0, uf = 0; - - for (int i = 0 ; i < k ; i++) { - tot += hist[i]; - uf += hist[i] * (double) hist[i]; - } - uf = uf * k / (tot * tot); - - return uf; -} - - -double imbalance_factor (int n, int k, const int64_t *assign) { - std::vector hist(k, 0); - for (int i = 0; i < n; i++) { - hist[assign[i]]++; - } - - return imbalance_factor (k, hist.data()); -} - - - -int ivec_hist (size_t n, const int * v, int vmax, int *hist) { - memset (hist, 0, sizeof(hist[0]) * vmax); - int nout = 0; - while (n--) { - if (v[n] < 0 || v[n] >= vmax) nout++; - else hist[v[n]]++; - } - return nout; -} - - -void bincode_hist(size_t n, size_t nbits, const uint8_t *codes, int *hist) -{ - FAISS_THROW_IF_NOT (nbits % 8 == 0); - size_t d = nbits / 8; - std::vector accu(d * 256); - const uint8_t *c = codes; - for (size_t i = 0; i < n; i++) - for(int j = 0; j < d; j++) - accu[j * 256 + *c++]++; - memset (hist, 0, sizeof(*hist) * nbits); - for (int i = 0; i < d; i++) { - const int *ai = accu.data() + i * 256; - int * hi = hist + i * 8; - for (int j = 0; j < 256; j++) - for (int k = 0; k < 8; k++) - if ((j >> k) & 1) - hi[k] += ai[j]; - } - -} - - - -size_t ivec_checksum (size_t n, const int *a) -{ - size_t cs = 112909; - while (n--) cs = cs * 65713 + a[n] * 1686049; - return cs; -} - - -namespace { - struct ArgsortComparator { - const float *vals; - bool operator() (const size_t a, const size_t b) const { - return vals[a] < vals[b]; - } - }; - - struct SegmentS { - size_t i0; // begin pointer in the permutation array - size_t i1; // end - size_t len() const { - return i1 - i0; - } - }; - - // see https://en.wikipedia.org/wiki/Merge_algorithm#Parallel_merge - // extended to > 1 merge thread - - // merges 2 ranges that should be consecutive on the source into - // the union of the two on the destination - template - void parallel_merge (const T *src, T *dst, - SegmentS &s1, SegmentS & s2, int nt, - const ArgsortComparator & comp) { - if (s2.len() > s1.len()) { // make sure that s1 larger than s2 - std::swap(s1, s2); - } - - // compute sub-ranges for each thread - SegmentS s1s[nt], s2s[nt], sws[nt]; - s2s[0].i0 = s2.i0; - s2s[nt - 1].i1 = s2.i1; - - // not sure parallel actually helps here -#pragma omp parallel for num_threads(nt) - for (int t = 0; t < nt; t++) { - s1s[t].i0 = s1.i0 + s1.len() * t / nt; - s1s[t].i1 = s1.i0 + s1.len() * (t + 1) / nt; - - if (t + 1 < nt) { - T pivot = src[s1s[t].i1]; - size_t i0 = s2.i0, i1 = s2.i1; - while (i0 + 1 < i1) { - size_t imed = (i1 + i0) / 2; - if (comp (pivot, src[imed])) {i1 = imed; } - else {i0 = imed; } - } - s2s[t].i1 = s2s[t + 1].i0 = i1; - } - } - s1.i0 = std::min(s1.i0, s2.i0); - s1.i1 = std::max(s1.i1, s2.i1); - s2 = s1; - sws[0].i0 = s1.i0; - for (int t = 0; t < nt; t++) { - sws[t].i1 = sws[t].i0 + s1s[t].len() + s2s[t].len(); - if (t + 1 < nt) { - sws[t + 1].i0 = sws[t].i1; - } - } - assert(sws[nt - 1].i1 == s1.i1); - - // do the actual merging -#pragma omp parallel for num_threads(nt) - for (int t = 0; t < nt; t++) { - SegmentS sw = sws[t]; - SegmentS s1t = s1s[t]; - SegmentS s2t = s2s[t]; - if (s1t.i0 < s1t.i1 && s2t.i0 < s2t.i1) { - for (;;) { - // assert (sw.len() == s1t.len() + s2t.len()); - if (comp(src[s1t.i0], src[s2t.i0])) { - dst[sw.i0++] = src[s1t.i0++]; - if (s1t.i0 == s1t.i1) break; - } else { - dst[sw.i0++] = src[s2t.i0++]; - if (s2t.i0 == s2t.i1) break; - } - } - } - if (s1t.len() > 0) { - assert(s1t.len() == sw.len()); - memcpy(dst + sw.i0, src + s1t.i0, s1t.len() * sizeof(dst[0])); - } else if (s2t.len() > 0) { - assert(s2t.len() == sw.len()); - memcpy(dst + sw.i0, src + s2t.i0, s2t.len() * sizeof(dst[0])); - } - } - } - -}; - -void fvec_argsort (size_t n, const float *vals, - size_t *perm) -{ - for (size_t i = 0; i < n; i++) perm[i] = i; - ArgsortComparator comp = {vals}; - std::sort (perm, perm + n, comp); -} - -void fvec_argsort_parallel (size_t n, const float *vals, - size_t *perm) -{ - size_t * perm2 = new size_t[n]; - // 2 result tables, during merging, flip between them - size_t *permB = perm2, *permA = perm; - - int nt = omp_get_max_threads(); - { // prepare correct permutation so that the result ends in perm - // at final iteration - int nseg = nt; - while (nseg > 1) { - nseg = (nseg + 1) / 2; - std::swap (permA, permB); - } - } - -#pragma omp parallel - for (size_t i = 0; i < n; i++) permA[i] = i; - - ArgsortComparator comp = {vals}; - - SegmentS segs[nt]; - - // independent sorts -#pragma omp parallel for - for (int t = 0; t < nt; t++) { - size_t i0 = t * n / nt; - size_t i1 = (t + 1) * n / nt; - SegmentS seg = {i0, i1}; - std::sort (permA + seg.i0, permA + seg.i1, comp); - segs[t] = seg; - } - int prev_nested = omp_get_nested(); - omp_set_nested(1); - - int nseg = nt; - while (nseg > 1) { - int nseg1 = (nseg + 1) / 2; - int sub_nt = nseg % 2 == 0 ? nt : nt - 1; - int sub_nseg1 = nseg / 2; - -#pragma omp parallel for num_threads(nseg1) - for (int s = 0; s < nseg; s += 2) { - if (s + 1 == nseg) { // otherwise isolated segment - memcpy(permB + segs[s].i0, permA + segs[s].i0, - segs[s].len() * sizeof(size_t)); - } else { - int t0 = s * sub_nt / sub_nseg1; - int t1 = (s + 1) * sub_nt / sub_nseg1; - printf("merge %d %d, %d threads\n", s, s + 1, t1 - t0); - parallel_merge(permA, permB, segs[s], segs[s + 1], - t1 - t0, comp); - } - } - for (int s = 0; s < nseg; s += 2) - segs[s / 2] = segs[s]; - nseg = nseg1; - std::swap (permA, permB); - } - assert (permA == perm); - omp_set_nested(prev_nested); - delete [] perm2; -} - - - - - - - - - - - - - - - - - - -const float *fvecs_maybe_subsample ( - size_t d, size_t *n, size_t nmax, const float *x, - bool verbose, int64_t seed) -{ - - if (*n <= nmax) return x; // nothing to do - - size_t n2 = nmax; - if (verbose) { - printf (" Input training set too big (max size is %ld), sampling " - "%ld / %ld vectors\n", nmax, n2, *n); - } - std::vector subset (*n); - rand_perm (subset.data (), *n, seed); - float *x_subset = new float[n2 * d]; - for (int64_t i = 0; i < n2; i++) - memcpy (&x_subset[i * d], - &x[subset[i] * size_t(d)], - sizeof (x[0]) * d); - *n = n2; - return x_subset; -} - - -void binary_to_real(size_t d, const uint8_t *x_in, float *x_out) { - for (size_t i = 0; i < d; ++i) { - x_out[i] = 2 * ((x_in[i >> 3] >> (i & 7)) & 1) - 1; - } -} - -void real_to_binary(size_t d, const float *x_in, uint8_t *x_out) { - for (size_t i = 0; i < d / 8; ++i) { - uint8_t b = 0; - for (int j = 0; j < 8; ++j) { - if (x_in[8 * i + j] > 0) { - b |= (1 << j); - } - } - x_out[i] = b; - } -} - - -// from Python's stringobject.c -uint64_t hash_bytes (const uint8_t *bytes, int64_t n) { - const uint8_t *p = bytes; - uint64_t x = (uint64_t)(*p) << 7; - int64_t len = n; - while (--len >= 0) { - x = (1000003*x) ^ *p++; - } - x ^= n; - return x; -} - - -bool check_openmp() { - omp_set_num_threads(10); - - if (omp_get_max_threads() != 10) { - return false; - } - - std::vector nt_per_thread(10); - size_t sum = 0; - bool in_parallel = true; -#pragma omp parallel reduction(+: sum) - { - if (!omp_in_parallel()) { - in_parallel = false; - } - - int nt = omp_get_num_threads(); - int rank = omp_get_thread_num(); - - nt_per_thread[rank] = nt; -#pragma omp for - for(int i = 0; i < 1000 * 1000 * 10; i++) { - sum += i; - } - } - - if (!in_parallel) { - return false; - } - if (nt_per_thread[0] != 10) { - return false; - } - if (sum == 0) { - return false; - } - - return true; -} - -} // namespace faiss diff --git a/utils.h b/utils.h deleted file mode 100644 index 6d802a5533..0000000000 --- a/utils.h +++ /dev/null @@ -1,418 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -/* - * A few utilitary functions for similarity search: - * - random generators - * - optimized exhaustive distance and knn search functions - * - some functions reimplemented from torch for speed - */ - -#ifndef FAISS_utils_h -#define FAISS_utils_h - -#include -#include - -#include "Heap.h" - - -namespace faiss { - - -/************************************************** - * Get some stats about the system -**************************************************/ - - -/// ms elapsed since some arbitrary epoch -double getmillisecs (); - -/// get current RSS usage in kB -size_t get_mem_usage_kb (); - - -/************************************************** - * Random data generation functions - **************************************************/ - -/// random generator that can be used in multithreaded contexts -struct RandomGenerator { - - std::mt19937 mt; - - /// random positive integer - int rand_int (); - - /// random int64_t - int64_t rand_int64 (); - - /// generate random integer between 0 and max-1 - int rand_int (int max); - - /// between 0 and 1 - float rand_float (); - - double rand_double (); - - explicit RandomGenerator (int64_t seed = 1234); -}; - -/* Generate an array of uniform random floats / multi-threaded implementation */ -void float_rand (float * x, size_t n, int64_t seed); -void float_randn (float * x, size_t n, int64_t seed); -void int64_rand (int64_t * x, size_t n, int64_t seed); -void byte_rand (uint8_t * x, size_t n, int64_t seed); - -/* random permutation */ -void rand_perm (int * perm, size_t n, int64_t seed); - - - - /********************************************************* - * Optimized distance/norm/inner prod computations - *********************************************************/ - - -/// Squared L2 distance between two vectors -float fvec_L2sqr ( - const float * x, - const float * y, - size_t d); - -/// inner product -float fvec_inner_product ( - const float * x, - const float * y, - size_t d); - -/// L1 distance -float fvec_L1 ( - const float * x, - const float * y, - size_t d); - -float fvec_Linf ( - const float * x, - const float * y, - size_t d); - - -/// a balanced assignment has a IF of 1 -double imbalance_factor (int n, int k, const int64_t *assign); - -/// same, takes a histogram as input -double imbalance_factor (int k, const int *hist); - -/** Compute pairwise distances between sets of vectors - * - * @param d dimension of the vectors - * @param nq nb of query vectors - * @param nb nb of database vectors - * @param xq query vectors (size nq * d) - * @param xb database vectros (size nb * d) - * @param dis output distances (size nq * nb) - * @param ldq,ldb, ldd strides for the matrices - */ -void pairwise_L2sqr (int64_t d, - int64_t nq, const float *xq, - int64_t nb, const float *xb, - float *dis, - int64_t ldq = -1, int64_t ldb = -1, int64_t ldd = -1); - -/* compute the inner product between nx vectors x and one y */ -void fvec_inner_products_ny ( - float * ip, /* output inner product */ - const float * x, - const float * y, - size_t d, size_t ny); - -/* compute ny square L2 distance bewteen x and a set of contiguous y vectors */ -void fvec_L2sqr_ny ( - float * dis, - const float * x, - const float * y, - size_t d, size_t ny); - - -/** squared norm of a vector */ -float fvec_norm_L2sqr (const float * x, - size_t d); - -/** compute the L2 norms for a set of vectors - * - * @param ip output norms, size nx - * @param x set of vectors, size nx * d - */ -void fvec_norms_L2 (float * ip, const float * x, size_t d, size_t nx); - -/// same as fvec_norms_L2, but computes square norms -void fvec_norms_L2sqr (float * ip, const float * x, size_t d, size_t nx); - -/* L2-renormalize a set of vector. Nothing done if the vector is 0-normed */ -void fvec_renorm_L2 (size_t d, size_t nx, float * x); - - -/* This function exists because the Torch counterpart is extremly slow - (not multi-threaded + unexpected overhead even in single thread). - It is here to implement the usual property |x-y|^2=|x|^2+|y|^2-2 */ -void inner_product_to_L2sqr (float * dis, - const float * nr1, - const float * nr2, - size_t n1, size_t n2); - -/*************************************************************************** - * Compute a subset of distances - ***************************************************************************/ - - /* compute the inner product between x and a subset y of ny vectors, - whose indices are given by idy. */ -void fvec_inner_products_by_idx ( - float * ip, - const float * x, - const float * y, - const int64_t *ids, - size_t d, size_t nx, size_t ny); - -/* same but for a subset in y indexed by idsy (ny vectors in total) */ -void fvec_L2sqr_by_idx ( - float * dis, - const float * x, - const float * y, - const int64_t *ids, /* ids of y vecs */ - size_t d, size_t nx, size_t ny); - -/*************************************************************************** - * KNN functions - ***************************************************************************/ - -// threshold on nx above which we switch to BLAS to compute distances -extern int distance_compute_blas_threshold; - -/** Return the k nearest neighors of each of the nx vectors x among the ny - * vector y, w.r.t to max inner product - * - * @param x query vectors, size nx * d - * @param y database vectors, size ny * d - * @param res result array, which also provides k. Sorted on output - */ -void knn_inner_product ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_minheap_array_t * res); - -/** Same as knn_inner_product, for the L2 distance */ -void knn_L2sqr ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res); - - - -/** same as knn_L2sqr, but base_shift[bno] is subtracted to all - * computed distances. - * - * @param base_shift size ny - */ -void knn_L2sqr_base_shift ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res, - const float *base_shift); - -/* Find the nearest neighbors for nx queries in a set of ny vectors - * indexed by ids. May be useful for re-ranking a pre-selected vector list - */ -void knn_inner_products_by_idx ( - const float * x, - const float * y, - const int64_t * ids, - size_t d, size_t nx, size_t ny, - float_minheap_array_t * res); - -void knn_L2sqr_by_idx (const float * x, - const float * y, - const int64_t * ids, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res); - -/*************************************************************************** - * Range search - ***************************************************************************/ - - - -/// Forward declaration, see AuxIndexStructures.h -struct RangeSearchResult; - -/** Return the k nearest neighors of each of the nx vectors x among the ny - * vector y, w.r.t to max inner product - * - * @param x query vectors, size nx * d - * @param y database vectors, size ny * d - * @param radius search radius around the x vectors - * @param result result structure - */ -void range_search_L2sqr ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float radius, - RangeSearchResult *result); - -/// same as range_search_L2sqr for the inner product similarity -void range_search_inner_product ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float radius, - RangeSearchResult *result); - - - - - -/*************************************************************************** - * Misc matrix and vector manipulation functions - ***************************************************************************/ - - -/** compute c := a + bf * b for a, b and c tables - * - * @param n size of the tables - * @param a size n - * @param b size n - * @param c restult table, size n - */ -void fvec_madd (size_t n, const float *a, - float bf, const float *b, float *c); - - -/** same as fvec_madd, also return index of the min of the result table - * @return index of the min of table c - */ -int fvec_madd_and_argmin (size_t n, const float *a, - float bf, const float *b, float *c); - - -/* perform a reflection (not an efficient implementation, just for test ) */ -void reflection (const float * u, float * x, size_t n, size_t d, size_t nu); - - -/** For k-means: update stage. - * - * @param x training vectors, size n * d - * @param centroids centroid vectors, size k * d - * @param assign nearest centroid for each training vector, size n - * @param k_frozen do not update the k_frozen first centroids - * @return nb of spliting operations to fight empty clusters - */ -int km_update_centroids ( - const float * x, - float * centroids, - int64_t * assign, - size_t d, size_t k, size_t n, - size_t k_frozen); - -/** compute the Q of the QR decomposition for m > n - * @param a size n * m: input matrix and output Q - */ -void matrix_qr (int m, int n, float *a); - -/** distances are supposed to be sorted. Sorts indices with same distance*/ -void ranklist_handle_ties (int k, int64_t *idx, const float *dis); - -/** count the number of comon elements between v1 and v2 - * algorithm = sorting + bissection to avoid double-counting duplicates - */ -size_t ranklist_intersection_size (size_t k1, const int64_t *v1, - size_t k2, const int64_t *v2); - -/** merge a result table into another one - * - * @param I0, D0 first result table, size (n, k) - * @param I1, D1 second result table, size (n, k) - * @param keep_min if true, keep min values, otherwise keep max - * @param translation add this value to all I1's indexes - * @return nb of values that were taken from the second table - */ -size_t merge_result_table_with (size_t n, size_t k, - int64_t *I0, float *D0, - const int64_t *I1, const float *D1, - bool keep_min = true, - int64_t translation = 0); - - - -void fvec_argsort (size_t n, const float *vals, - size_t *perm); - -void fvec_argsort_parallel (size_t n, const float *vals, - size_t *perm); - - -/// compute histogram on v -int ivec_hist (size_t n, const int * v, int vmax, int *hist); - -/** Compute histogram of bits on a code array - * - * @param codes size(n, nbits / 8) - * @param hist size(nbits): nb of 1s in the array of codes - */ -void bincode_hist(size_t n, size_t nbits, const uint8_t *codes, int *hist); - - -/// compute a checksum on a table. -size_t ivec_checksum (size_t n, const int *a); - - -/** random subsamples a set of vectors if there are too many of them - * - * @param d dimension of the vectors - * @param n on input: nb of input vectors, output: nb of output vectors - * @param nmax max nb of vectors to keep - * @param x input array, size *n-by-d - * @param seed random seed to use for sampling - * @return x or an array allocated with new [] with *n vectors - */ -const float *fvecs_maybe_subsample ( - size_t d, size_t *n, size_t nmax, const float *x, - bool verbose = false, int64_t seed = 1234); - -/** Convert binary vector to +1/-1 valued float vector. - * - * @param d dimension of the vector (multiple of 8) - * @param x_in input binary vector (uint8_t table of size d / 8) - * @param x_out output float vector (float table of size d) - */ -void binary_to_real(size_t d, const uint8_t *x_in, float *x_out); - -/** Convert float vector to binary vector. Components > 0 are converted to 1, - * others to 0. - * - * @param d dimension of the vector (multiple of 8) - * @param x_in input float vector (float table of size d) - * @param x_out output binary vector (uint8_t table of size d / 8) - */ -void real_to_binary(size_t d, const float *x_in, uint8_t *x_out); - - -/** A reasonable hashing function */ -uint64_t hash_bytes (const uint8_t *bytes, int64_t n); - -/** Whether OpenMP annotations were respected. */ -bool check_openmp(); - -} // namspace faiss - - -#endif /* FAISS_utils_h */ diff --git a/utils_simd.cpp b/utils_simd.cpp deleted file mode 100644 index bb954a4310..0000000000 --- a/utils_simd.cpp +++ /dev/null @@ -1,815 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#include "utils.h" - -#include -#include -#include -#include - -#ifdef __SSE__ -#include -#endif - -#ifdef __aarch64__ -#include -#endif - -#include - - - -/************************************************** - * Get some stats about the system - **************************************************/ - -namespace faiss { - -#ifdef __AVX__ -#define USE_AVX -#endif - - -/********************************************************* - * Optimized distance computations - *********************************************************/ - - -/* Functions to compute: - - L2 distance between 2 vectors - - inner product between 2 vectors - - L2 norm of a vector - - The functions should probably not be invoked when a large number of - vectors are be processed in batch (in which case Matrix multiply - is faster), but may be useful for comparing vectors isolated in - memory. - - Works with any vectors of any dimension, even unaligned (in which - case they are slower). - -*/ - - -/********************************************************* - * Reference implementations - */ - - -float fvec_L2sqr_ref (const float * x, - const float * y, - size_t d) -{ - size_t i; - float res = 0; - for (i = 0; i < d; i++) { - const float tmp = x[i] - y[i]; - res += tmp * tmp; - } - return res; -} - -float fvec_L1_ref (const float * x, - const float * y, - size_t d) -{ - size_t i; - float res = 0; - for (i = 0; i < d; i++) { - const float tmp = x[i] - y[i]; - res += fabs(tmp); - } - return res; -} - -float fvec_Linf_ref (const float * x, - const float * y, - size_t d) -{ - size_t i; - float res = 0; - for (i = 0; i < d; i++) { - res = fmax(res, fabs(x[i] - y[i])); - } - return res; -} - -float fvec_inner_product_ref (const float * x, - const float * y, - size_t d) -{ - size_t i; - float res = 0; - for (i = 0; i < d; i++) - res += x[i] * y[i]; - return res; -} - -float fvec_norm_L2sqr_ref (const float *x, size_t d) -{ - size_t i; - double res = 0; - for (i = 0; i < d; i++) - res += x[i] * x[i]; - return res; -} - - -void fvec_L2sqr_ny_ref (float * dis, - const float * x, - const float * y, - size_t d, size_t ny) -{ - for (size_t i = 0; i < ny; i++) { - dis[i] = fvec_L2sqr (x, y, d); - y += d; - } -} - - - - -/********************************************************* - * SSE and AVX implementations - */ - -#ifdef __SSE__ - -// reads 0 <= d < 4 floats as __m128 -static inline __m128 masked_read (int d, const float *x) -{ - assert (0 <= d && d < 4); - __attribute__((__aligned__(16))) float buf[4] = {0, 0, 0, 0}; - switch (d) { - case 3: - buf[2] = x[2]; - case 2: - buf[1] = x[1]; - case 1: - buf[0] = x[0]; - } - return _mm_load_ps (buf); - // cannot use AVX2 _mm_mask_set1_epi32 -} - -float fvec_norm_L2sqr (const float * x, - size_t d) -{ - __m128 mx; - __m128 msum1 = _mm_setzero_ps(); - - while (d >= 4) { - mx = _mm_loadu_ps (x); x += 4; - msum1 = _mm_add_ps (msum1, _mm_mul_ps (mx, mx)); - d -= 4; - } - - mx = masked_read (d, x); - msum1 = _mm_add_ps (msum1, _mm_mul_ps (mx, mx)); - - msum1 = _mm_hadd_ps (msum1, msum1); - msum1 = _mm_hadd_ps (msum1, msum1); - return _mm_cvtss_f32 (msum1); -} - -namespace { - -float sqr (float x) { - return x * x; -} - - -void fvec_L2sqr_ny_D1 (float * dis, const float * x, - const float * y, size_t ny) -{ - float x0s = x[0]; - __m128 x0 = _mm_set_ps (x0s, x0s, x0s, x0s); - - size_t i; - for (i = 0; i + 3 < ny; i += 4) { - __m128 tmp, accu; - tmp = x0 - _mm_loadu_ps (y); y += 4; - accu = tmp * tmp; - dis[i] = _mm_cvtss_f32 (accu); - tmp = _mm_shuffle_ps (accu, accu, 1); - dis[i + 1] = _mm_cvtss_f32 (tmp); - tmp = _mm_shuffle_ps (accu, accu, 2); - dis[i + 2] = _mm_cvtss_f32 (tmp); - tmp = _mm_shuffle_ps (accu, accu, 3); - dis[i + 3] = _mm_cvtss_f32 (tmp); - } - while (i < ny) { // handle non-multiple-of-4 case - dis[i++] = sqr(x0s - *y++); - } -} - - -void fvec_L2sqr_ny_D2 (float * dis, const float * x, - const float * y, size_t ny) -{ - __m128 x0 = _mm_set_ps (x[1], x[0], x[1], x[0]); - - size_t i; - for (i = 0; i + 1 < ny; i += 2) { - __m128 tmp, accu; - tmp = x0 - _mm_loadu_ps (y); y += 4; - accu = tmp * tmp; - accu = _mm_hadd_ps (accu, accu); - dis[i] = _mm_cvtss_f32 (accu); - accu = _mm_shuffle_ps (accu, accu, 3); - dis[i + 1] = _mm_cvtss_f32 (accu); - } - if (i < ny) { // handle odd case - dis[i] = sqr(x[0] - y[0]) + sqr(x[1] - y[1]); - } -} - - - -void fvec_L2sqr_ny_D4 (float * dis, const float * x, - const float * y, size_t ny) -{ - __m128 x0 = _mm_loadu_ps(x); - - for (size_t i = 0; i < ny; i++) { - __m128 tmp, accu; - tmp = x0 - _mm_loadu_ps (y); y += 4; - accu = tmp * tmp; - accu = _mm_hadd_ps (accu, accu); - accu = _mm_hadd_ps (accu, accu); - dis[i] = _mm_cvtss_f32 (accu); - } -} - - -void fvec_L2sqr_ny_D8 (float * dis, const float * x, - const float * y, size_t ny) -{ - __m128 x0 = _mm_loadu_ps(x); - __m128 x1 = _mm_loadu_ps(x + 4); - - for (size_t i = 0; i < ny; i++) { - __m128 tmp, accu; - tmp = x0 - _mm_loadu_ps (y); y += 4; - accu = tmp * tmp; - tmp = x1 - _mm_loadu_ps (y); y += 4; - accu += tmp * tmp; - accu = _mm_hadd_ps (accu, accu); - accu = _mm_hadd_ps (accu, accu); - dis[i] = _mm_cvtss_f32 (accu); - } -} - - -void fvec_L2sqr_ny_D12 (float * dis, const float * x, - const float * y, size_t ny) -{ - __m128 x0 = _mm_loadu_ps(x); - __m128 x1 = _mm_loadu_ps(x + 4); - __m128 x2 = _mm_loadu_ps(x + 8); - - for (size_t i = 0; i < ny; i++) { - __m128 tmp, accu; - tmp = x0 - _mm_loadu_ps (y); y += 4; - accu = tmp * tmp; - tmp = x1 - _mm_loadu_ps (y); y += 4; - accu += tmp * tmp; - tmp = x2 - _mm_loadu_ps (y); y += 4; - accu += tmp * tmp; - accu = _mm_hadd_ps (accu, accu); - accu = _mm_hadd_ps (accu, accu); - dis[i] = _mm_cvtss_f32 (accu); - } -} - - -} // anonymous namespace - -void fvec_L2sqr_ny (float * dis, const float * x, - const float * y, size_t d, size_t ny) { - // optimized for a few special cases - switch(d) { - case 1: - fvec_L2sqr_ny_D1 (dis, x, y, ny); - return; - case 2: - fvec_L2sqr_ny_D2 (dis, x, y, ny); - return; - case 4: - fvec_L2sqr_ny_D4 (dis, x, y, ny); - return; - case 8: - fvec_L2sqr_ny_D8 (dis, x, y, ny); - return; - case 12: - fvec_L2sqr_ny_D12 (dis, x, y, ny); - return; - default: - fvec_L2sqr_ny_ref (dis, x, y, d, ny); - return; - } -} - - - -#endif - -#ifdef USE_AVX - -// reads 0 <= d < 8 floats as __m256 -static inline __m256 masked_read_8 (int d, const float *x) -{ - assert (0 <= d && d < 8); - if (d < 4) { - __m256 res = _mm256_setzero_ps (); - res = _mm256_insertf128_ps (res, masked_read (d, x), 0); - return res; - } else { - __m256 res = _mm256_setzero_ps (); - res = _mm256_insertf128_ps (res, _mm_loadu_ps (x), 0); - res = _mm256_insertf128_ps (res, masked_read (d - 4, x + 4), 1); - return res; - } -} - -float fvec_inner_product (const float * x, - const float * y, - size_t d) -{ - __m256 msum1 = _mm256_setzero_ps(); - - while (d >= 8) { - __m256 mx = _mm256_loadu_ps (x); x += 8; - __m256 my = _mm256_loadu_ps (y); y += 8; - msum1 = _mm256_add_ps (msum1, _mm256_mul_ps (mx, my)); - d -= 8; - } - - __m128 msum2 = _mm256_extractf128_ps(msum1, 1); - msum2 += _mm256_extractf128_ps(msum1, 0); - - if (d >= 4) { - __m128 mx = _mm_loadu_ps (x); x += 4; - __m128 my = _mm_loadu_ps (y); y += 4; - msum2 = _mm_add_ps (msum2, _mm_mul_ps (mx, my)); - d -= 4; - } - - if (d > 0) { - __m128 mx = masked_read (d, x); - __m128 my = masked_read (d, y); - msum2 = _mm_add_ps (msum2, _mm_mul_ps (mx, my)); - } - - msum2 = _mm_hadd_ps (msum2, msum2); - msum2 = _mm_hadd_ps (msum2, msum2); - return _mm_cvtss_f32 (msum2); -} - -float fvec_L2sqr (const float * x, - const float * y, - size_t d) -{ - __m256 msum1 = _mm256_setzero_ps(); - - while (d >= 8) { - __m256 mx = _mm256_loadu_ps (x); x += 8; - __m256 my = _mm256_loadu_ps (y); y += 8; - const __m256 a_m_b1 = mx - my; - msum1 += a_m_b1 * a_m_b1; - d -= 8; - } - - __m128 msum2 = _mm256_extractf128_ps(msum1, 1); - msum2 += _mm256_extractf128_ps(msum1, 0); - - if (d >= 4) { - __m128 mx = _mm_loadu_ps (x); x += 4; - __m128 my = _mm_loadu_ps (y); y += 4; - const __m128 a_m_b1 = mx - my; - msum2 += a_m_b1 * a_m_b1; - d -= 4; - } - - if (d > 0) { - __m128 mx = masked_read (d, x); - __m128 my = masked_read (d, y); - __m128 a_m_b1 = mx - my; - msum2 += a_m_b1 * a_m_b1; - } - - msum2 = _mm_hadd_ps (msum2, msum2); - msum2 = _mm_hadd_ps (msum2, msum2); - return _mm_cvtss_f32 (msum2); -} - -float fvec_L1 (const float * x, const float * y, size_t d) -{ - __m256 msum1 = _mm256_setzero_ps(); - __m256 signmask = __m256(_mm256_set1_epi32 (0x7fffffffUL)); - - while (d >= 8) { - __m256 mx = _mm256_loadu_ps (x); x += 8; - __m256 my = _mm256_loadu_ps (y); y += 8; - const __m256 a_m_b = mx - my; - msum1 += _mm256_and_ps(signmask, a_m_b); - d -= 8; - } - - __m128 msum2 = _mm256_extractf128_ps(msum1, 1); - msum2 += _mm256_extractf128_ps(msum1, 0); - __m128 signmask2 = __m128(_mm_set1_epi32 (0x7fffffffUL)); - - if (d >= 4) { - __m128 mx = _mm_loadu_ps (x); x += 4; - __m128 my = _mm_loadu_ps (y); y += 4; - const __m128 a_m_b = mx - my; - msum2 += _mm_and_ps(signmask2, a_m_b); - d -= 4; - } - - if (d > 0) { - __m128 mx = masked_read (d, x); - __m128 my = masked_read (d, y); - __m128 a_m_b = mx - my; - msum2 += _mm_and_ps(signmask2, a_m_b); - } - - msum2 = _mm_hadd_ps (msum2, msum2); - msum2 = _mm_hadd_ps (msum2, msum2); - return _mm_cvtss_f32 (msum2); -} - -float fvec_Linf (const float * x, const float * y, size_t d) -{ - __m256 msum1 = _mm256_setzero_ps(); - __m256 signmask = __m256(_mm256_set1_epi32 (0x7fffffffUL)); - - while (d >= 8) { - __m256 mx = _mm256_loadu_ps (x); x += 8; - __m256 my = _mm256_loadu_ps (y); y += 8; - const __m256 a_m_b = mx - my; - msum1 = _mm256_max_ps(msum1, _mm256_and_ps(signmask, a_m_b)); - d -= 8; - } - - __m128 msum2 = _mm256_extractf128_ps(msum1, 1); - msum2 = _mm_max_ps (msum2, _mm256_extractf128_ps(msum1, 0)); - __m128 signmask2 = __m128(_mm_set1_epi32 (0x7fffffffUL)); - - if (d >= 4) { - __m128 mx = _mm_loadu_ps (x); x += 4; - __m128 my = _mm_loadu_ps (y); y += 4; - const __m128 a_m_b = mx - my; - msum2 = _mm_max_ps(msum2, _mm_and_ps(signmask2, a_m_b)); - d -= 4; - } - - if (d > 0) { - __m128 mx = masked_read (d, x); - __m128 my = masked_read (d, y); - __m128 a_m_b = mx - my; - msum2 = _mm_max_ps(msum2, _mm_and_ps(signmask2, a_m_b)); - } - - msum2 = _mm_max_ps(_mm_movehl_ps(msum2, msum2), msum2); - msum2 = _mm_max_ps(msum2, _mm_shuffle_ps (msum2, msum2, 1)); - return _mm_cvtss_f32 (msum2); -} - -#elif defined(__SSE__) // But not AVX - -float fvec_L1 (const float * x, const float * y, size_t d) -{ - return fvec_L1_ref (x, y, d); -} - -float fvec_Linf (const float * x, const float * y, size_t d) -{ - return fvec_Linf_ref (x, y, d); -} - - -float fvec_L2sqr (const float * x, - const float * y, - size_t d) -{ - __m128 msum1 = _mm_setzero_ps(); - - while (d >= 4) { - __m128 mx = _mm_loadu_ps (x); x += 4; - __m128 my = _mm_loadu_ps (y); y += 4; - const __m128 a_m_b1 = mx - my; - msum1 += a_m_b1 * a_m_b1; - d -= 4; - } - - if (d > 0) { - // add the last 1, 2 or 3 values - __m128 mx = masked_read (d, x); - __m128 my = masked_read (d, y); - __m128 a_m_b1 = mx - my; - msum1 += a_m_b1 * a_m_b1; - } - - msum1 = _mm_hadd_ps (msum1, msum1); - msum1 = _mm_hadd_ps (msum1, msum1); - return _mm_cvtss_f32 (msum1); -} - - -float fvec_inner_product (const float * x, - const float * y, - size_t d) -{ - __m128 mx, my; - __m128 msum1 = _mm_setzero_ps(); - - while (d >= 4) { - mx = _mm_loadu_ps (x); x += 4; - my = _mm_loadu_ps (y); y += 4; - msum1 = _mm_add_ps (msum1, _mm_mul_ps (mx, my)); - d -= 4; - } - - // add the last 1, 2, or 3 values - mx = masked_read (d, x); - my = masked_read (d, y); - __m128 prod = _mm_mul_ps (mx, my); - - msum1 = _mm_add_ps (msum1, prod); - - msum1 = _mm_hadd_ps (msum1, msum1); - msum1 = _mm_hadd_ps (msum1, msum1); - return _mm_cvtss_f32 (msum1); -} - -#elif defined(__aarch64__) - - -float fvec_L2sqr (const float * x, - const float * y, - size_t d) -{ - if (d & 3) return fvec_L2sqr_ref (x, y, d); - float32x4_t accu = vdupq_n_f32 (0); - for (size_t i = 0; i < d; i += 4) { - float32x4_t xi = vld1q_f32 (x + i); - float32x4_t yi = vld1q_f32 (y + i); - float32x4_t sq = vsubq_f32 (xi, yi); - accu = vfmaq_f32 (accu, sq, sq); - } - float32x4_t a2 = vpaddq_f32 (accu, accu); - return vdups_laneq_f32 (a2, 0) + vdups_laneq_f32 (a2, 1); -} - -float fvec_inner_product (const float * x, - const float * y, - size_t d) -{ - if (d & 3) return fvec_inner_product_ref (x, y, d); - float32x4_t accu = vdupq_n_f32 (0); - for (size_t i = 0; i < d; i += 4) { - float32x4_t xi = vld1q_f32 (x + i); - float32x4_t yi = vld1q_f32 (y + i); - accu = vfmaq_f32 (accu, xi, yi); - } - float32x4_t a2 = vpaddq_f32 (accu, accu); - return vdups_laneq_f32 (a2, 0) + vdups_laneq_f32 (a2, 1); -} - -float fvec_norm_L2sqr (const float *x, size_t d) -{ - if (d & 3) return fvec_norm_L2sqr_ref (x, d); - float32x4_t accu = vdupq_n_f32 (0); - for (size_t i = 0; i < d; i += 4) { - float32x4_t xi = vld1q_f32 (x + i); - accu = vfmaq_f32 (accu, xi, xi); - } - float32x4_t a2 = vpaddq_f32 (accu, accu); - return vdups_laneq_f32 (a2, 0) + vdups_laneq_f32 (a2, 1); -} - -// not optimized for ARM -void fvec_L2sqr_ny (float * dis, const float * x, - const float * y, size_t d, size_t ny) { - fvec_L2sqr_ny_ref (dis, x, y, d, ny); -} - -float fvec_L1 (const float * x, const float * y, size_t d) -{ - return fvec_L1_ref (x, y, d); -} - -float fvec_Linf (const float * x, const float * y, size_t d) -{ - return fvec_Linf_ref (x, y, d); -} - - -#else -// scalar implementation - -float fvec_L2sqr (const float * x, - const float * y, - size_t d) -{ - return fvec_L2sqr_ref (x, y, d); -} - -float fvec_L1 (const float * x, const float * y, size_t d) -{ - return fvec_L1_ref (x, y, d); -} - -float fvec_Linf (const float * x, const float * y, size_t d) -{ - return fvec_Linf_ref (x, y, d); -} - -float fvec_inner_product (const float * x, - const float * y, - size_t d) -{ - return fvec_inner_product_ref (x, y, d); -} - -float fvec_norm_L2sqr (const float *x, size_t d) -{ - return fvec_norm_L2sqr_ref (x, d); -} - -void fvec_L2sqr_ny (float * dis, const float * x, - const float * y, size_t d, size_t ny) { - fvec_L2sqr_ny_ref (dis, x, y, d, ny); -} - - -#endif - - - - - - - - - - - - - - - - - - - - -/*************************************************************************** - * heavily optimized table computations - ***************************************************************************/ - - -static inline void fvec_madd_ref (size_t n, const float *a, - float bf, const float *b, float *c) { - for (size_t i = 0; i < n; i++) - c[i] = a[i] + bf * b[i]; -} - -#ifdef __SSE__ - -static inline void fvec_madd_sse (size_t n, const float *a, - float bf, const float *b, float *c) { - n >>= 2; - __m128 bf4 = _mm_set_ps1 (bf); - __m128 * a4 = (__m128*)a; - __m128 * b4 = (__m128*)b; - __m128 * c4 = (__m128*)c; - - while (n--) { - *c4 = _mm_add_ps (*a4, _mm_mul_ps (bf4, *b4)); - b4++; - a4++; - c4++; - } -} - -void fvec_madd (size_t n, const float *a, - float bf, const float *b, float *c) -{ - if ((n & 3) == 0 && - ((((long)a) | ((long)b) | ((long)c)) & 15) == 0) - fvec_madd_sse (n, a, bf, b, c); - else - fvec_madd_ref (n, a, bf, b, c); -} - -#else - -void fvec_madd (size_t n, const float *a, - float bf, const float *b, float *c) -{ - fvec_madd_ref (n, a, bf, b, c); -} - -#endif - -static inline int fvec_madd_and_argmin_ref (size_t n, const float *a, - float bf, const float *b, float *c) { - float vmin = 1e20; - int imin = -1; - - for (size_t i = 0; i < n; i++) { - c[i] = a[i] + bf * b[i]; - if (c[i] < vmin) { - vmin = c[i]; - imin = i; - } - } - return imin; -} - -#ifdef __SSE__ - -static inline int fvec_madd_and_argmin_sse ( - size_t n, const float *a, - float bf, const float *b, float *c) { - n >>= 2; - __m128 bf4 = _mm_set_ps1 (bf); - __m128 vmin4 = _mm_set_ps1 (1e20); - __m128i imin4 = _mm_set1_epi32 (-1); - __m128i idx4 = _mm_set_epi32 (3, 2, 1, 0); - __m128i inc4 = _mm_set1_epi32 (4); - __m128 * a4 = (__m128*)a; - __m128 * b4 = (__m128*)b; - __m128 * c4 = (__m128*)c; - - while (n--) { - __m128 vc4 = _mm_add_ps (*a4, _mm_mul_ps (bf4, *b4)); - *c4 = vc4; - __m128i mask = (__m128i)_mm_cmpgt_ps (vmin4, vc4); - // imin4 = _mm_blendv_epi8 (imin4, idx4, mask); // slower! - - imin4 = _mm_or_si128 (_mm_and_si128 (mask, idx4), - _mm_andnot_si128 (mask, imin4)); - vmin4 = _mm_min_ps (vmin4, vc4); - b4++; - a4++; - c4++; - idx4 = _mm_add_epi32 (idx4, inc4); - } - - // 4 values -> 2 - { - idx4 = _mm_shuffle_epi32 (imin4, 3 << 2 | 2); - __m128 vc4 = _mm_shuffle_ps (vmin4, vmin4, 3 << 2 | 2); - __m128i mask = (__m128i)_mm_cmpgt_ps (vmin4, vc4); - imin4 = _mm_or_si128 (_mm_and_si128 (mask, idx4), - _mm_andnot_si128 (mask, imin4)); - vmin4 = _mm_min_ps (vmin4, vc4); - } - // 2 values -> 1 - { - idx4 = _mm_shuffle_epi32 (imin4, 1); - __m128 vc4 = _mm_shuffle_ps (vmin4, vmin4, 1); - __m128i mask = (__m128i)_mm_cmpgt_ps (vmin4, vc4); - imin4 = _mm_or_si128 (_mm_and_si128 (mask, idx4), - _mm_andnot_si128 (mask, imin4)); - // vmin4 = _mm_min_ps (vmin4, vc4); - } - return _mm_cvtsi128_si32 (imin4); -} - - -int fvec_madd_and_argmin (size_t n, const float *a, - float bf, const float *b, float *c) -{ - if ((n & 3) == 0 && - ((((long)a) | ((long)b) | ((long)c)) & 15) == 0) - return fvec_madd_and_argmin_sse (n, a, bf, b, c); - else - return fvec_madd_and_argmin_ref (n, a, bf, b, c); -} - -#else - -int fvec_madd_and_argmin (size_t n, const float *a, - float bf, const float *b, float *c) -{ - return fvec_madd_and_argmin_ref (n, a, bf, b, c); -} - -#endif - - - - -} // namespace faiss