Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Facebook sync (2019-09-10) #943

Merged
merged 15 commits into from
Sep 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
568 changes: 22 additions & 546 deletions AutoTune.cpp

Large diffs are not rendered by default.

53 changes: 2 additions & 51 deletions AutoTune.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
#include <unordered_map>
#include <stdint.h>

#include "Index.h"
#include "IndexBinary.h"
#include <faiss/Index.h>
#include <faiss/IndexBinary.h>

namespace faiss {

Expand Down Expand Up @@ -203,55 +203,6 @@ struct ParameterSpace {
virtual ~ParameterSpace () {}
};

/** Build an index with the sequence of processing steps described in
* the string.
*
* @param d            int passed alongside the description; presumably the
*                     dimension of the vectors to index (TODO confirm)
* @param description  string describing the sequence of processing steps
* @param metric       metric type, METRIC_L2 when omitted
* @return pointer to the index that was built
*         NOTE(review): ownership of the returned pointer is not visible
*         from this declaration -- confirm against the implementation.
*/
Index *index_factory (int d, const char *description,
MetricType metric = METRIC_L2);

/** Counterpart of index_factory that returns an IndexBinary: it takes the
* same d and description arguments but has no metric argument.
* NOTE(review): presumably d is the vector dimension and description uses
* a factory string syntax -- confirm against the implementation. */
IndexBinary *index_binary_factory (int d, const char *description);


/** Reports some statistics on a dataset and comments on them.
*
* It is a class rather than a function so that all stats can also be
* accessed from code */

struct MatrixStats {
// Takes a count n, a count d and a pointer x to float data.
// NOTE(review): presumably x holds n vectors of dimension d and the
// statistics below are filled in here -- confirm in the implementation.
MatrixStats (size_t n, size_t d, const float *x);
// textual comments about the statistics
std::string comments;

// raw statistics
// same names as the constructor arguments
size_t n, d;
// NOTE(review): presumably counts of colliding vectors, valid vectors
// and all-zero vectors -- confirm.
size_t n_collision, n_valid, n0;
// NOTE(review): presumably smallest / largest squared norm -- confirm.
double min_norm2, max_norm2;

// Accumulator for the statistics of one dimension (one entry per
// element of per_dim_stats below).
struct PerDimStats {
// NOTE(review): presumably number of values seen, and how many of them
// were NaN, infinite or zero -- confirm.
size_t n, n_nan, n_inf, n0;

// NOTE(review): presumably the extreme values, the running sum and the
// running sum of squares -- confirm.
float min, max;
double sum, sum2;

// NOTE(review): presumably derived values filled in by
// compute_mean_std() -- confirm.
size_t n_valid;
double mean, stddev;

PerDimStats();
// feed one value into the accumulator
void add (float x);
// NOTE(review): presumably derives mean and stddev from the
// accumulated sums -- confirm.
void compute_mean_std ();
};

std::vector<PerDimStats> per_dim_stats;
// A (first, count) pair stored as the value type of `occurrences`.
// NOTE(review): presumably the index of the first occurrence and the
// number of occurrences -- confirm.
struct Occurrence {
size_t first;
size_t count;
};
// NOTE(review): the uint64_t key is presumably a hash of a vector --
// confirm.
std::unordered_map<uint64_t, Occurrence> occurrences;

// Raw character buffer and an associated size.
// NOTE(review): presumably scratch space used by do_comment(); the
// ownership of buf is not visible here -- confirm.
char *buf;
size_t nbuf;
// printf-style variadic helper (format string + arguments).
// NOTE(review): presumably appends the formatted text to `comments` --
// confirm.
void do_comment (const char *fmt, ...);

};



} // namespace faiss
Expand Down
12 changes: 7 additions & 5 deletions Clustering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,19 @@

// -*- c++ -*-

#include "Clustering.h"
#include "AuxIndexStructures.h"
#include <faiss/Clustering.h>
#include <faiss/impl/AuxIndexStructures.h>


#include <cmath>
#include <cstdio>
#include <cstring>

#include "utils.h"
#include "FaissAssert.h"
#include "IndexFlat.h"
#include <faiss/utils/utils.h>
#include <faiss/utils/random.h>
#include <faiss/utils/distances.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/IndexFlat.h>

namespace faiss {

Expand Down
2 changes: 1 addition & 1 deletion Clustering.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#ifndef FAISS_CLUSTERING_H
#define FAISS_CLUSTERING_H
#include "Index.h"
#include <faiss/Index.h>

#include <vector>

Expand Down
20 changes: 16 additions & 4 deletions IVFlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@

// -*- c++ -*-

#include "IVFlib.h"
#include <faiss/IVFlib.h>

#include <memory>

#include "VectorTransform.h"
#include "FaissAssert.h"
#include <faiss/IndexPreTransform.h>
#include <faiss/impl/FaissAssert.h>



Expand Down Expand Up @@ -294,7 +294,8 @@ void set_invlist_range (Index *index, long i0, long i1,
void search_with_parameters (const Index *index,
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
IVFSearchParameters *params)
IVFSearchParameters *params,
size_t *nb_dis_ptr)
{
FAISS_THROW_IF_NOT (params);
const float *prev_x = x;
Expand All @@ -317,6 +318,17 @@ void search_with_parameters (const Index *index,
index_ivf->quantizer->search(n, x, params->nprobe,
Dq.data(), Iq.data());

if (nb_dis_ptr) {
size_t nb_dis = 0;
const InvertedLists *il = index_ivf->invlists;
for (idx_t i = 0; i < n * params->nprobe; i++) {
if (Iq[i] >= 0) {
nb_dis += il->list_size(Iq[i]);
}
}
*nb_dis_ptr = nb_dis;
}

index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
distances, labels,
false, params);
Expand Down
13 changes: 8 additions & 5 deletions IVFlib.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*/

#include <vector>
#include "IndexIVF.h"
#include <faiss/IndexIVF.h>

namespace faiss { namespace ivflib {

Expand Down Expand Up @@ -116,13 +116,16 @@ ArrayInvertedLists * get_invlist_range (const Index *index,
void set_invlist_range (Index *index, long i0, long i1,
ArrayInvertedLists * src);


// search an IndexIVF, possibly embedded in an IndexPreTransform
// with given parameters
// search an IndexIVF, possibly embedded in an IndexPreTransform with
// given parameters. Optionally returns the number of distances
// computed
void search_with_parameters (const Index *index,
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
IVFSearchParameters *params);
IVFSearchParameters *params,
size_t *nb_dis = nullptr);



} } // namespace faiss::ivflib

Expand Down
39 changes: 32 additions & 7 deletions Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

// -*- c++ -*-

#include "AuxIndexStructures.h"
#include "FaissAssert.h"
#include "utils.h"
#include <faiss/Index.h>

#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/distances.h>

#include <cstring>

Expand Down Expand Up @@ -83,17 +85,40 @@ void Index::search_and_reconstruct (idx_t n, const float *x, idx_t k,
}
}


/** Compute the residual of a vector with respect to its reconstruction.
 *
 * On return residual[i] == x[i] - r[i], where r is what
 * reconstruct (key, ...) wrote into the output buffer.
 *
 * @param x         input vector, d values are read
 * @param residual  output buffer, d values are written; it is first used
 *                  as the destination of reconstruct()
 * @param key       key handed to reconstruct()
 */
void Index::compute_residual (const float * x,
                              float * residual, idx_t key) const {
    // The output buffer temporarily holds the reconstruction of key ...
    reconstruct (key, residual);
    // ... and is then overwritten in place. The subtraction must be
    // applied exactly once per component: it is not idempotent, so the
    // loop is a single pass over the d components.
    for (size_t i = 0; i < d; i++) {
        residual[i] = x[i] - residual[i];
    }
}

/** Batch form of compute_residual.
 *
 * Calls compute_residual once per vector, on consecutive slices of d
 * floats of xs and residuals, using keys[i] for slice i. The iterations
 * are distributed by an OpenMP parallel for.
 *
 * @param n          number of vectors
 * @param xs         input vectors, read in slices of d floats
 * @param residuals  output residuals, written in slices of d floats
 * @param keys       one key per vector
 */
void Index::compute_residual_n (idx_t n, const float* xs,
                                float* residuals,
                                const idx_t* keys) const {
#pragma omp parallel for
    for (idx_t vec = 0; vec < n; ++vec) {
        // Slice vec of both arrays starts at the same float offset.
        const idx_t offset = vec * d;
        compute_residual (xs + offset, residuals + offset, keys[vec]);
    }
}


void Index::display () const {
printf ("Index: %s -> %ld elements\n", typeid (*this).name(), ntotal);

/** Base-class implementation of the standalone codec size query.
 *  It only reports, through FAISS_THROW_MSG, that the codec is not
 *  implemented for this type of index. */
size_t Index::sa_code_size () const {
    FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
}

/** Base-class implementation of standalone encoding.
 *  All arguments are ignored; the function only reports, through
 *  FAISS_THROW_MSG, that the codec is not implemented for this type
 *  of index. */
void Index::sa_encode (idx_t /* n */, const float * /* x */,
                       uint8_t * /* bytes */) const {
    FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
}

/** Base-class implementation of standalone decoding.
 *  All arguments are ignored; the function only reports, through
 *  FAISS_THROW_MSG, that the codec is not implemented for this type
 *  of index. */
void Index::sa_decode (idx_t /* n */, const uint8_t * /* bytes */,
                       float * /* x */) const {
    FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
}


Expand Down
50 changes: 45 additions & 5 deletions Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
#include <sstream>

#define FAISS_VERSION_MAJOR 1
#define FAISS_VERSION_MINOR 5
#define FAISS_VERSION_PATCH 3
#define FAISS_VERSION_MINOR 4
#define FAISS_VERSION_PATCH 0

/**
* @namespace faiss
Expand Down Expand Up @@ -200,10 +200,25 @@ struct Index {
* @param residual output residual vector, size d
* @param key encoded index, as returned by search and assign
*/
void compute_residual (const float * x, float * residual, idx_t key) const;
virtual void compute_residual (const float * x,
float * residual, idx_t key) const;

/** Display the actual class name and some more info */
void display () const;
/** Computes a residual vector after indexing encoding (batch form).
* Equivalent to calling compute_residual for each vector.
*
* The residual vector is the difference between a vector and the
* reconstruction that can be decoded from its representation in
* the index. The residual can be used for multiple-stage indexing
* methods, like IndexIVF's methods.
*
* @param n number of vectors
* @param xs input vectors, size (n x d)
* @param residuals output residual vectors, size (n x d)
* @param keys encoded index, as returned by search and assign
*/
virtual void compute_residual_n (idx_t n, const float* xs,
float* residuals,
const idx_t* keys) const;

/** Get a DistanceComputer (defined in AuxIndexStructures) object
* for this kind of index.
Expand All @@ -213,6 +228,31 @@ struct Index {
*/
virtual DistanceComputer * get_distance_computer() const;


/* The standalone codec interface */

/** size of the produced codes in bytes */
virtual size_t sa_code_size () const;

/** encode a set of vectors
*
* @param n number of vectors
* @param x input vectors, size n * d
* @param bytes output encoded vectors, size n * sa_code_size()
*/
virtual void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const;

/** decode a set of vectors
*
* @param n number of vectors
* @param bytes input encoded vectors, size n * sa_code_size()
* @param x output vectors, size n * d
*/
virtual void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const;


};

}
Expand Down
Loading