Skip to content

Commit

Permalink
Facebook sync (2019-09-10) (#943)
Browse files Browse the repository at this point in the history
* Facebook sync (2019-09-10)

* Fix depends Makefile target.

* Add faiss symlink for new include directives.

* Fix missing header.

* Fix tests.

* Fix Makefile.

* Update depend.

* Fix include directives spacing.
  • Loading branch information
Lucas Hosseini authored Sep 20, 2019
1 parent 8b68260 commit 36ddba9
Show file tree
Hide file tree
Showing 309 changed files with 14,867 additions and 11,720 deletions.
568 changes: 22 additions & 546 deletions AutoTune.cpp

Large diffs are not rendered by default.

53 changes: 2 additions & 51 deletions AutoTune.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
#include <unordered_map>
#include <stdint.h>

#include "Index.h"
#include "IndexBinary.h"
#include <faiss/Index.h>
#include <faiss/IndexBinary.h>

namespace faiss {

Expand Down Expand Up @@ -203,55 +203,6 @@ struct ParameterSpace {
virtual ~ParameterSpace () {}
};

/** Build an index with the sequence of processing steps described in
* the string. */
Index *index_factory (int d, const char *description,
MetricType metric = METRIC_L2);

IndexBinary *index_binary_factory (int d, const char *description);


/** Reports some statistics on a dataset and comments on them.
*
* It is a class rather than a function so that all stats can also be
* accessed from code */

// Collects statistics over an n x d float matrix and stores a textual
// report about them in `comments`. All intermediate statistics are kept
// as public members so they can be read from code as well.
struct MatrixStats {
// n and d are the matrix dimensions; x is presumably n * d contiguous
// floats (row-major) -- TODO confirm against the implementation.
MatrixStats (size_t n, size_t d, const float *x);
// human-readable report text
std::string comments;

// raw statistics
size_t n, d;
// NOTE(review): by their names these look like counts of colliding
// vectors, valid vectors and all-zero vectors -- confirm in the .cpp
size_t n_collision, n_valid, n0;
// presumably the extreme squared norms over the vectors -- verify
double min_norm2, max_norm2;

// Accumulator for the statistics of a single dimension (column).
struct PerDimStats {
// NOTE(review): presumably number of values seen, NaN count, Inf count
// and zero count respectively -- confirm in the implementation
size_t n, n_nan, n_inf, n0;

float min, max;
// presumably running sum and sum of squares -- verify
double sum, sum2;

size_t n_valid;
double mean, stddev;

PerDimStats();
// feed one value into the accumulator
void add (float x);
// NOTE(review): expected to fill `mean` and `stddev` from the
// accumulated values; implementation not visible here
void compute_mean_std ();
};

// NOTE(review): presumably one entry per dimension (size d) -- confirm
std::vector<PerDimStats> per_dim_stats;
// Value type of the `occurrences` map below.
struct Occurrence {
size_t first;
size_t count;
};
// NOTE(review): the 64-bit key looks like a hash of a vector, used for
// duplicate/collision detection -- confirm in the implementation
std::unordered_map<uint64_t, Occurrence> occurrences;

// raw output buffer and size counter used by do_comment; ownership and
// exact meaning of nbuf are not visible here -- TODO confirm
char *buf;
size_t nbuf;
// printf-style helper (format string plus variadic arguments)
void do_comment (const char *fmt, ...);

};



} // namespace faiss
Expand Down
12 changes: 7 additions & 5 deletions Clustering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,19 @@

// -*- c++ -*-

#include "Clustering.h"
#include "AuxIndexStructures.h"
#include <faiss/Clustering.h>
#include <faiss/impl/AuxIndexStructures.h>


#include <cmath>
#include <cstdio>
#include <cstring>

#include "utils.h"
#include "FaissAssert.h"
#include "IndexFlat.h"
#include <faiss/utils/utils.h>
#include <faiss/utils/random.h>
#include <faiss/utils/distances.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/IndexFlat.h>

namespace faiss {

Expand Down
2 changes: 1 addition & 1 deletion Clustering.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#ifndef FAISS_CLUSTERING_H
#define FAISS_CLUSTERING_H
#include "Index.h"
#include <faiss/Index.h>

#include <vector>

Expand Down
20 changes: 16 additions & 4 deletions IVFlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@

// -*- c++ -*-

#include "IVFlib.h"
#include <faiss/IVFlib.h>

#include <memory>

#include "VectorTransform.h"
#include "FaissAssert.h"
#include <faiss/IndexPreTransform.h>
#include <faiss/impl/FaissAssert.h>



Expand Down Expand Up @@ -294,7 +294,8 @@ void set_invlist_range (Index *index, long i0, long i1,
void search_with_parameters (const Index *index,
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
IVFSearchParameters *params)
IVFSearchParameters *params,
size_t *nb_dis_ptr)
{
FAISS_THROW_IF_NOT (params);
const float *prev_x = x;
Expand All @@ -317,6 +318,17 @@ void search_with_parameters (const Index *index,
index_ivf->quantizer->search(n, x, params->nprobe,
Dq.data(), Iq.data());

if (nb_dis_ptr) {
size_t nb_dis = 0;
const InvertedLists *il = index_ivf->invlists;
for (idx_t i = 0; i < n * params->nprobe; i++) {
if (Iq[i] >= 0) {
nb_dis += il->list_size(Iq[i]);
}
}
*nb_dis_ptr = nb_dis;
}

index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
distances, labels,
false, params);
Expand Down
13 changes: 8 additions & 5 deletions IVFlib.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*/

#include <vector>
#include "IndexIVF.h"
#include <faiss/IndexIVF.h>

namespace faiss { namespace ivflib {

Expand Down Expand Up @@ -116,13 +116,16 @@ ArrayInvertedLists * get_invlist_range (const Index *index,
void set_invlist_range (Index *index, long i0, long i1,
ArrayInvertedLists * src);


// search an IndexIVF, possibly embedded in an IndexPreTransform
// with given parameters
// search an IndexIVF, possibly embedded in an IndexPreTransform with
// given parameters. Optionally returns the number of distances
// computed
void search_with_parameters (const Index *index,
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
IVFSearchParameters *params);
IVFSearchParameters *params,
size_t *nb_dis = nullptr);



} } // namespace faiss::ivflib

Expand Down
39 changes: 32 additions & 7 deletions Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

// -*- c++ -*-

#include "AuxIndexStructures.h"
#include "FaissAssert.h"
#include "utils.h"
#include <faiss/Index.h>

#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/distances.h>

#include <cstring>

Expand Down Expand Up @@ -83,17 +85,40 @@ void Index::search_and_reconstruct (idx_t n, const float *x, idx_t k,
}
}


void Index::compute_residual (const float * x,
float * residual, idx_t key) const {
reconstruct (key, residual);
for (size_t i = 0; i < d; i++)
for (size_t i = 0; i < d; i++) {
residual[i] = x[i] - residual[i];
}
}

/** Batch form of compute_residual.
 *
 * For every i in [0, n), hands row i of xs, row i of residuals and
 * keys[i] to compute_residual. Rows of xs and residuals are d floats wide.
 */
void Index::compute_residual_n (idx_t n,
                                const float* xs,
                                float* residuals,
                                const idx_t* keys) const
{
    // each iteration writes only its own output row
#pragma omp parallel for
    for (idx_t row = 0; row < n; row++) {
        const float *x_row = xs + row * d;
        float *residual_row = residuals + row * d;
        compute_residual (x_row, residual_row, keys[row]);
    }
}


void Index::display () const {
printf ("Index: %s -> %ld elements\n", typeid (*this).name(), ntotal);

/** Base-class stub for the standalone codec's code-size query.
 *
 * Does nothing except report, through FAISS_THROW_MSG, that the codec
 * is not implemented for this type of index.
 */
size_t Index::sa_code_size () const {
    FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
}

/** Base-class stub for standalone-codec encoding.
 *
 * All three arguments are ignored; the call only reports, through
 * FAISS_THROW_MSG, that the codec is not implemented for this type of
 * index.
 */
void Index::sa_encode (idx_t          /* n */,
                       const float *  /* x */,
                       uint8_t *      /* bytes */) const {
    FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
}

/** Base-class stub for standalone-codec decoding.
 *
 * All three arguments are ignored; the call only reports, through
 * FAISS_THROW_MSG, that the codec is not implemented for this type of
 * index.
 */
void Index::sa_decode (idx_t            /* n */,
                       const uint8_t *  /* bytes */,
                       float *          /* x */) const {
    FAISS_THROW_MSG ("standalone codec not implemented for this type of index");
}


Expand Down
50 changes: 45 additions & 5 deletions Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
#include <sstream>

#define FAISS_VERSION_MAJOR 1
#define FAISS_VERSION_MINOR 5
#define FAISS_VERSION_PATCH 3
#define FAISS_VERSION_MINOR 4
#define FAISS_VERSION_PATCH 0

/**
* @namespace faiss
Expand Down Expand Up @@ -200,10 +200,25 @@ struct Index {
* @param residual output residual vector, size d
* @param key encoded index, as returned by search and assign
*/
void compute_residual (const float * x, float * residual, idx_t key) const;
virtual void compute_residual (const float * x,
float * residual, idx_t key) const;

/** Display the actual class name and some more info */
void display () const;
/** Computes a residual vector after indexing encoding (batch form).
* Equivalent to calling compute_residual for each vector.
*
* The residual vector is the difference between a vector and the
* reconstruction that can be decoded from its representation in
* the index. The residual can be used for multiple-stage indexing
* methods, like IndexIVF's methods.
*
* @param n number of vectors
* @param xs input vectors, size (n x d)
* @param residuals output residual vectors, size (n x d)
* @param keys encoded index, as returned by search and assign
*/
virtual void compute_residual_n (idx_t n, const float* xs,
float* residuals,
const idx_t* keys) const;

/** Get a DistanceComputer (defined in AuxIndexStructures) object
* for this kind of index.
Expand All @@ -213,6 +228,31 @@ struct Index {
*/
virtual DistanceComputer * get_distance_computer() const;


/* The standalone codec interface */

/** size of the produced codes in bytes */
virtual size_t sa_code_size () const;

/** encode a set of vectors
*
* @param n number of vectors
* @param x input vectors, size n * d
* @param bytes output encoded vectors, size n * sa_code_size()
*/
virtual void sa_encode (idx_t n, const float *x,
uint8_t *bytes) const;

/** decode a set of vectors
*
* @param n number of vectors
* @param bytes input encoded vectors, size n * sa_code_size()
* @param x output vectors, size n * d
*/
virtual void sa_decode (idx_t n, const uint8_t *bytes,
float *x) const;


};

}
Expand Down
Loading

0 comments on commit 36ddba9

Please sign in to comment.