Struct faiss::IndexHNSWPQ
-
struct IndexHNSWPQ : public faiss::IndexHNSW
PQ index topped with a HNSW structure to access elements more efficiently.
Public Types
-
typedef HNSW::storage_idx_t storage_idx_t
-
using component_t = float
-
using distance_t = float
Public Functions
-
IndexHNSWPQ()
-
IndexHNSWPQ(int d, int pq_m, int M, int pq_nbits = 8, MetricType metric = METRIC_L2)
-
virtual void train(idx_t n, const float *x) override
Trains the storage if needed.
-
virtual void add(idx_t n, const float *x) override
Add n vectors of dimension d to the index.
Vectors are implicitly assigned labels ntotal .. ntotal + n - 1. This function slices the input vectors in chunks smaller than blocksize_add and calls add_core.
- Parameters:
n – number of vectors
x – input matrix, size n * d
-
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, const SearchParameters *params = nullptr) const override
entry point for search
-
virtual void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result, const SearchParameters *params = nullptr) const override
query n vectors of dimension d to the index.
return all vectors with distance < radius. Note that many indexes do not implement the range_search (only the k-NN search is mandatory).
- Parameters:
n – number of vectors
x – input vectors to search, size n * d
radius – search radius
result – result table
-
virtual void reconstruct(idx_t key, float *recons) const override
Reconstruct a stored vector (or an approximation if lossy coding)
this function may not be defined for some indexes
- Parameters:
key – id of the vector to reconstruct
recons – reconstructed vector (size d)
-
virtual void reset() override
removes all elements from the database.
-
void shrink_level_0_neighbors(int size)
-
void search_level_0(idx_t n, const float *x, idx_t k, const storage_idx_t *nearest, const float *nearest_d, float *distances, idx_t *labels, int nprobe = 1, int search_type = 1, const SearchParameters *params = nullptr) const
Perform search only on level 0, given the starting points for each vertex.
- Parameters:
search_type – 1:perform one search per nprobe, 2: enqueue all entry points
-
void init_level_0_from_knngraph(int k, const float *D, const idx_t *I)
alternative graph building
-
void init_level_0_from_entry_points(int npt, const storage_idx_t *points, const storage_idx_t *nearests)
alternative graph building
-
void reorder_links()
-
void link_singletons()
-
void permute_entries(const idx_t *perm)
-
virtual DistanceComputer *get_distance_computer() const override
Get a DistanceComputer (defined in AuxIndexStructures) object for this kind of index.
DistanceComputer is implemented for indexes that support random access of their vectors.
-
virtual void add_with_ids(idx_t n, const float *x, const idx_t *xids)
Same as add, but stores xids instead of sequential ids.
The default implementation fails with an assertion, as it is not supported by all indexes.
- Parameters:
n – number of vectors
x – input vectors, size n * d
xids – if non-null, ids to store for the vectors (size n)
-
virtual void assign(idx_t n, const float *x, idx_t *labels, idx_t k = 1) const
return the indexes of the k vectors closest to the query x.
This function is identical to search but only returns the labels of the neighbors.
- Parameters:
n – number of vectors
x – input vectors to search, size n * d
labels – output labels of the NNs, size n*k
k – number of nearest neighbours
-
virtual size_t remove_ids(const IDSelector &sel)
removes IDs from the index. Not supported by all indexes. Returns the number of elements removed.
-
virtual void reconstruct_batch(idx_t n, const idx_t *keys, float *recons) const
Reconstruct several stored vectors (or an approximation if lossy coding)
this function may not be defined for some indexes
- Parameters:
n – number of vectors to reconstruct
keys – ids of the vectors to reconstruct (size n)
recons – reconstructed vectors (size n * d)
-
virtual void reconstruct_n(idx_t i0, idx_t ni, float *recons) const
Reconstruct vectors i0 to i0 + ni - 1
this function may not be defined for some indexes
- Parameters:
i0 – index of the first vector in the sequence
ni – number of vectors in the sequence
recons – reconstructed vectors (size ni * d)
-
virtual void search_and_reconstruct(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, float *recons, const SearchParameters *params = nullptr) const
Similar to search, but also reconstructs the stored vectors (or an approximation in the case of lossy coding) for the search results.
If there are not enough results for a query, the resulting arrays are padded with -1s.
- Parameters:
n – number of vectors
x – input vectors to search, size n * d
k – number of extracted vectors
distances – output pairwise distances, size n*k
labels – output labels of the NNs, size n*k
recons – reconstructed vectors size (n, k, d)
-
virtual void compute_residual(const float *x, float *residual, idx_t key) const
Computes a residual vector after indexing encoding.
The residual vector is the difference between a vector and the reconstruction that can be decoded from its representation in the index. The residual can be used for multiple-stage indexing methods, like IndexIVF’s methods.
- Parameters:
x – input vector, size d
residual – output residual vector, size d
key – encoded index, as returned by search and assign
-
virtual void compute_residual_n(idx_t n, const float *xs, float *residuals, const idx_t *keys) const
Computes a residual vector after indexing encoding (batch form). Equivalent to calling compute_residual for each vector.
The residual vector is the difference between a vector and the reconstruction that can be decoded from its representation in the index. The residual can be used for multiple-stage indexing methods, like IndexIVF’s methods.
- Parameters:
n – number of vectors
xs – input vectors, size (n x d)
residuals – output residual vectors, size (n x d)
keys – encoded index, as returned by search and assign
-
virtual size_t sa_code_size() const
size of the produced codes in bytes
-
virtual void sa_encode(idx_t n, const float *x, uint8_t *bytes) const
encode a set of vectors
- Parameters:
n – number of vectors
x – input vectors, size n * d
bytes – output encoded vectors, size n * sa_code_size()
-
virtual void sa_decode(idx_t n, const uint8_t *bytes, float *x) const
decode a set of vectors
- Parameters:
n – number of vectors
bytes – input encoded vectors, size n * sa_code_size()
x – output vectors, size n * d
-
virtual void merge_from(Index &otherIndex, idx_t add_id = 0)
moves the entries from another dataset to self. On output, other is empty. add_id is added to all moved ids (for sequential ids, this would be this->ntotal)
-
virtual void check_compatible_for_merge(const Index &otherIndex) const
check that the two indexes are compatible (i.e., they are trained in the same way and have the same parameters). Otherwise throw.
-
virtual void add_sa_codes(idx_t n, const uint8_t *codes, const idx_t *xids)
Add vectors that are computed with the standalone codec
- Parameters:
codes – codes to add size n * sa_code_size()
xids – corresponding ids, size n
Public Members
-
HNSW hnsw
-
bool own_fields = false
-
Index *storage = nullptr
-
bool init_level0 = true
-
bool keep_max_size_level0 = false
-
int d
vector dimension
-
idx_t ntotal
total nb of indexed vectors
-
bool verbose
verbosity level
-
bool is_trained
set if the Index does not require training, or if training is done already
-
MetricType metric_type
type of metric this index uses for search
-
float metric_arg
argument of the metric type
-
typedef HNSW::storage_idx_t storage_idx_t