Struct faiss::IndexIVFFastScan
- 
struct IndexIVFFastScan : public faiss::IndexIVF
- Fast scan version of IVFPQ and IVFAQ. Works for 4-bit PQ/AQ for now. - The codes in the inverted lists are not stored sequentially but grouped in blocks of size bbs. This makes it possible to very quickly compute distances with SIMD instructions. - Implementations (implem): 0: auto-select implementation (default) 1: orig’s search, re-implemented 2: orig’s search, re-ordered by invlist 10: optimizer int16 search, collect results in heap, no qbs 11: idem, collect results in reservoir 12: optimizer int16 search, collect results in heap, uses qbs 13: idem, collect results in reservoir 14: internally multithreaded implem over nq * nprobe 15: same with reservoir - For range search, only 10 and 12 are supported. add 100 to the implem to force single-thread scanning (the coarse quantizer may still use multiple threads). - Subclassed by faiss::IndexIVFAdditiveQuantizerFastScan, faiss::IndexIVFPQFastScan - Public Types - 
using component_t = float
 - 
using distance_t = float
 - Public Functions - 
IndexIVFFastScan(Index *quantizer, size_t d, size_t nlist, size_t code_size, MetricType metric = METRIC_L2)
 - 
IndexIVFFastScan()
 - 
void init_fastscan(size_t M, size_t nbits, size_t nlist, MetricType metric, int bbs)
 - 
void init_code_packer()
 - 
~IndexIVFFastScan() override
 - 
virtual void add_with_ids(idx_t n, const float *x, const idx_t *xids) override
- default implementation that calls encode_vectors 
 - 
virtual bool lookup_table_is_3d() const = 0
 - 
virtual void compute_LUT(size_t n, const float *x, const CoarseQuantized &cq, AlignedTable<float> &dis_tables, AlignedTable<float> &biases) const = 0
 - 
void compute_LUT_uint8(size_t n, const float *x, const CoarseQuantized &cq, AlignedTable<uint8_t> &dis_tables, AlignedTable<uint16_t> &biases, float *normalizers) const
 - 
virtual void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, const SearchParameters *params = nullptr) const override
- assign the vectors, then call search_preassign 
 - 
virtual void search_preassigned(idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs, const IVFSearchParameters *params = nullptr, IndexIVFStats *stats = nullptr) const override
- search a set of vectors, that are pre-quantized by the IVF quantizer. Fill in the corresponding heaps with the query results. The default implementation uses InvertedListScanners to do the search. - Parameters:
- n – nb of vectors to query 
- x – query vectors, size nx * d 
- assign – coarse quantization indices, size nx * nprobe 
- centroid_dis – distances to coarse centroids, size nx * nprobe 
- distance – output distances, size n * k 
- labels – output labels, size n * k 
- store_pairs – store inv list index + inv list offset instead in upper/lower 32 bit of result, instead of ids (used for reranking). 
- params – used to override the object’s search parameters 
- stats – search stats to be updated (can be null) 
 
 
 - 
virtual void range_search(idx_t n, const float *x, float radius, RangeSearchResult *result, const SearchParameters *params = nullptr) const override
- query n vectors of dimension d to the index. - return all vectors with distance < radius. Note that many indexes do not implement the range_search (only the k-NN search is mandatory). - Parameters:
- n – number of vectors 
- x – input vectors to search, size n * d 
- radius – search radius 
- result – result table 
 
 
 - 
void search_dispatch_implem(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, const CoarseQuantized &cq, const NormTableScaler *scaler, const IVFSearchParameters *params = nullptr) const
 - 
void range_search_dispatch_implem(idx_t n, const float *x, float radius, RangeSearchResult &rres, const CoarseQuantized &cq_in, const NormTableScaler *scaler, const IVFSearchParameters *params = nullptr) const
 - 
template<class C>
 void search_implem_1(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, const CoarseQuantized &cq, const NormTableScaler *scaler, const IVFSearchParameters *params = nullptr) const
 - 
template<class C>
 void search_implem_2(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, const CoarseQuantized &cq, const NormTableScaler *scaler, const IVFSearchParameters *params = nullptr) const
 - 
void search_implem_10(idx_t n, const float *x, SIMDResultHandlerToFloat &handler, const CoarseQuantized &cq, size_t *ndis_out, size_t *nlist_out, const NormTableScaler *scaler, const IVFSearchParameters *params = nullptr) const
 - 
void search_implem_12(idx_t n, const float *x, SIMDResultHandlerToFloat &handler, const CoarseQuantized &cq, size_t *ndis_out, size_t *nlist_out, const NormTableScaler *scaler, const IVFSearchParameters *params = nullptr) const
 - 
void search_implem_14(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, const CoarseQuantized &cq, int impl, const NormTableScaler *scaler, const IVFSearchParameters *params = nullptr) const
 - 
virtual void reconstruct_from_offset(int64_t list_no, int64_t offset, float *recons) const override
- Reconstruct a vector given the location in terms of (inv list index + inv list offset) instead of the id. - Useful for reconstructing when the direct_map is not maintained and the inv list offset is computed by search_preassigned() with - store_pairsset.
 - 
virtual CodePacker *get_CodePacker() const override
 - 
void reconstruct_orig_invlists()
 - 
virtual void reset() override
- removes all elements from the database. 
 - 
virtual void train(idx_t n, const float *x) override
- Trains the quantizer and calls train_encoder to train sub-quantizers. 
 - 
virtual void add(idx_t n, const float *x) override
- Calls add_with_ids with NULL ids. 
 - 
virtual void add_core(idx_t n, const float *x, const idx_t *xids, const idx_t *precomputed_idx, void *inverted_list_context = nullptr)
- Implementation of vector addition where the vector assignments are predefined. The default implementation hands over the code extraction to encode_vectors. - Parameters:
- precomputed_idx – quantization indices for the input vectors (size n) 
 
 - 
virtual void encode_vectors(idx_t n, const float *x, const idx_t *list_nos, uint8_t *codes, bool include_listno = false) const = 0
- Encodes a set of vectors as they would appear in the inverted lists - Parameters:
- list_nos – inverted list ids as returned by the quantizer (size n). -1s are ignored. 
- codes – output codes, size n * code_size 
- include_listno – include the list ids in the code (in this case add ceil(log8(nlist)) to the code size) 
 
 
 - 
virtual void add_sa_codes(idx_t n, const uint8_t *codes, const idx_t *xids) override
- Add vectors that are computed with the standalone codec - Parameters:
- codes – codes to add size n * sa_code_size() 
- xids – corresponding ids, size n 
 
 
 - 
virtual void train_encoder(idx_t n, const float *x, const idx_t *assign)
- Train the encoder for the vectors. - If by_residual then it is called with residuals and corresponding assign array, otherwise x is the raw training vectors and assign=nullptr 
 - 
virtual idx_t train_encoder_num_vectors() const
- can be redefined by subclasses to indicate how many training vectors they need 
 - 
virtual void range_search_preassigned(idx_t nx, const float *x, float radius, const idx_t *keys, const float *coarse_dis, RangeSearchResult *result, bool store_pairs = false, const IVFSearchParameters *params = nullptr, IndexIVFStats *stats = nullptr) const override
- Range search a set of vectors, that are pre-quantized by the IVF quantizer. Fill in the RangeSearchResults results. The default implementation uses InvertedListScanners to do the search. - Parameters:
- n – nb of vectors to query 
- x – query vectors, size nx * d 
- assign – coarse quantization indices, size nx * nprobe 
- centroid_dis – distances to coarse centroids, size nx * nprobe 
- result – Output results 
- store_pairs – store inv list index + inv list offset instead in upper/lower 32 bit of result, instead of ids (used for reranking). 
- params – used to override the object’s search parameters 
- stats – search stats to be updated (can be null) 
 
 
 - 
virtual InvertedListScanner *get_InvertedListScanner(bool store_pairs = false, const IDSelector *sel = nullptr) const
- Get a scanner for this index (store_pairs means ignore labels) - The default search implementation uses this to compute the distances 
 - 
virtual void reconstruct(idx_t key, float *recons) const override
- reconstruct a vector. Works only if maintain_direct_map is set to 1 or 2 
 - 
virtual void update_vectors(int nv, const idx_t *idx, const float *v)
- Update a subset of vectors. - The index must have a direct_map - Parameters:
- nv – nb of vectors to update 
- idx – vector indices to update, size nv 
- v – vectors of new values, size nv*d 
 
 
 - 
virtual void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
- Reconstruct a subset of the indexed vectors. - Overrides default implementation to bypass reconstruct() which requires direct_map to be maintained. - Parameters:
- i0 – first vector to reconstruct 
- ni – nb of vectors to reconstruct 
- recons – output array of reconstructed vectors, size ni * d 
 
 
 - 
virtual void search_and_reconstruct(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, float *recons, const SearchParameters *params = nullptr) const override
- Similar to search, but also reconstructs the stored vectors (or an approximation in the case of lossy coding) for the search results. - Overrides default implementation to avoid having to maintain direct_map and instead fetch the code offsets through the - store_pairsflag in search_preassigned().- Parameters:
- recons – reconstructed vectors size (n, k, d) 
 
 - 
void search_and_return_codes(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels, uint8_t *recons, bool include_listno = false, const SearchParameters *params = nullptr) const
- Similar to search, but also returns the codes corresponding to the stored vectors for the search results. - Parameters:
- codes – codes (n, k, code_size) 
- include_listno – include the list ids in the code (in this case add ceil(log8(nlist)) to the code size) 
 
 
 - 
virtual size_t remove_ids(const IDSelector &sel) override
- Dataset manipulation functions. 
 - 
virtual void check_compatible_for_merge(const Index &otherIndex) const override
- check that the two indexes are compatible (ie, they are trained in the same way and have the same parameters). Otherwise throw. 
 - 
virtual void merge_from(Index &otherIndex, idx_t add_id) override
- moves the entries from another dataset to self. On output, other is empty. add_id is added to all moved ids (for sequential ids, this would be this->ntotal) 
 - 
virtual void copy_subset_to(IndexIVF &other, InvertedLists::subset_type_t subset_type, idx_t a1, idx_t a2) const
- copy a subset of the entries index to the other index see Invlists::copy_subset_to for the meaning of subset_type 
 - 
inline size_t get_list_size(size_t list_no) const
 - 
bool check_ids_sorted() const
- are the ids sorted? 
 - 
void make_direct_map(bool new_maintain_direct_map = true)
- initialize a direct map - Parameters:
- new_maintain_direct_map – if true, create a direct map, else clear it 
 
 - 
void replace_invlists(InvertedLists *il, bool own = false)
- replace the inverted lists, old one is deallocated if own_invlists 
 - 
virtual size_t sa_code_size() const override
- size of the produced codes in bytes 
 - 
virtual void sa_encode(idx_t n, const float *x, uint8_t *bytes) const override
- encode a set of vectors sa_encode will call encode_vector with include_listno=true - Parameters:
- n – nb of vectors to encode 
- x – the vectors to encode 
- bytes – output array for the codes 
 
- Returns:
- nb of bytes written to codes 
 
 - 
virtual void assign(idx_t n, const float *x, idx_t *labels, idx_t k = 1) const
- return the indexes of the k vectors closest to the query x. - This function is identical as search but only return labels of neighbors. - Parameters:
- n – number of vectors 
- x – input vectors to search, size n * d 
- labels – output labels of the NNs, size n*k 
- k – number of nearest neighbours 
 
 
 - 
virtual void reconstruct_batch(idx_t n, const idx_t *keys, float *recons) const
- Reconstruct several stored vectors (or an approximation if lossy coding) - this function may not be defined for some indexes - Parameters:
- n – number of vectors to reconstruct 
- keys – ids of the vectors to reconstruct (size n) 
- recons – reconstucted vector (size n * d) 
 
 
 - 
virtual void compute_residual(const float *x, float *residual, idx_t key) const
- Computes a residual vector after indexing encoding. - The residual vector is the difference between a vector and the reconstruction that can be decoded from its representation in the index. The residual can be used for multiple-stage indexing methods, like IndexIVF’s methods. - Parameters:
- x – input vector, size d 
- residual – output residual vector, size d 
- key – encoded index, as returned by search and assign 
 
 
 - 
virtual void compute_residual_n(idx_t n, const float *xs, float *residuals, const idx_t *keys) const
- Computes a residual vector after indexing encoding (batch form). Equivalent to calling compute_residual for each vector. - The residual vector is the difference between a vector and the reconstruction that can be decoded from its representation in the index. The residual can be used for multiple-stage indexing methods, like IndexIVF’s methods. - Parameters:
- n – number of vectors 
- xs – input vectors, size (n x d) 
- residuals – output residual vectors, size (n x d) 
- keys – encoded index, as returned by search and assign 
 
 
 - 
virtual DistanceComputer *get_distance_computer() const
- Get a DistanceComputer (defined in AuxIndexStructures) object for this kind of index. - DistanceComputer is implemented for indexes that support random access of their vectors. 
 - 
virtual void sa_decode(idx_t n, const uint8_t *bytes, float *x) const
- decode a set of vectors - Parameters:
- n – number of vectors 
- bytes – input encoded vectors, size n * sa_code_size() 
- x – output vectors, size n * d 
 
 
 - 
void train_q1(size_t n, const float *x, bool verbose, MetricType metric_type)
- Trains the quantizer and calls train_residual to train sub-quantizers. 
 - 
size_t coarse_code_size() const
- compute the number of bytes required to store list ids 
 - 
void encode_listno(idx_t list_no, uint8_t *code) const
 - 
idx_t decode_listno(const uint8_t *code) const
 - Public Members - 
int bbs
 - 
size_t M
 - 
size_t nbits
 - 
size_t ksub
 - 
size_t M2
 - 
int implem = 0
 - 
int skip = 0
 - 
int qbs = 0
 - 
size_t qbs2 = 0
 - 
InvertedLists *orig_invlists = nullptr
- orig’s inverted lists (for debugging) 
 - 
InvertedLists *invlists = nullptr
- Access to the actual data. 
 - 
bool own_invlists = false
 - 
size_t code_size = 0
- code size per vector in bytes 
 - 
int parallel_mode = 0
- Parallel mode determines how queries are parallelized with OpenMP - 0 (default): split over queries 1: parallelize over inverted lists 2: parallelize over both 3: split over queries with a finer granularity - PARALLEL_MODE_NO_HEAP_INIT: binary or with the previous to prevent the heap to be initialized and finalized 
 - 
const int PARALLEL_MODE_NO_HEAP_INIT = 1024
 - 
DirectMap direct_map
- optional map that maps back ids to invlist entries. This enables reconstruct() 
 - 
bool by_residual = true
- do the codes in the invlists encode the vectors relative to the centroids? 
 - 
int d
- vector dimension 
 - 
idx_t ntotal
- total nb of indexed vectors 
 - 
bool verbose
- verbosity level 
 - 
bool is_trained
- set if the Index does not require training, or if training is done already 
 - 
MetricType metric_type
- type of metric this index uses for search 
 - 
float metric_arg
- argument of the metric type 
 - 
size_t nprobe = 1
- number of probes at query time 
 - 
size_t max_codes = 0
- max nb of codes to visit to do a query 
 - 
Index *quantizer = nullptr
- quantizer that maps vectors to inverted lists 
 - 
size_t nlist = 0
- number of inverted lists 
 - 
char quantizer_trains_alone = 0
- = 0: use the quantizer as index in a kmeans training = 1: just pass on the training set to the train() of the quantizer = 2: kmeans training on a flat index + add the centroids to the quantizer 
 - 
bool own_fields = false
- whether object owns the quantizer 
 - 
ClusteringParameters cp
- to override default clustering params 
 - 
Index *clustering_index = nullptr
- to override index used during clustering 
 - 
struct CoarseQuantized
 
- 
using component_t = float