Struct faiss::BlockInvertedLists
-
struct BlockInvertedLists : public faiss::InvertedLists
Inverted Lists that are organized by blocks.
Different from the regular inverted lists, the codes are organized by blocks of size block_size bytes that reprsent a set of n_per_block. Therefore, code allocations are always rounded up to block_size bytes. The codes are also aligned on 32-byte boundaries for use with SIMD.
To avoid misinterpretations, the code_size is set to (size_t)(-1), even if arguably the amount of memory consumed by code is block_size / n_per_block.
The writing functions add_entries and update_entries operate on block-aligned data.
Public Types
-
enum subset_type_t
Values:
-
enumerator SUBSET_TYPE_ID_RANGE
-
enumerator SUBSET_TYPE_ID_MOD
-
enumerator SUBSET_TYPE_ELEMENT_RANGE
-
enumerator SUBSET_TYPE_INVLIST_FRACTION
-
enumerator SUBSET_TYPE_INVLIST
-
enumerator SUBSET_TYPE_ID_RANGE
Public Functions
-
BlockInvertedLists(size_t nlist, size_t vec_per_block, size_t block_size)
-
BlockInvertedLists(size_t nlist, const CodePacker *packer)
-
BlockInvertedLists()
-
virtual size_t list_size(size_t list_no) const override
get the size of a list
-
virtual const uint8_t *get_codes(size_t list_no) const override
get the codes for an inverted list must be released by release_codes
- Returns:
codes size list_size * code_size
-
virtual const idx_t *get_ids(size_t list_no) const override
get the ids for an inverted list must be released by release_ids
- Returns:
ids size list_size
-
size_t remove_ids(const IDSelector &sel)
remove ids from the InvertedLists
-
virtual size_t add_entries(size_t list_no, size_t n_entry, const idx_t *ids, const uint8_t *code) override
-
virtual void update_entries(size_t list_no, size_t offset, size_t n_entry, const idx_t *ids, const uint8_t *code) override
not implemented
-
virtual void resize(size_t list_no, size_t new_size) override
-
~BlockInvertedLists() override
-
virtual void release_codes(size_t list_no, const uint8_t *codes) const
release codes returned by get_codes (default implementation is nop
-
virtual void release_ids(size_t list_no, const idx_t *ids) const
release ids returned by get_ids
-
virtual idx_t get_single_id(size_t list_no, size_t offset) const
- Returns:
a single id in an inverted list
-
virtual const uint8_t *get_single_code(size_t list_no, size_t offset) const
- Returns:
a single code in an inverted list (should be deallocated with release_codes)
-
virtual void prefetch_lists(const idx_t *list_nos, int nlist) const
prepare the following lists (default does nothing) a list can be -1 hence the signed long
-
virtual bool is_empty(size_t list_no, void *inverted_list_context = nullptr) const
check if the list is empty
-
virtual InvertedListsIterator *get_iterator(size_t list_no, void *inverted_list_context = nullptr) const
get iterable for lists that use_iterator
-
virtual size_t add_entry(size_t list_no, idx_t theid, const uint8_t *code, void *inverted_list_context = nullptr)
add one entry to an inverted list
-
virtual void update_entry(size_t list_no, size_t offset, idx_t id, const uint8_t *code)
-
virtual void reset()
-
void merge_from(InvertedLists *oivf, size_t add_id)
move all entries from oivf (empty on output)
-
size_t copy_subset_to(InvertedLists &other, subset_type_t subset_type, idx_t a1, idx_t a2) const
copy a subset of the entries index to the other index
- Returns:
number of entries copied
-
double imbalance_factor() const
1= perfectly balanced, >1: imbalanced
-
void print_stats() const
display some stats about the inverted lists
-
size_t compute_ntotal() const
sum up list sizes
Public Members
-
size_t n_per_block = 0
-
size_t block_size = 0
-
const CodePacker *packer = nullptr
-
std::vector<AlignedTable<uint8_t>> codes
-
size_t nlist
number of possible key values
-
size_t code_size
code size per vector in bytes
-
bool use_iterator = false
request to use iterator rather than get_codes / get_ids
Public Static Attributes
-
static const size_t INVALID_CODE_SIZE = static_cast<size_t>(-1)
used for BlockInvertedLists, where the codes are packed into groups and the individual code size is meaningless
-
enum subset_type_t