Struct faiss::OnDiskInvertedLists
-
struct OnDiskInvertedLists : public faiss::InvertedLists
On-disk storage of inverted lists.
The data is stored in a mmapped chunk of memory (base pointer ptr, size totsize). Each list is a range of memory that contains (object List) that contains:
uint8_t codes[capacity * code_size]
followed by idx_t ids[capacity]
in each of the arrays, the size <= capacity first elements are used, the rest is not initialized.
Addition and resize are supported by:
roundind up the capacity of the lists to a power of two
maintaining a list of empty slots, sorted by size.
resizing the mmapped block is adjusted as needed.
An OnDiskInvertedLists is compact if the size == capacity for all lists and there are no available slots.
Addition to the invlists is slow. For incremental add it is better to use a default ArrayInvertedLists object and convert it to an OnDisk with merge_from.
When it is known that a set of lists will be accessed, it is useful to call prefetch_lists, that launches a set of threads to read the lists in parallel.
Public Types
-
using List = OnDiskOneList
-
enum subset_type_t
Values:
-
enumerator SUBSET_TYPE_ID_RANGE
-
enumerator SUBSET_TYPE_ID_MOD
-
enumerator SUBSET_TYPE_ELEMENT_RANGE
-
enumerator SUBSET_TYPE_INVLIST_FRACTION
-
enumerator SUBSET_TYPE_INVLIST
-
enumerator SUBSET_TYPE_ID_RANGE
Public Functions
-
OnDiskInvertedLists(size_t nlist, size_t code_size, const char *filename)
are inverted lists mapped read-only
-
virtual size_t list_size(size_t list_no) const override
get the size of a list
-
virtual const uint8_t *get_codes(size_t list_no) const override
get the codes for an inverted list must be released by release_codes
- Returns:
codes size list_size * code_size
-
virtual const idx_t *get_ids(size_t list_no) const override
get the ids for an inverted list must be released by release_ids
- Returns:
ids size list_size
-
virtual size_t add_entries(size_t list_no, size_t n_entry, const idx_t *ids, const uint8_t *code) override
-
virtual void update_entries(size_t list_no, size_t offset, size_t n_entry, const idx_t *ids, const uint8_t *code) override
-
virtual void resize(size_t list_no, size_t new_size) override
-
size_t merge_from_multiple(const InvertedLists **ils, int n_il, bool shift_ids = false, bool verbose = false)
-
size_t merge_from_1(const InvertedLists *il, bool verbose = false)
same as merge_from for a single invlist
-
void crop_invlists(size_t l0, size_t l1)
restrict the inverted lists to l0:l1 without touching the mmapped region
-
virtual void prefetch_lists(const idx_t *list_nos, int nlist) const override
prepare the following lists (default does nothing) a list can be -1 hence the signed long
-
~OnDiskInvertedLists() override
-
void do_mmap()
-
void update_totsize(size_t new_totsize)
-
void resize_locked(size_t list_no, size_t new_size)
-
size_t allocate_slot(size_t capacity)
-
void free_slot(size_t offset, size_t capacity)
-
void set_all_lists_sizes(const size_t *sizes)
override all list sizes and make a packed storage
-
OnDiskInvertedLists()
-
virtual void release_codes(size_t list_no, const uint8_t *codes) const
release codes returned by get_codes (default implementation is nop
-
virtual void release_ids(size_t list_no, const idx_t *ids) const
release ids returned by get_ids
-
virtual idx_t get_single_id(size_t list_no, size_t offset) const
- Returns:
a single id in an inverted list
-
virtual const uint8_t *get_single_code(size_t list_no, size_t offset) const
- Returns:
a single code in an inverted list (should be deallocated with release_codes)
-
virtual bool is_empty(size_t list_no, void *inverted_list_context = nullptr) const
check if the list is empty
-
virtual InvertedListsIterator *get_iterator(size_t list_no, void *inverted_list_context = nullptr) const
get iterable for lists that use_iterator
-
virtual size_t add_entry(size_t list_no, idx_t theid, const uint8_t *code, void *inverted_list_context = nullptr)
add one entry to an inverted list
-
virtual void update_entry(size_t list_no, size_t offset, idx_t id, const uint8_t *code)
-
virtual void reset()
-
void merge_from(InvertedLists *oivf, size_t add_id)
move all entries from oivf (empty on output)
-
size_t copy_subset_to(InvertedLists &other, subset_type_t subset_type, idx_t a1, idx_t a2) const
copy a subset of the entries index to the other index
- Returns:
number of entries copied
-
double imbalance_factor() const
1= perfectly balanced, >1: imbalanced
-
void print_stats() const
display some stats about the inverted lists
-
size_t compute_ntotal() const
sum up list sizes
Public Members
-
std::string filename
-
size_t totsize
-
uint8_t *ptr
-
bool read_only
-
LockLevels *locks
-
OngoingPrefetch *pf
-
int prefetch_nthread
-
size_t nlist
number of possible key values
-
size_t code_size
code size per vector in bytes
-
bool use_iterator = false
request to use iterator rather than get_codes / get_ids
Public Static Attributes
-
static const size_t INVALID_CODE_SIZE = static_cast<size_t>(-1)
used for BlockInvertedLists, where the codes are packed into groups and the individual code size is meaningless
-
struct Slot
Public Functions
-
Slot(size_t offset, size_t capacity)
-
Slot()
Public Members
-
size_t offset
-
size_t capacity
-
Slot(size_t offset, size_t capacity)