Struct faiss::OnDiskInvertedLists
-
struct faiss::OnDiskInvertedLists : public faiss::InvertedLists
On-disk storage of inverted lists.
The data is stored in a mmapped chunk of memory (base ptointer ptr, size totsize). Each list is a range of memory that contains (object List) that contains:
uint8_t codes[capacity * code_size]
followed by idx_t ids[capacity]
in each of the arrays, the size <= capacity first elements are used, the rest is not initialized.
Addition and resize are supported by:
roundind up the capacity of the lists to a power of two
maintaining a list of empty slots, sorted by size.
resizing the mmapped block is adjusted as needed.
An OnDiskInvertedLists is compact if the size == capacity for all lists and there are no available slots.
Addition to the invlists is slow. For incremental add it is better to use a default ArrayInvertedLists object and convert it to an OnDisk with merge_from.
When it is known that a set of lists will be accessed, it is useful to call prefetch_lists, that launches a set of threads to read the lists in parallel.
Public Functions
-
OnDiskInvertedLists(size_t nlist, size_t code_size, const char *filename)
are inverted lists mapped read-only
-
virtual size_t list_size(size_t list_no) const override
get the size of a list
-
virtual const uint8_t *get_codes(size_t list_no) const override
get the codes for an inverted list must be released by release_codes
- Returns
codes size list_size * code_size
-
virtual const idx_t *get_ids(size_t list_no) const override
get the ids for an inverted list must be released by release_ids
- Returns
ids size list_size
-
virtual size_t add_entries(size_t list_no, size_t n_entry, const idx_t *ids, const uint8_t *code) override
-
virtual void update_entries(size_t list_no, size_t offset, size_t n_entry, const idx_t *ids, const uint8_t *code) override
-
virtual void resize(size_t list_no, size_t new_size) override
-
size_t merge_from(const InvertedLists **ils, int n_il, bool verbose = false)
-
size_t merge_from_1(const InvertedLists *il, bool verbose = false)
same as merge_from for a single invlist
-
void crop_invlists(size_t l0, size_t l1)
restrict the inverted lists to l0:l1 without touching the mmapped region
-
virtual void prefetch_lists(const idx_t *list_nos, int nlist) const override
prepare the following lists (default does nothing) a list can be -1 hence the signed long
-
~OnDiskInvertedLists() override
-
void do_mmap()
-
void update_totsize(size_t new_totsize)
-
void resize_locked(size_t list_no, size_t new_size)
-
size_t allocate_slot(size_t capacity)
-
void free_slot(size_t offset, size_t capacity)
-
void set_all_lists_sizes(const size_t *sizes)
override all list sizes and make a packed storage
-
OnDiskInvertedLists()
-
virtual void release_codes(size_t list_no, const uint8_t *codes) const
release codes returned by get_codes (default implementation is nop
-
virtual void release_ids(size_t list_no, const idx_t *ids) const
release ids returned by get_ids
-
virtual idx_t get_single_id(size_t list_no, size_t offset) const
- Returns
a single id in an inverted list
-
virtual const uint8_t *get_single_code(size_t list_no, size_t offset) const
- Returns
a single code in an inverted list (should be deallocated with release_codes)
-
virtual size_t add_entry(size_t list_no, idx_t theid, const uint8_t *code)
add one entry to an inverted list
-
virtual void update_entry(size_t list_no, size_t offset, idx_t id, const uint8_t *code)
-
virtual void reset()
-
void merge_from(InvertedLists *oivf, size_t add_id)
move all entries from oivf (empty on output)
-
double imbalance_factor() const
1= perfectly balanced, >1: imbalanced
-
void print_stats() const
display some stats about the inverted lists
-
size_t compute_ntotal() const
sum up list sizes
Public Members
-
std::vector<List> lists
-
std::list<Slot> slots
-
std::string filename
-
size_t totsize
-
uint8_t *ptr
-
bool read_only
-
LockLevels *locks
-
OngoingPrefetch *pf
-
int prefetch_nthread
-
size_t nlist
number of possible key values
-
size_t code_size
code size per vector in bytes
Public Static Attributes
-
static const size_t INVALID_CODE_SIZE = static_cast<size_t>(-1)
used for BlockInvertedLists, where the codes are packed into groups and the individual code size is meaningless
-
struct Slot
Public Functions
-
Slot(size_t offset, size_t capacity)
-
Slot()
Public Members
-
size_t offset
-
size_t capacity
-
Slot(size_t offset, size_t capacity)