pasta::bit_vector  v1.0.0
Compact and Fast Rank and Select Data Structure for Bit Vectors
bit_vector.hpp
1/*******************************************************************************
2 * This file is part of pasta::bit_vector.
3 *
4 * Copyright (C) 2019-2021 Florian Kurpicz <florian@kurpicz.org>
5 *
6 * pasta::bit_vector is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * pasta::bit_vector is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with pasta::bit_vector. If not, see <http://www.gnu.org/licenses/>.
18 *
19 ******************************************************************************/
20
21#pragma once
22
23#include "pasta/bit_vector/support/find_l2_flat_with.hpp"
24#include "pasta/bit_vector/support/find_l2_wide_with.hpp"
25#include "pasta/bit_vector/support/optimized_for.hpp"
26#include "pasta/utils/container/aligned_vector.hpp"
27
28#include <algorithm>
29#include <cstddef>
30#include <cstdint>
31#include <iostream>
32#include <iterator>
33#include <span>
34#include <tlx/container/simple_vector.hpp>
35
36namespace pasta {
37
38/*!
39 * \brief Utility class used for the access operator of the \c BitVector.
40 *
41 * This utility class is created when the bit vector is accessed through the
42 * access operator. It cannot be created explicitly and cannot be assigned. It
43 * can only be cast to bool (for read access) or be assigned a bool (for write
44 * access).
45 */
46class BitAccess {
47 //! Forward declaration.
48 friend class BitVector;
49
50 //! 64-bit word the bit is contained in.
51 uint64_t* const data_;
52 //! Position of the bit within the 64-bit word.
53 size_t position_;
54
55public:
56 //! Deleted constructor.
57 BitAccess() = delete;
58 //! Avoiding implicit generation of the move constructor.
59 BitAccess(BitAccess&&) = delete;
60
61 //! Avoiding implicit copy assignment.
62 BitAccess& operator=(BitAccess const&) = delete;
63 //! Avoiding implicit move assignment.
65
66 /*!
67 * \brief Constructor setting 64-bit word and position of the bit in the
68 * word.
69 *
70 * This constructor can be used when a single bit in the bit vector has to
71 * be accessed and one does not want to extract the bit from the 64-bit word
72 * manually.
73 *
74 * \param data Pointer to the 64-bit word that contains the bit.
75 * \param position Position of the bit within the 64-bit word.
76 */
77 BitAccess(uint64_t* const data, size_t const position) noexcept
78 : data_(data),
79 position_(position) {}
80
81 /*!
82 * \brief User-defined conversion function to bool.
83 *
84 * Used for read access to a single bit.
85 *
86 */
87 operator bool() const noexcept {
88 uint8_t const offset = uint64_t(position_) & uint64_t(0b111111);
89 return (data_[position_ >> 6] >> offset) & 1ULL;
90 }
91
92 /*!
93 * \brief Assignment operator to set a bit within the bit vector.
94 * \param value Value the bit should be written to.
95 * \return This after the bit has been written.
96 */
97 BitAccess& operator=(bool const value) noexcept {
98 // https://graphics.stanford.edu/~seander/bithacks.html
99 // (ConditionalSetOrClearBitsWithoutBranching)
100 uint64_t const mask = 1ULL << (uint64_t(position_) & uint64_t(0b111111));
101 data_[position_ >> 6] = (data_[position_ >> 6] & ~mask) | (-value & mask);
102 return *this;
103 }
104
105 /*!
106 * \brief Computes the distance to another \c BitAccess instance.
107 *
108 * This function is only used within the iterator and usually should not be
109 * required in any other situation. It has **undefined** behavior if used
110 * for two instances from different \c BitVector.
111 * \param other The \c BitAccess instance the distance is computed to.
112 * \return The distance between \c this and the other \c BitAccess instance.
113 */
114 int64_t distance(BitAccess const& other) const noexcept {
115 return position_ - other.position_;
116 }
117
118 /*!
119 * \brief Comparison of two \c BitAccess instances for equality.
120 *
121 * This does not compare the value the \c BitAccess instance refers to but
122 * rather if the two instances refer to the same position in the same
123 * \c BitVector.
124 * \param a First \c BitAccess instance.
125 * \param b Second \c BitAccess instance.
126 * \return \c true if both instances refer to the same position in the same
127 * \c BitVector and \c false otherwise.
128 */
129 friend bool operator==(BitAccess const& a, BitAccess const& b) noexcept {
130 return a.position_ == b.position_ && a.data_ == b.data_;
131 }
132
133 /*!
134 * \brief Comparison of two \c BitAccess instances for inequality.
135 *
136 * This does not compare the value the \c BitAccess instance refers to but
137 * rather if the two instances refer to different position or to different
138 * \c BitVector.
139 * \param a First \c BitAccess instance.
140 * \param b Second \c BitAccess instance.
141 * \return \c true if both instances refer to different position or
142 * different \c BitVector and \c false otherwise.
143 */
144 friend bool operator!=(BitAccess const& a, BitAccess const& b) noexcept {
145 return a.position_ != b.position_ || a.data_ != b.data_;
146 }
147
148private:
149 /*!
150 * \brief The copy constructor is private and should not be used.
151 *
152 * We just use the copy constructor internally for the \c BitVector. There
153 * is no reason for the constructor to be called anywhere else.
154 */
155 BitAccess(BitAccess const&) = default;
156}; // class BitAccess
157
158//! \addtogroup pasta_bit_vector
159//! \{
160
161/*!\brief Uncompressed, highly tuned, fixed size bit vector
162 *
163 * The uncompressed bit vector can be used as a replacement for
164 * \c std::vector<bool>, when the size of the vector is known in advance. It
165 * provides an access operator.
166 *
167 * **Important:** If you plan on accessing the data directly, note that the
168 * bits are stored in reverse order in the 64-bit words. This saves an
169 * subtraction, when shifting the bits for access. To be more precise, the
170 * bits are stored as follows (for simplicity, we show it for 8-bit words):
171 *
172 * | 7 6 5 4 3 2 1 0 | 15 14 13 12 11 10 9 8 | 23 22 21 20 19 18 17 16 | ...
173 *
174 * \todo Add all required support to make bit vector work as a true (fixed
175 * replacement of \c std::vector<bool>).
176 * \todo Create dynamic sized bit vector (true replacement of
177 * \c std::vector<bool>).
178 */
180private:
181 //! Forward declaration.
182 template <OptimizedFor o, typename v>
183 friend class Rank;
184 //! Forward declaration.
185 template <OptimizedFor o, typename v>
186 friend class FlatRank;
187 //! Forward declaration.
188 template <OptimizedFor o, typename v>
189 friend class WideRank;
190 //! Forward declaration.
191 template <OptimizedFor o, typename v>
192 friend class RankSelect;
193 //! Forward declaration.
194 template <OptimizedFor o, FindL2FlatWith f, typename v>
195 friend class FlatRankSelect;
196 //! Forward declaration.
197 template <OptimizedFor o, FindL2WideWith f, typename v>
198 friend class WideRankSelect;
199
200public:
201 //! Type that is used to store the raw data of the bit vector.
202 using RawDataType = uint64_t;
203 //! Pointer to the data type that is used to store the raw data of the bit
204 //! vector.
206 //! Type that can be used to access constant values of the raw data used to
207 //! represent the bit vector.
209
210private:
211 //! Size of the bit vector in bits.
212 size_t bit_size_ = 0;
213 //! Size of the underlying data used to store the bits.
214 size_t size_ = 0;
215 //! Array of 64-bit words used to store the content of the bit vector.
216 tlx::SimpleVector<RawDataType, tlx::SimpleVectorMode::NoInitNoDestroy> data_;
217 //! Pointer to the raw data of the bit vector.
218 RawDataPointer raw_data_ = nullptr;
219
220public:
221 /*!
222 * \brief Custom iterator for \c BitVector
223 */
224 struct Iterator {
225 //! Iterator category.
226 using iterator_category = std::forward_iterator_tag;
227 //! Difference type.
228 using differece_type = std::ptrdiff_t;
229 //! Value type is not bit but the \c BitAccess object used for access.
231 //! Pointer type.
233 //! Reference type.
235
236 /*!
237 * \brief Constructor. Creates Iterator pointing at a bit and holds
238 * pointer to data.
239 * \param data Pointer to the beginning of the \c BitVector.
240 * \param position Position the iterator is pointing at.
241 */
242 Iterator(uint64_t* const data, size_t const position) noexcept
243 : bit_access_(data, position) {}
244
245 /*!
246 * \brief Iterator is dereferenceable. Obtain value it is pointing at.
247 * \return Reference to the bit the iterator points at.
248 */
249 reference operator*() noexcept {
250 return bit_access_;
251 }
252
253 /*!
254 * \brief Iterator is dereferenceable. Obtain value it is pointing at.
255 * \return Pointer to the bit the iterator points at.
256 */
257 pointer operator->() noexcept {
258 return &bit_access_;
259 }
260
261 //! Prefix increment.
262 Iterator& operator++() noexcept {
263 ++bit_access_.position_;
264 return *this;
265 }
266
267 //! Postfix increment.
268 Iterator operator++(int32_t) noexcept {
269 auto tmp = *this;
270 ++(*this);
271 return tmp;
272 }
273
274 //! Iterator comparison equality.
275 friend bool operator==(Iterator const& a, Iterator const& b) noexcept {
276 return a.bit_access_ == b.bit_access_;
277 }
278
279 //! Iterator comparison inequality.
280 friend bool operator!=(Iterator const& a, Iterator const& b) noexcept {
281 return a.bit_access_ != b.bit_access_;
282 }
283
284 //! Iterator distance computation.
286 Iterator const& b) noexcept {
287 return a.bit_access_.distance(b.bit_access_);
288 }
289
290 private:
291 BitAccess bit_access_;
292 }; // struct BitVector::Iterator
293
294 //! Default empty constructor.
295 BitVector() = default;
296
297 //! Deleted copy constructor.
298 BitVector(BitVector const&) = delete;
299
300 //! Deleted copy assignment.
301 BitVector& operator=(BitVector const&) = delete;
302
303 /*!
304 * \brief Constructor. Creates a bit vector that holds a specific, fixed
305 * number of bits.
306 * \param size Number of bits the bit vector contains.
307 */
308 BitVector(size_t const size) noexcept
309 : bit_size_(size),
310 size_((bit_size_ >> 6) + 1),
311 data_(size_),
312 raw_data_(data_.data()) {}
313
314 /*!
315 * \brief Constructor. Creates a bit vector that holds a specific, fixed
316 * number of bits set to a default value.
317 * \param size Number of bits the bit vector contains.
318 * \param init_value Value all bits initially are set to. Either 0
319 * (\c false) or 1 (\c true).
320 */
321 BitVector(size_t const size, bool const init_value) noexcept
322 : BitVector(size) {
323 uint64_t const fill_value = init_value ? ~(0ULL) : 0ULL;
324 std::fill_n(raw_data_, size_, fill_value);
325 }
326
327 /*!
328 * \brief Access operator to read/write to a bit of the bit vector.
329 * \param index Index of the bit to be read/write to in the bit vector.
330 * \return \c BitAccess that allows to access to a single bit.
331 */
332 BitAccess operator[](size_t const index) noexcept {
333 return BitAccess(raw_data_, index);
334 }
335
336 /*!
337 * \brief Access operator to read to a bit of the bit vector.
338 * \param index Index of the bit to be read to in the bit vector.
339 * \return \c BitAccess that allows to read access to a single bit.
340 */
341 BitAccess operator[](size_t const index) const noexcept {
342 return BitAccess(raw_data_, index);
343 }
344
345 /*!
346 * \brief Resize the bit vector to contain \c size bits.
347 * \param size Number of bits the resized bit vector contains.
348 */
349 void resize(size_t const size) noexcept {
350 bit_size_ = size;
351 size_ = (bit_size_ >> 6) + 1;
352 data_.resize(size_);
353 raw_data_ = data_.data();
354 }
355
356 /*!
357 * \brief Resize the bit vector to contain \c size bits and fill new bits with
358 * default value. \param size Number of bits the resized bit vector contains.
359 * \param init_value Value all bits that are appended to the bit vector (if
360 * any) will have.
361 */
362 void resize(size_t const size, bool const init_value) noexcept {
363 size_t const old_bit_size = bit_size_;
364 bit_size_ = size;
365 size_ = (bit_size_ >> 6) + 1;
366 data_.resize(size_);
367 raw_data_ = data_.data();
368
369 if (old_bit_size < bit_size_) {
370 size_t max_bitwise = std::min(bit_size_, ((old_bit_size + 63) / 64) * 64);
371 for (size_t i = old_bit_size; i < max_bitwise; ++i) {
372 operator[](i) = init_value;
373 }
374 size_t const old_size = (old_bit_size + 63) / 64;
375 uint64_t const fill_value = init_value ? ~(0ULL) : 0ULL;
376 std::fill_n(raw_data_ + old_size, size_ - old_size, fill_value);
377 }
378 }
379
380 /*!
381 * \brief Get iterator representing the first element of the \c BitVector.
382 * \return Iterator representing the first element of the \c BitVector.
383 */
384 Iterator begin() noexcept {
385 return Iterator(raw_data_, 0);
386 }
387
388 /*!
389 * \brief Get iterator representing the end of the \c BitVector.
390 *
391 * The \c end() method returns an iterator that may refers to an invalid
392 * memory address. It should never be accessed directly.
393 * \return Iterator representing the end of the \c BitVector.
394 */
395 Iterator end() noexcept {
396 return Iterator(raw_data_, bit_size_);
397 }
398
399 /*!
400 * \brief Direct access to the raw data of the bit vector.
401 *
402 * Note that the raw data does not contain the bits from left to right. A
403 * detailed description can be found at the top of this file.
404 * \return \c std::span<uint64_t> pointing to the bit vector's raw data.
405 */
406 std::span<uint64_t> data() noexcept {
407 return std::span{raw_data_, size_};
408 }
409
410 /*!
411 * \brief Direct access to the raw data of the bit vector.
412 *
413 * Note that the raw data does not contain the bits from left to right. A
414 * detailed description can be found at the top of this file.
415 * \return \c std::span<uint64_t> pointing to the bit vector's raw data.
416 */
417 std::span<uint64_t> data() const noexcept {
418 return std::span{raw_data_, size_};
419 }
420
421 /*!
422 * \brief Direct access to one 64-bit element of the raw data of the bit
423 * vector.
424 *
425 * Note that the raw data does not contain the bits from left to right. A
426 * detailed description can be found at the top of this file.
427 * \param index Index of the 64-bit bit word that should be returned.
428 * \return index-th 64-bit word of the raw bit vector data.
429 */
430 uint64_t data(size_t const index) const noexcept {
431 return raw_data_[index];
432 }
433
434 /*!
435 * \brief Estimate for the space usage.
436 * \return Number of bytes used by this data structure.
437 */
438 [[nodiscard("space usage computed but not used")]] size_t
439 space_usage() const {
440 return (data_.size() * sizeof(RawDataType)) + sizeof(*this);
441 }
442
443 /*!
444 * \brief Get the size of the bit vector in bits.
445 * \return Size of the bit vector in bits.
446 */
447 size_t size() const noexcept {
448 return bit_size_;
449 }
450
451 //! formatted output of the \c BitVector
452 friend std::ostream& operator<<(std::ostream& os, BitVector const& bv) {
453 for (size_t i = 0; i < bv.bit_size_; ++i) {
454 os << (bv[i] ? "1" : "0");
455 }
456 return os;
457 }
458
459}; // class BitVector
460
461//! \}
462
463} // namespace pasta
464
465/******************************************************************************/
Utility class used for the access operator of the BitVector.
Definition: bit_vector.hpp:46
BitAccess(BitAccess &&)=delete
Avoiding implicit generation of the move constructor.
int64_t distance(BitAccess const &other) const noexcept
Computes the distance to another BitAccess instance.
Definition: bit_vector.hpp:114
BitAccess & operator=(BitAccess const &)=delete
Avoiding implicit copy assignment.
BitAccess & operator=(bool const value) noexcept
Assignment operator to set a bit within the bit vector.
Definition: bit_vector.hpp:97
BitAccess()=delete
Deleted constructor.
BitAccess & operator=(BitAccess &&)=delete
Avoiding implicit move assignment.
friend bool operator==(BitAccess const &a, BitAccess const &b) noexcept
Comparison of two BitAccess instances for equality.
Definition: bit_vector.hpp:129
BitAccess(uint64_t *const data, size_t const position) noexcept
Constructor setting 64-bit word and position of the bit in the word.
Definition: bit_vector.hpp:77
friend bool operator!=(BitAccess const &a, BitAccess const &b) noexcept
Comparison of two BitAccess instances for inequality.
Definition: bit_vector.hpp:144
Uncompressed, highly tuned, fixed size bit vector.
Definition: bit_vector.hpp:179
Iterator begin() noexcept
Get iterator representing the first element of the BitVector.
Definition: bit_vector.hpp:384
BitAccess operator[](size_t const index) const noexcept
Access operator to read to a bit of the bit vector.
Definition: bit_vector.hpp:341
BitVector & operator=(BitVector const &)=delete
Deleted copy assignment.
uint64_t RawDataType
Type that is used to store the raw data of the bit vector.
Definition: bit_vector.hpp:202
BitVector()=default
Default empty constructor.
RawDataType const * RawDataConstAccess
Definition: bit_vector.hpp:208
friend std::ostream & operator<<(std::ostream &os, BitVector const &bv)
formatted output of the BitVector
Definition: bit_vector.hpp:452
std::span< uint64_t > data() const noexcept
Direct access to the raw data of the bit vector.
Definition: bit_vector.hpp:417
RawDataType * RawDataPointer
Definition: bit_vector.hpp:205
size_t size() const noexcept
Get the size of the bit vector in bits.
Definition: bit_vector.hpp:447
BitVector(size_t const size, bool const init_value) noexcept
Constructor. Creates a bit vector that holds a specific, fixed number of bits set to a default value.
Definition: bit_vector.hpp:321
uint64_t data(size_t const index) const noexcept
Direct access to one 64-bit element of the raw data of the bit vector.
Definition: bit_vector.hpp:430
Iterator end() noexcept
Get iterator representing the end of the BitVector.
Definition: bit_vector.hpp:395
BitVector(BitVector const &)=delete
Deleted copy constructor.
void resize(size_t const size) noexcept
Resize the bit vector to contain size bits.
Definition: bit_vector.hpp:349
std::span< uint64_t > data() noexcept
Direct access to the raw data of the bit vector.
Definition: bit_vector.hpp:406
void resize(size_t const size, bool const init_value) noexcept
Resize the bit vector to contain size bits and fill new bits with default value.
Definition: bit_vector.hpp:362
size_t space_usage() const
Estimate for the space usage.
Definition: bit_vector.hpp:439
BitVector(size_t const size) noexcept
Constructor. Creates a bit vector that holds a specific, fixed number of bits.
Definition: bit_vector.hpp:308
BitAccess operator[](size_t const index) noexcept
Access operator to read/write to a bit of the bit vector.
Definition: bit_vector.hpp:332
Select support for BitVector that can be used as an alternative to RankSelect for bit vectors up to l...
Definition: flat_rank_select.hpp:73
Rank support for BitVector that can be used as an alternative to Rank for bit vectors up to length 2^...
Definition: flat_rank.hpp:82
Select support for BitVector.
Definition: rank_select.hpp:77
Rank support for BitVector.
Definition: rank.hpp:102
Select support for BitVector.
Definition: wide_rank_select.hpp:62
Rank support for BitVector that can be used as an alternative to Rank for bit vectors up to length 2^...
Definition: wide_rank.hpp:76
Custom iterator for BitVector.
Definition: bit_vector.hpp:224
friend bool operator==(Iterator const &a, Iterator const &b) noexcept
Iterator comparison equality.
Definition: bit_vector.hpp:275
Iterator operator++(int32_t) noexcept
Postfix increment.
Definition: bit_vector.hpp:268
Iterator & operator++() noexcept
Prefix increment.
Definition: bit_vector.hpp:262
std::forward_iterator_tag iterator_category
Iterator category.
Definition: bit_vector.hpp:226
Iterator(uint64_t *const data, size_t const position) noexcept
Constructor. Creates Iterator pointing at a bit and holds pointer to data.
Definition: bit_vector.hpp:242
friend bool operator!=(Iterator const &a, Iterator const &b) noexcept
Iterator comparison inequality.
Definition: bit_vector.hpp:280
pointer operator->() noexcept
Iterator is dereferenceable. Obtain value it is pointing at.
Definition: bit_vector.hpp:257
friend differece_type operator-(Iterator const &a, Iterator const &b) noexcept
Iterator distance computation.
Definition: bit_vector.hpp:285
std::ptrdiff_t differece_type
Difference type.
Definition: bit_vector.hpp:228
reference operator*() noexcept
Iterator is dereferenceable. Obtain value it is pointing at.
Definition: bit_vector.hpp:249