pasta_bit_vector  1.0.1
Bit Vector with Compact and Fast Rank and Select Support
Loading...
Searching...
No Matches
bit_vector.hpp
1/*******************************************************************************
2 * This file is part of pasta::bit_vector.
3 *
4 * Copyright (C) 2019-2021 Florian Kurpicz <florian@kurpicz.org>
5 *
6 * pasta::bit_vector is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * pasta::bit_vector is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with pasta::bit_vector. If not, see <http://www.gnu.org/licenses/>.
18 *
19 ******************************************************************************/
20
21#pragma once
22
23#include "pasta/bit_vector/support/find_l2_flat_with.hpp"
24#include "pasta/bit_vector/support/find_l2_wide_with.hpp"
26
27#include <algorithm>
28#include <cstddef>
29#include <cstdint>
30#include <iostream>
31#include <iterator>
32#include <span>
33#include <tlx/container/simple_vector.hpp>
34
35namespace pasta {
36
37/*!
38 * \brief Utility class used for the access operator of the \c BitVector.
39 *
40 * This utility class is created when the bit vector is accessed through the
41 * access operator. It cannot be created explicitly and cannot be assigned. It
42 * can only be cast to bool (for read access) or be assigned a bool (for write
43 * access).
44 */
45class BitAccess {
46 //! Forward declaration.
47 friend class BitVector;
48
49 //! 64-bit word the bit is contained in.
50 uint64_t* const data_;
51 //! Position of the bit within the 64-bit word.
52 size_t position_;
53
54public:
55 //! Deleted constructor.
56 BitAccess() = delete;
57 //! Avoiding implicit generation of the move constructor.
58 BitAccess(BitAccess&&) = delete;
59
60 //! Avoiding implicit copy assignment.
61 BitAccess& operator=(BitAccess const&) = delete;
62 //! Avoiding implicit move assignment.
64
65 /*!
66 * \brief Constructor setting 64-bit word and position of the bit in the
67 * word.
68 *
69 * This constructor can be used when a single bit in the bit vector has to
70 * be accessed and one does not want to extract the bit from the 64-bit word
71 * manually.
72 *
73 * \param data Pointer to the 64-bit word that contains the bit.
74 * \param position Position of the bit within the 64-bit word.
75 */
76 BitAccess(uint64_t* const data, size_t const position) noexcept
77 : data_(data),
78 position_(position) {}
79
80 /*!
81 * \brief User-defined conversion function to bool.
82 *
83 * Used for read access to a single bit.
84 *
85 */
86 operator bool() const noexcept {
87 uint8_t const offset = uint64_t(position_) & uint64_t(0b111111);
88 return (data_[position_ >> 6] >> offset) & 1ULL;
89 }
90
91 /*!
92 * \brief Assignment operator to set a bit within the bit vector.
93 * \param value Value the bit should be written to.
94 * \return This after the bit has been written.
95 */
96 BitAccess& operator=(bool const value) noexcept {
97 // https://graphics.stanford.edu/~seander/bithacks.html
98 // (ConditionalSetOrClearBitsWithoutBranching)
99 uint64_t const mask = 1ULL << (uint64_t(position_) & uint64_t(0b111111));
100 data_[position_ >> 6] = (data_[position_ >> 6] & ~mask) | (-value & mask);
101 return *this;
102 }
103
104 /*!
105 * \brief Computes the distance to another \c BitAccess instance.
106 *
107 * This function is only used within the iterator and usually should not be
108 * required in any other situation. It has **undefined** behavior if used
109 * for two instances from different \c BitVector.
110 * \param other The \c BitAccess instance the distance is computed to.
111 * \return The distance between \c this and the other \c BitAccess instance.
112 */
113 int64_t distance(BitAccess const& other) const noexcept {
114 return position_ - other.position_;
115 }
116
117 /*!
118 * \brief Comparison of two \c BitAccess instances for equality.
119 *
120 * This does not compare the value the \c BitAccess instance refers to but
121 * rather if the two instances refer to the same position in the same
122 * \c BitVector.
123 * \param a First \c BitAccess instance.
124 * \param b Second \c BitAccess instance.
125 * \return \c true if both instances refer to the same position in the same
126 * \c BitVector and \c false otherwise.
127 */
128 friend bool operator==(BitAccess const& a, BitAccess const& b) noexcept {
129 return a.position_ == b.position_ && a.data_ == b.data_;
130 }
131
132 /*!
133 * \brief Comparison of two \c BitAccess instances for inequality.
134 *
135 * This does not compare the value the \c BitAccess instance refers to but
136 * rather if the two instances refer to different position or to different
137 * \c BitVector.
138 * \param a First \c BitAccess instance.
139 * \param b Second \c BitAccess instance.
140 * \return \c true if both instances refer to different position or
141 * different \c BitVector and \c false otherwise.
142 */
143 friend bool operator!=(BitAccess const& a, BitAccess const& b) noexcept {
144 return a.position_ != b.position_ || a.data_ != b.data_;
145 }
146
147private:
148 /*!
149 * \brief The copy constructor is private and should not be used.
150 *
151 * We just use the copy constructor internally for the \c BitVector. There
152 * is no reason for the constructor to be called anywhere else.
153 */
154 BitAccess(BitAccess const&) = default;
155}; // class BitAccess
156
157//! \addtogroup pasta_bit_vector
158//! \{
159
160/*!\brief Uncompressed, highly tuned, fixed size bit vector
161 *
162 * The uncompressed bit vector can be used as a replacement for
163 * \c std::vector<bool>, when the size of the vector is known in advance. It
164 * provides an access operator.
165 *
166 * **Important:** If you plan on accessing the data directly, note that the
167 * bits are stored in reverse order in the 64-bit words. This saves an
168 * subtraction, when shifting the bits for access. To be more precise, the
169 * bits are stored as follows (for simplicity, we show it for 8-bit words):
170 *
171 * | 7 6 5 4 3 2 1 0 | 15 14 13 12 11 10 9 8 | 23 22 21 20 19 18 17 16 | ...
172 *
173 * \todo Add all required support to make bit vector work as a true (fixed
174 * replacement of \c std::vector<bool>).
175 * \todo Create dynamic sized bit vector (true replacement of
176 * \c std::vector<bool>).
177 */
179public:
180 //! Type that is used to store the raw data of the bit vector.
181 using RawDataType = uint64_t;
182 //! Pointer to the data type that is used to store the raw data of the bit
183 //! vector.
185 //! Type that can be used to access constant values of the raw data used to
186 //! represent the bit vector.
188
189private:
190 //! Size of the bit vector in bits.
191 size_t bit_size_ = 0;
192 //! Size of the underlying data used to store the bits.
193 size_t size_ = 0;
194 //! Array of 64-bit words used to store the content of the bit vector.
195 tlx::SimpleVector<RawDataType, tlx::SimpleVectorMode::NoInitNoDestroy> data_;
196 //! Pointer to the raw data of the bit vector.
197 RawDataPointer raw_data_ = nullptr;
198
199public:
200 /*!
201 * \brief Custom iterator for \c BitVector
202 */
203 struct Iterator {
204 //! Iterator category.
205 using iterator_category = std::forward_iterator_tag;
206 //! Difference type.
207 using differece_type = std::ptrdiff_t;
208 //! Value type is not bit but the \c BitAccess object used for access.
210 //! Pointer type.
212 //! Reference type.
214
215 /*!
216 * \brief Constructor. Creates Iterator pointing at a bit and holds
217 * pointer to data.
218 * \param data Pointer to the beginning of the \c BitVector.
219 * \param position Position the iterator is pointing at.
220 */
221 Iterator(uint64_t* const data, size_t const position) noexcept
222 : bit_access_(data, position) {}
223
224 /*!
225 * \brief Iterator is dereferenceable. Obtain value it is pointing at.
226 * \return Reference to the bit the iterator points at.
227 */
228 reference operator*() noexcept {
229 return bit_access_;
230 }
231
232 /*!
233 * \brief Iterator is dereferenceable. Obtain value it is pointing at.
234 * \return Pointer to the bit the iterator points at.
235 */
236 pointer operator->() noexcept {
237 return &bit_access_;
238 }
239
240 //! Prefix increment.
241 Iterator& operator++() noexcept {
242 ++bit_access_.position_;
243 return *this;
244 }
245
246 //! Postfix increment.
247 Iterator operator++(int32_t) noexcept {
248 auto tmp = *this;
249 ++(*this);
250 return tmp;
251 }
252
253 //! Iterator comparison equality.
254 friend bool operator==(Iterator const& a, Iterator const& b) noexcept {
255 return a.bit_access_ == b.bit_access_;
256 }
257
258 //! Iterator comparison inequality.
259 friend bool operator!=(Iterator const& a, Iterator const& b) noexcept {
260 return a.bit_access_ != b.bit_access_;
261 }
262
263 //! Iterator distance computation.
265 Iterator const& b) noexcept {
266 return a.bit_access_.distance(b.bit_access_);
267 }
268
269 private:
270 BitAccess bit_access_;
271 }; // struct BitVector::Iterator
272
273 //! Default empty constructor.
274 BitVector() = default;
275
276 //! Deleted copy constructor.
277 BitVector(BitVector const&) = delete;
278
279 //! Deleted copy assignment.
280 BitVector& operator=(BitVector const&) = delete;
281
282 /*!
283 * \brief Constructor. Creates a bit vector that holds a specific, fixed
284 * number of bits.
285 * \param size Number of bits the bit vector contains.
286 */
287 BitVector(size_t const size) noexcept
288 : bit_size_(size),
289 size_((bit_size_ >> 6) + 1),
290 data_(size_),
291 raw_data_(data_.data()) {}
292
293 /*!
294 * \brief Constructor. Creates a bit vector that holds a specific, fixed
295 * number of bits set to a default value.
296 * \param size Number of bits the bit vector contains.
297 * \param init_value Value all bits initially are set to. Either 0
298 * (\c false) or 1 (\c true).
299 */
300 BitVector(size_t const size, bool const init_value) noexcept
301 : BitVector(size) {
302 uint64_t const fill_value = init_value ? ~(0ULL) : 0ULL;
303 std::fill_n(raw_data_, size_, fill_value);
304 }
305
306 /*!
307 * \brief Access operator to read/write to a bit of the bit vector.
308 * \param index Index of the bit to be read/write to in the bit vector.
309 * \return \c BitAccess that allows to access to a single bit.
310 */
311 BitAccess operator[](size_t const index) noexcept {
312 return BitAccess(raw_data_, index);
313 }
314
315 /*!
316 * \brief Access operator to read to a bit of the bit vector.
317 * \param index Index of the bit to be read to in the bit vector.
318 * \return \c BitAccess that allows to read access to a single bit.
319 */
320 BitAccess operator[](size_t const index) const noexcept {
321 return BitAccess(raw_data_, index);
322 }
323
324 /*!
325 * \brief Resize the bit vector to contain \c size bits.
326 * \param size Number of bits the resized bit vector contains.
327 */
328 void resize(size_t const size) noexcept {
329 bit_size_ = size;
330 size_ = (bit_size_ >> 6) + 1;
331 data_.resize(size_);
332 raw_data_ = data_.data();
333 }
334
335 /*!
336 * \brief Resize the bit vector to contain \c size bits and fill new bits with
337 * default value. \param size Number of bits the resized bit vector contains.
338 * \param init_value Value all bits that are appended to the bit vector (if
339 * any) will have.
340 */
341 void resize(size_t const size, bool const init_value) noexcept {
342 size_t const old_bit_size = bit_size_;
343 bit_size_ = size;
344 size_ = (bit_size_ >> 6) + 1;
345 data_.resize(size_);
346 raw_data_ = data_.data();
347
348 if (old_bit_size < bit_size_) {
349 size_t max_bitwise = std::min(bit_size_, ((old_bit_size + 63) / 64) * 64);
350 for (size_t i = old_bit_size; i < max_bitwise; ++i) {
351 operator[](i) = init_value;
352 }
353 size_t const old_size = (old_bit_size + 63) / 64;
354 uint64_t const fill_value = init_value ? ~(0ULL) : 0ULL;
355 std::fill_n(raw_data_ + old_size, size_ - old_size, fill_value);
356 }
357 }
358
359 /*!
360 * \brief Get iterator representing the first element of the \c BitVector.
361 * \return Iterator representing the first element of the \c BitVector.
362 */
363 Iterator begin() noexcept {
364 return Iterator(raw_data_, 0);
365 }
366
367 /*!
368 * \brief Get iterator representing the end of the \c BitVector.
369 *
370 * The \c end() method returns an iterator that may refers to an invalid
371 * memory address. It should never be accessed directly.
372 * \return Iterator representing the end of the \c BitVector.
373 */
374 Iterator end() noexcept {
375 return Iterator(raw_data_, bit_size_);
376 }
377
378 /*!
379 * \brief Direct access to the raw data of the bit vector.
380 *
381 * Note that the raw data does not contain the bits from left to right. A
382 * detailed description can be found at the top of this file.
383 * \return \c std::span<uint64_t> pointing to the bit vector's raw data.
384 */
385 std::span<uint64_t> data() noexcept {
386 return std::span{raw_data_, size_};
387 }
388
389 /*!
390 * \brief Direct access to the raw data of the bit vector.
391 *
392 * Note that the raw data does not contain the bits from left to right. A
393 * detailed description can be found at the top of this file.
394 * \return \c std::span<uint64_t> pointing to the bit vector's raw data.
395 */
396 std::span<uint64_t> data() const noexcept {
397 return std::span{raw_data_, size_};
398 }
399
400 /*!
401 * \brief Direct access to one 64-bit element of the raw data of the bit
402 * vector.
403 *
404 * Note that the raw data does not contain the bits from left to right. A
405 * detailed description can be found at the top of this file.
406 * \param index Index of the 64-bit bit word that should be returned.
407 * \return index-th 64-bit word of the raw bit vector data.
408 */
409 uint64_t data(size_t const index) const noexcept {
410 return raw_data_[index];
411 }
412
413 /*!
414 * \brief Estimate for the space usage.
415 * \return Number of bytes used by this data structure.
416 */
417 [[nodiscard("space usage computed but not used")]] size_t
418 space_usage() const {
419 return (data_.size() * sizeof(RawDataType)) + sizeof(*this);
420 }
421
422 /*!
423 * \brief Get the size of the bit vector in bits.
424 * \return Size of the bit vector in bits.
425 */
426 size_t size() const noexcept {
427 return bit_size_;
428 }
429
430 //! formatted output of the \c BitVector
431 friend std::ostream& operator<<(std::ostream& os, BitVector const& bv) {
432 for (size_t i = 0; i < bv.bit_size_; ++i) {
433 os << (bv[i] ? "1" : "0");
434 }
435 return os;
436 }
437
438}; // class BitVector
439
440//! \}
441
442} // namespace pasta
443
444/******************************************************************************/
Utility class used for the access operator of the BitVector.
Definition bit_vector.hpp:45
BitAccess(BitAccess &&)=delete
Avoiding implicit generation of the move constructor.
int64_t distance(BitAccess const &other) const noexcept
Computes the distance to another BitAccess instance.
Definition bit_vector.hpp:113
BitAccess & operator=(BitAccess const &)=delete
Avoiding implicit copy assignment.
BitAccess & operator=(bool const value) noexcept
Assignment operator to set a bit within the bit vector.
Definition bit_vector.hpp:96
BitAccess()=delete
Deleted constructor.
BitAccess & operator=(BitAccess &&)=delete
Avoiding implicit move assignment.
friend bool operator==(BitAccess const &a, BitAccess const &b) noexcept
Comparison of two BitAccess instances for equality.
Definition bit_vector.hpp:128
BitAccess(uint64_t *const data, size_t const position) noexcept
Constructor setting 64-bit word and position of the bit in the word.
Definition bit_vector.hpp:76
friend bool operator!=(BitAccess const &a, BitAccess const &b) noexcept
Comparison of two BitAccess instances for inequality.
Definition bit_vector.hpp:143
Uncompressed, highly tuned, fixed size bit vector.
Definition bit_vector.hpp:178
Iterator begin() noexcept
Get iterator representing the first element of the BitVector.
Definition bit_vector.hpp:363
BitAccess operator[](size_t const index) const noexcept
Access operator to read to a bit of the bit vector.
Definition bit_vector.hpp:320
BitVector & operator=(BitVector const &)=delete
Deleted copy assignment.
uint64_t RawDataType
Type that is used to store the raw data of the bit vector.
Definition bit_vector.hpp:181
BitVector()=default
Default empty constructor.
RawDataType const * RawDataConstAccess
Definition bit_vector.hpp:187
friend std::ostream & operator<<(std::ostream &os, BitVector const &bv)
formatted output of the BitVector
Definition bit_vector.hpp:431
std::span< uint64_t > data() const noexcept
Direct access to the raw data of the bit vector.
Definition bit_vector.hpp:396
RawDataType * RawDataPointer
Definition bit_vector.hpp:184
size_t size() const noexcept
Get the size of the bit vector in bits.
Definition bit_vector.hpp:426
BitVector(size_t const size, bool const init_value) noexcept
Constructor. Creates a bit vector that holds a specific, fixed number of bits set to a default value.
Definition bit_vector.hpp:300
uint64_t data(size_t const index) const noexcept
Direct access to one 64-bit element of the raw data of the bit vector.
Definition bit_vector.hpp:409
Iterator end() noexcept
Get iterator representing the end of the BitVector.
Definition bit_vector.hpp:374
BitVector(BitVector const &)=delete
Deleted copy constructor.
void resize(size_t const size) noexcept
Resize the bit vector to contain size bits.
Definition bit_vector.hpp:328
std::span< uint64_t > data() noexcept
Direct access to the raw data of the bit vector.
Definition bit_vector.hpp:385
void resize(size_t const size, bool const init_value) noexcept
Resize the bit vector to contain size bits and fill new bits with default value.
Definition bit_vector.hpp:341
size_t space_usage() const
Estimate for the space usage.
Definition bit_vector.hpp:418
BitVector(size_t const size) noexcept
Constructor. Creates a bit vector that holds a specific, fixed number of bits.
Definition bit_vector.hpp:287
BitAccess operator[](size_t const index) noexcept
Access operator to read/write to a bit of the bit vector.
Definition bit_vector.hpp:311
Custom iterator for BitVector.
Definition bit_vector.hpp:203
friend bool operator==(Iterator const &a, Iterator const &b) noexcept
Iterator comparison equality.
Definition bit_vector.hpp:254
Iterator operator++(int32_t) noexcept
Postfix increment.
Definition bit_vector.hpp:247
Iterator & operator++() noexcept
Prefix increment.
Definition bit_vector.hpp:241
std::forward_iterator_tag iterator_category
Iterator category.
Definition bit_vector.hpp:205
Iterator(uint64_t *const data, size_t const position) noexcept
Constructor. Creates Iterator pointing at a bit and holds pointer to data.
Definition bit_vector.hpp:221
friend bool operator!=(Iterator const &a, Iterator const &b) noexcept
Iterator comparison inequality.
Definition bit_vector.hpp:259
pointer operator->() noexcept
Iterator is dereferenceable. Obtain value it is pointing at.
Definition bit_vector.hpp:236
friend differece_type operator-(Iterator const &a, Iterator const &b) noexcept
Iterator distance computation.
Definition bit_vector.hpp:264
std::ptrdiff_t differece_type
Difference type.
Definition bit_vector.hpp:207
reference operator*() noexcept
Iterator is dereferenceable. Obtain value it is pointing at.
Definition bit_vector.hpp:228