pasta::bit_vector  v1.0.0
Compact and Fast Rank and Select Data Structure for Bit Vectors
wide_rank_select.hpp
1/*******************************************************************************
2 * This file is part of pasta::bit_vector.
3 *
4 * Copyright (C) 2021 Florian Kurpicz <florian@kurpicz.org>
5 *
6 * pasta::bit_vector is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * pasta::bit_vector is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with pasta::bit_vector. If not, see <http://www.gnu.org/licenses/>.
18 *
19 ******************************************************************************/
20
21#pragma once
22
23#include "pasta/bit_vector/bit_vector.hpp"
24#include "pasta/bit_vector/support/find_l2_wide_with.hpp"
25#include "pasta/bit_vector/support/optimized_for.hpp"
26#include "pasta/bit_vector/support/popcount.hpp"
27#include "pasta/bit_vector/support/select.hpp"
28#include "pasta/bit_vector/support/wide_rank.hpp"
29
30#include <cstddef>
31#include <cstdint>
32#include <limits>
33#include <tlx/container/simple_vector.hpp>
34#include <tlx/math.hpp>
35#include <vector>
36
37namespace pasta {
38
39//! \addtogroup pasta_bit_vector_rank_select
40//! \{
41
42/*!
43 * \brief Select support for \c BitVector
44 *
45 * The rank and select support is based on popcount and described in detail by
46 * Zhou et al. \cite ZhouAK2013PopcountRankSelect. The data structure consists
47 * of three levels (L0, L1, and L2) that contain different information
48 * regarding the popcount of the current block or all previous blocks.
49 *
50 * Since the rank data structures used by rank are a strict subset of the data
51 * structures required by select, we provide a (ever so slightly) more
52 * space-efficient rank only data structure in \c BitVectorRank.
53 *
54 * \tparam OptimizedFor Compile time option to optimize data structure for
55 * either 0, 1, or no specific type of query.
56 * \tparam VectorType Type of the vector the rank and select data structure is
57 * constructed for, e.g., plain \c BitVector or a compressed bit vector.
58 */
59template <OptimizedFor optimized_for = OptimizedFor::DONT_CARE,
60 FindL2WideWith find_with = FindL2WideWith::LINEAR_SEARCH,
61 typename VectorType = BitVector>
62class WideRankSelect : public WideRank<optimized_for> {
63 //! Get access to protected members of base class, as dependent
64 //! names are not considered.
65 using WideRank<optimized_for>::data_size_;
66 //! Get access to protected members of base class, as dependent
67 //! names are not considered.
68 using WideRank<optimized_for>::data_;
69 //! Get access to protected members of base class, as dependent
70 //! names are not considered.
71 using WideRank<optimized_for>::l1_;
72 //! Get access to protected members of base class, as dependent
73 //! names are not considered.
74 using WideRank<optimized_for>::l2_;
75
76 template <typename T>
77 using Array = tlx::SimpleVector<T, tlx::SimpleVectorMode::NoInitNoDestroy>;
78
79 // Members for the structure (needed only for select)
80 //! Positions of every \c SELECT_SAMPLE_RATE zero.
81 std::vector<uint32_t> samples0_;
82 //! Positions of every \c SELECT_SAMPLE_RATE one.
83 std::vector<uint32_t> samples1_;
84
85public:
86 //! Default constructor w/o parameter.
87 WideRankSelect() = default;
88 /*!
89 * \brief Constructor. Creates the auxiliary information for
90 * efficient rank and select queries.
91 *
92 * \param bv Vector of type \c VectorType the rank and select structure is
93 * created for.
94 */
95 WideRankSelect(VectorType& bv) : WideRank<optimized_for, VectorType>(bv) {
96 init();
97 }
98
99 //! Default move constructor.
100 WideRankSelect(WideRankSelect&& other) = default;
101
102 //! Default move assignment.
104
105 //! Destructor. Deleting manually created arrays.
106 ~WideRankSelect() = default;
107
108 /*!
109 * \brief Get position of specific zero, i.e., select.
110 * \param rank Rank of zero the position is searched for.
111 * \return Position of the rank-th zero.
112 */
113 [[nodiscard("select0 computed but not used")]] size_t
114 select0(size_t rank) const {
115 size_t const l1_end = l1_.size();
116 size_t const l2_end = l2_.size();
117
118 size_t l2_pos = ((rank - 1) / WideRankSelectConfig::SELECT_SAMPLE_RATE);
119 size_t l1_pos = l2_pos / 128;
120
121 if constexpr (optimize_one_or_dont_care(optimized_for)) {
122 while (l1_pos + 1 < l1_end &&
123 ((l1_pos + 1) * WideRankSelectConfig::L1_BIT_SIZE) -
124 l1_[l1_pos + 1] <
125 rank) {
126 ++l1_pos;
127 }
128 rank -= (l1_pos * WideRankSelectConfig::L1_BIT_SIZE) - l1_[l1_pos];
129 } else {
130 while (l1_pos + 1 < l1_end && l1_[l1_pos + 1] < rank) {
131 ++l1_pos;
132 }
133 rank -= l1_[l1_pos];
134 }
135
136 l2_pos = std::max(l1_pos * 128, l2_pos);
137
138 if constexpr (use_linear_search(find_with)) {
139 if constexpr (optimize_one_or_dont_care(optimized_for)) {
140 size_t added = 0;
141 while (l2_pos + 1 < l2_end &&
142 ((added + 1) * WideRankSelectConfig::L2_BIT_SIZE) -
143 l2_[l2_pos + 1] <
144 rank) {
145 ++l2_pos;
146 ++added;
147 }
148 rank -= (added * WideRankSelectConfig::L2_BIT_SIZE) - l2_[l2_pos];
149 } else {
150 while (l2_pos + 1 < l2_end && l2_[l2_pos + 1] < rank) {
151 ++l2_pos;
152 }
153 rank -= l2_[l2_pos];
154 }
155 } else if constexpr (use_binary_search(find_with)) {
156 size_t const end = std::min((l1_pos + 1) * 128, l2_end);
157 size_t const iterations = tlx::integer_log2_ceil(end - l2_pos + 1);
158 size_t size = 1ULL << (iterations - 1);
159 size_t mid = end - size;
160 size >>= 1;
161 // Starting positions of the next two intervals which we are
162 // going to use to prefetch the potential next comparison positions (mid).
163 size_t start_next_left = l2_pos;
164 size_t start_next_right = mid + 1;
165
166 if constexpr (optimize_one_or_dont_care(optimized_for)) {
167 size_t const offset = (128 * l1_pos);
168 while (size > 0) {
169 // The search space does not fit into one cache line, so we do
170 // some prefetching. We assume that a cache line has size 64
171 // bytes (until \c
172 // std::hardware_constructive_interference_size is finally
173 // available in gcc) and we have an array containing 2 byte
174 // elements (l2).
175 if (size > 16) {
176 __builtin_prefetch(&l2_[start_next_left + size], 0, 0);
177 __builtin_prefetch(&l2_[start_next_right + size], 0, 0);
178 }
179 start_next_left =
180 (rank > ((mid - offset) * WideRankSelectConfig::L2_BIT_SIZE) -
181 l2_[mid]) ?
182 start_next_right :
183 start_next_left;
184 start_next_right = start_next_left + size;
185 mid = start_next_left + size - 1;
186 size >>= 1;
187 }
188 l2_pos = (rank > ((mid - offset) * WideRankSelectConfig::L2_BIT_SIZE) -
189 l2_[mid]) ?
190 mid :
191 start_next_left - 1;
192 rank -= ((l2_pos - offset) * WideRankSelectConfig::L2_BIT_SIZE) -
193 l2_[l2_pos];
194 } else {
195 while (size > 0) {
196 // The search space does not fit into one cache line, so we do
197 // some prefetching. We assume that a cache line has size 64
198 // bytes (until \c
199 // std::hardware_constructive_interference_size is finally
200 // available in gcc) and we have an array containing 2 byte
201 // elements (l2).
202 if (size > 16) {
203 __builtin_prefetch(&l2_[start_next_left + size], 0, 0);
204 __builtin_prefetch(&l2_[start_next_right + size], 0, 0);
205 }
206 start_next_left =
207 (rank > l2_[mid]) ? start_next_right : start_next_left;
208 start_next_right = start_next_left + size;
209 mid = start_next_left + size - 1;
210 size >>= 1;
211 }
212 l2_pos = (rank > l2_[mid]) ? mid : start_next_left - 1;
213 rank -= l2_[l2_pos];
214 }
215 }
216
217 size_t last_pos = l2_pos * WideRankSelectConfig::L2_WORD_SIZE;
218 size_t popcount = 0;
219
220 while ((popcount = pasta::popcount_zeros<1>(data_ + last_pos)) < rank) {
221 ++last_pos;
222 rank -= popcount;
223 }
224 return (last_pos * 64) + select(~data_[last_pos], rank - 1);
225 }
226
227 /*!
228 * \brief Get position of specific one, i.e., select.
229 * \param rank Rank of one the position is searched for.
230 * \return Position of the rank-th one.
231 */
232 [[nodiscard("select1 computed but not used")]] size_t
233 select1(size_t rank) const {
234 size_t const l1_end = l1_.size();
235 size_t const l2_end = l2_.size();
236
237 size_t l2_pos = ((rank - 1) / WideRankSelectConfig::SELECT_SAMPLE_RATE);
238 size_t l1_pos = l2_pos / 128;
239
240 if constexpr (optimize_one_or_dont_care(optimized_for)) {
241 while (l1_pos + 1 < l1_end && l1_[l1_pos + 1] < rank) {
242 ++l1_pos;
243 }
244 rank -= l1_[l1_pos];
245 } else {
246 while (l1_pos + 1 < l1_end &&
247 ((l1_pos + 1) * WideRankSelectConfig::L1_BIT_SIZE) -
248 l1_[l1_pos + 1] <
249 rank) {
250 ++l1_pos;
251 }
252 rank -= (l1_pos * WideRankSelectConfig::L1_BIT_SIZE) - l1_[l1_pos];
253 }
254
255 l2_pos = std::max(l1_pos * 128, l2_pos);
256
257 if constexpr (use_linear_search(find_with)) {
258 if constexpr (optimize_one_or_dont_care(optimized_for)) {
259 while (l2_pos + 1 < l2_end && l2_[l2_pos + 1] < rank) {
260 ++l2_pos;
261 }
262 rank -= l2_[l2_pos];
263 } else {
264 size_t added = 0;
265 while (l2_pos + 1 < l2_end &&
266 ((added + 1) * WideRankSelectConfig::L2_BIT_SIZE) -
267 l2_[l2_pos + 1] <
268 rank) {
269 ++l2_pos;
270 ++added;
271 }
272 rank -= (added * WideRankSelectConfig::L2_BIT_SIZE) - l2_[l2_pos];
273 }
274 } else if constexpr (use_binary_search(find_with)) {
275 size_t const end = std::min((l1_pos + 1) * 128, l2_end);
276 size_t const iterations = tlx::integer_log2_ceil(end - l2_pos + 1);
277 size_t size = 1ULL << (iterations - 1);
278 size_t mid = end - size;
279 size >>= 1;
280 // Starting positions of the next two intervals which we are
281 // going to use to prefetch the potential next comparison positions (mid).
282 size_t start_next_left = l2_pos;
283 size_t start_next_right = mid + 1;
284
285 if constexpr (optimize_one_or_dont_care(optimized_for)) {
286 while (size > 0) {
287 // The search space does not fit into one cache line, so we do
288 // some prefetching. We assume that a cache line has size 64
289 // bytes (until \c
290 // std::hardware_constructive_interference_size is finally
291 // available in gcc) and we have an array containing 2 byte
292 // elements (l2).
293 if (size > 16) {
294 __builtin_prefetch(&l2_[start_next_left + size], 0, 0);
295 __builtin_prefetch(&l2_[start_next_right + size], 0, 0);
296 }
297 start_next_left =
298 (rank > l2_[mid]) ? start_next_right : start_next_left;
299 start_next_right = start_next_left + size;
300 mid = start_next_left + size - 1;
301 size >>= 1;
302 }
303 l2_pos = (rank > l2_[mid]) ? mid : start_next_left - 1;
304 rank -= l2_[l2_pos];
305 } else {
306 size_t const offset = (128 * l1_pos);
307 while (size > 0) {
308 // The search space does not fit into one cache line, so we do
309 // some prefetching. We assume that a cache line has size 64
310 // bytes (until \c
311 // std::hardware_constructive_interference_size is finally
312 // available in gcc) and we have an array containing 2 byte
313 // elements (l2).
314 if (size > 16) {
315 __builtin_prefetch(&l2_[start_next_left + size], 0, 0);
316 __builtin_prefetch(&l2_[start_next_right + size], 0, 0);
317 }
318 start_next_left =
319 (rank > ((mid - offset) * WideRankSelectConfig::L2_BIT_SIZE) -
320 l2_[mid]) ?
321 start_next_right :
322 start_next_left;
323 start_next_right = start_next_left + size;
324 mid = start_next_left + size - 1;
325 size >>= 1;
326 }
327 l2_pos = (rank > ((mid - offset) * WideRankSelectConfig::L2_BIT_SIZE) -
328 l2_[mid]) ?
329 mid :
330 start_next_left - 1;
331 rank -= ((l2_pos - offset) * WideRankSelectConfig::L2_BIT_SIZE) -
332 l2_[l2_pos];
333 }
334 }
335
336 size_t last_pos = l2_pos * WideRankSelectConfig::L2_WORD_SIZE;
337 size_t popcount = 0;
338
339 while ((popcount = pasta::popcount<1>(data_ + last_pos)) < rank) {
340 ++last_pos;
341 rank -= popcount;
342 }
343 return (last_pos * 64) + select(data_[last_pos], rank - 1);
344 }
345
346 /*!
347 * \brief Estimate for the space usage.
348 * \return Number of bytes used by this data structure.
349 */
350 [[nodiscard("space usage computed but not used")]] size_t
351 space_usage() const override {
352 return samples0_.size() * sizeof(uint32_t) +
353 samples1_.size() * sizeof(uint32_t) +
354 sizeof(*this); // included in sizeof(*this)
355 }
356
357private:
358 //! Function used initializing data structure to reduce LOCs of constructor.
359 void init() {
360 size_t const l2_end = l2_.size();
361 size_t next_sample0_value = 1;
362 size_t next_sample1_value = 1;
363 for (size_t l2_pos = 0, offset = 0; l2_pos < l2_end; ++l2_pos) {
364 offset = l1_[l2_pos / 128];
365 if constexpr (optimize_one_or_dont_care(optimized_for)) {
366 if ((l2_pos * WideRankSelectConfig::L2_BIT_SIZE) -
367 (offset + l2_[l2_pos] >= next_sample1_value)) {
368 samples0_.push_back(l2_pos - 1);
369 next_sample0_value += WideRankSelectConfig::SELECT_SAMPLE_RATE;
370 }
371 if (offset + l2_[l2_pos] >= next_sample1_value) {
372 samples1_.push_back(l2_pos - 1);
373 next_sample1_value += WideRankSelectConfig::SELECT_SAMPLE_RATE;
374 }
375 } else {
376 if (offset + l2_[l2_pos] >= next_sample1_value) {
377 samples0_.push_back(l2_pos - 1);
378 next_sample0_value += WideRankSelectConfig::SELECT_SAMPLE_RATE;
379 }
380 if ((l2_pos * WideRankSelectConfig::L2_BIT_SIZE) -
381 (offset + l2_[l2_pos] >= next_sample1_value)) {
382 samples1_.push_back(l2_pos - 1);
383 next_sample1_value += WideRankSelectConfig::SELECT_SAMPLE_RATE;
384 }
385 }
386 }
387 }
388}; // class WideRankSelect
389
390//! \}
391
392} // namespace pasta
393
394/******************************************************************************/
Select support for BitVector.
Definition: wide_rank_select.hpp:62
WideRankSelect(WideRankSelect &&other)=default
Default move constructor.
size_t space_usage() const override
Estimate for the space usage.
Definition: wide_rank_select.hpp:351
WideRankSelect(VectorType &bv)
Constructor. Creates the auxiliary information for efficient rank and select queries.
Definition: wide_rank_select.hpp:95
size_t select0(size_t rank) const
Get position of specific zero, i.e., select.
Definition: wide_rank_select.hpp:114
size_t select1(size_t rank) const
Get position of specific one, i.e., select.
Definition: wide_rank_select.hpp:233
WideRankSelect()=default
Default constructor w/o parameter.
WideRankSelect & operator=(WideRankSelect &&other)=default
Default move assignment.
~WideRankSelect()=default
Destructor. Deleting manually created arrays.
Rank support for BitVector that can be used as an alternative to Rank for bit vectors up to length 2^...
Definition: wide_rank.hpp:76
OptimizedFor
Enum used to specify which queries the rank (and select) data structures should be optimized for.
Definition: optimized_for.hpp:40
constexpr bool use_linear_search(FindL2FlatWith const find_with)
Helper function indicating whether a linear search function should be used.
Definition: find_l2_flat_with.hpp:48
constexpr bool optimize_one_or_dont_care(OptimizedFor const optimized_for)
Helper function indicating if queries should be optimized for one queries or the caller does not care...
Definition: optimized_for.hpp:59
constexpr bool use_binary_search(FindL2FlatWith const find_with)
Helper function indicating whether a binary search function should be used.
Definition: find_l2_flat_with.hpp:59
FindL2WideWith
Enum used to specify whether intrinsic functions should be used.
Definition: find_l2_wide_with.hpp:35
@ DONT_CARE
It does not matter (both types are called equally often).
static constexpr size_t L2_WORD_SIZE
Number of 64-bit words covered by an L2-block.
Definition: wide_rank.hpp:46
static constexpr size_t L1_BIT_SIZE
Bits covered by an L1-block.
Definition: wide_rank.hpp:43
static constexpr size_t SELECT_SAMPLE_RATE
Sample rate of positions for faster select queries.
Definition: wide_rank.hpp:51
static constexpr size_t L2_BIT_SIZE
Bits covered by an L2-block.
Definition: wide_rank.hpp:41