pasta::bit_vector  v1.0.0
Compact and Fast Rank and Select Data Structure for Bit Vectors
wide_rank.hpp
1/*******************************************************************************
2 * This file is part of pasta::bit_vector.
3 *
4 * Copyright (C) 2021 Florian Kurpicz <florian@kurpicz.org>
5 *
6 * pasta::bit_vector is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * pasta::bit_vector is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with pasta::bit_vector. If not, see <http://www.gnu.org/licenses/>.
18 *
19 ******************************************************************************/
20
21#pragma once
22
23#include "pasta/bit_vector/bit_vector.hpp"
24#include "pasta/bit_vector/support/find_l2_wide_with.hpp"
25#include "pasta/bit_vector/support/l12_type.hpp"
26#include "pasta/bit_vector/support/optimized_for.hpp"
27#include "pasta/bit_vector/support/popcount.hpp"
28#include "pasta/utils/container/aligned_vector.hpp"
29
30#include <tlx/container/simple_vector.hpp>
31
32namespace pasta {
33
34/*!
35 * \ingroup pasta_bit_vector_configuration
36 * \brief Static configuration for \c WideRank and
37 * \c WideRankSelect
38 */
40 //! Bits covered by an L2-block.
41 static constexpr size_t L2_BIT_SIZE = 512;
42 //! Bits covered by an L1-block.
43 static constexpr size_t L1_BIT_SIZE = 128 * L2_BIT_SIZE;
44
45 //! Number of 64-bit words covered by an L2-block.
46 static constexpr size_t L2_WORD_SIZE = L2_BIT_SIZE / (sizeof(uint64_t) * 8);
47 //! Number of 64-bit words covered by an L1-block.
48 static constexpr size_t L1_WORD_SIZE = L1_BIT_SIZE / (sizeof(uint64_t) * 8);
49
50 //! Sample rate of positions for faster select queries.
51 static constexpr size_t SELECT_SAMPLE_RATE = 8192;
52}; // struct WideRankSelectConfig
53
54//! \addtogroup pasta_bit_vector_rank
55//! \{
56
57/*!
58 * \brief %Rank support for \ref BitVector that can be used as an alternative
59 * to \ref Rank for bit vectors up to length 2^40.
60 *
61 * The rank support is an extended and engineered version of the popcount rank
62 * support by Zhou et al. \cite ZhouAK2013PopcountRankSelect. This flat rank
63 * support hoverer removes the highest utility array (L0) and also groups
64 * twice as many L2-blocks in a single L1-block. This allows us to store more
65 * information---most importantly the number of ones w.r.t. the beginning of
66 * the L1-block---in the L2-blocks. For the L1/2-block layout see
67 * \ref BigL12Type.
68 *
69 * \tparam OptimizedFor Compile time option to optimize data
70 * structure for either 0, 1, or no specific type of query.
71 * \tparam VectorType Type of the vector the rank data structure is constructed
72 * for, e.g., plain \c BitVector or a compressed bit vector.
73 */
74template <OptimizedFor optimized_for = OptimizedFor::DONT_CARE,
75 typename VectorType = BitVector>
76class WideRank {
77 //! Friend class, using internal information l12_.
78 template <OptimizedFor o, FindL2WideWith f, typename v>
79 friend class WideRankSelect;
80
81 //! Size of the bit vector the rank support is constructed for.
82 size_t data_size_;
83 //! Pointer to the data of the bit vector.
84 VectorType::RawDataConstAccess data_;
85
86 //! Array containing the information about the L1-blocks.
87 tlx::SimpleVector<uint64_t, tlx::SimpleVectorMode::NoInitNoDestroy> l1_;
88 //! Array containing the information about the L2-blocks.
89 tlx::SimpleVector<uint16_t, tlx::SimpleVectorMode::NoInitNoDestroy> l2_;
90
91public:
92 //! Default constructor w/o parameter.
93 WideRank() = default;
94
95 /*!
96 * \brief Constructor. Creates the auxiliary information for efficient rank
97 * queries.
98 * \param bv Vector of type \c VectorType the rank structure is created for.
99 */
100 WideRank(VectorType& bv)
101 : data_size_(bv.size_),
102 data_(bv.data_.data()),
103 l1_((data_size_ / WideRankSelectConfig::L1_WORD_SIZE) + 1),
104 l2_((data_size_ / WideRankSelectConfig::L2_WORD_SIZE) + 1) {
105 init();
106 }
107
108 /*!
109 * \brief Computes rank of zeros.
110 * \param index Index the rank of zeros is computed for.
111 * \return Number of zeros (rank) before position \c index.
112 */
113 [[nodiscard("rank0 computed but not used")]] size_t
114 rank0(size_t index) const {
115 return index - rank1(index);
116 }
117
118 /*!
119 * \brief Computes rank of ones.
120 * \param index Index the rank of ones is computed for.
121 * \return Number of ones (rank) before position \c index.
122 */
123 [[nodiscard("rank1 computed but not used")]] size_t
124 rank1(size_t index) const {
125 size_t const l1_pos = index / WideRankSelectConfig::L1_BIT_SIZE;
126 size_t const l2_pos = index / WideRankSelectConfig::L2_BIT_SIZE;
127 size_t result = l1_[l1_pos] + l2_[l2_pos];
128 if constexpr (!optimize_one_or_dont_care(optimized_for)) {
129 result = (l2_pos * WideRankSelectConfig::L2_BIT_SIZE) - result;
130 }
131
132 size_t offset = l2_pos * WideRankSelectConfig::L2_WORD_SIZE;
133 size_t const full_words = (index % WideRankSelectConfig::L2_BIT_SIZE) / 64;
134
135 for (size_t i = 0; i < full_words; ++i) {
136 result += std::popcount(data_[offset++]);
137 }
138
139 if (index %= 64; index > 0) [[likely]] {
140 uint64_t const remaining = data_[offset] << (64 - index);
141 result += std::popcount(remaining);
142 }
143 return result;
144 }
145
146 /*!
147 * \brief Estimate for the space usage.
148 * \return Number of bytes used by this data structure.
149 */
150 [[nodiscard("space useage computed but not used")]] virtual size_t
151 space_usage() const {
152 return (l1_.size() * sizeof(uint64_t)) + (l2_.size() * sizeof(uint16_t)) +
153 sizeof(*this);
154 }
155
156private:
157 //! Function used for initializing data structure to reduce LOCs of
158 //! constructor.
159 void init() {
160 uint64_t const* data = data_;
161 uint64_t const* const data_end = data_ + data_size_;
162 l1_[0] = 0;
163 l2_[0] = 0;
164
165 size_t l1_pos = 0;
166 size_t l2_pos = 0;
167 size_t l2_entry = 0;
168 while (data + 8 < data_end) {
169 l2_[l2_pos++] = l2_entry;
170 if constexpr (optimize_one_or_dont_care(optimized_for)) {
171 l2_entry += popcount<8>(data);
172 } else {
173 l2_entry += popcount_zeros<8>(data);
174 }
175 data += 8;
176 if (l2_pos % 128 == 0 && l2_pos > 0) [[unlikely]] {
177 ++l1_pos;
178 l1_[l1_pos] = l1_[l1_pos - 1] + l2_entry;
179 l2_entry = 0;
180 }
181 }
182 l2_[l2_pos++] = l2_entry;
183 }
184}; // class BitVectorFlatRank
185
186//! \}
187
188} // namespace pasta
189
190/******************************************************************************/
Uncompressed, highly tuned, fixed size bit vector.
Definition: bit_vector.hpp:179
Select support for BitVector.
Definition: wide_rank_select.hpp:62
Rank support for BitVector that can be used as an alternative to Rank for bit vectors up to length 2^...
Definition: wide_rank.hpp:76
size_t rank1(size_t index) const
Computes rank of ones.
Definition: wide_rank.hpp:124
WideRank(VectorType &bv)
Constructor. Creates the auxiliary information for efficient rank queries.
Definition: wide_rank.hpp:100
virtual size_t space_usage() const
Estimate for the space usage.
Definition: wide_rank.hpp:151
WideRank()=default
Default constructor w/o parameter.
size_t rank0(size_t index) const
Computes rank of zeros.
Definition: wide_rank.hpp:114
OptimizedFor
Enum used to specify which queries the rank (and select) data structures should be optimized for.
Definition: optimized_for.hpp:40
constexpr bool optimize_one_or_dont_care(OptimizedFor const optimized_for)
Helper function indicating if queries should be optimized for one queries or the caller does not care...
Definition: optimized_for.hpp:59
@ DONT_CARE
It does not matter (both types are called equally often).
Static configuration for WideRank and WideRankSelect.
Definition: wide_rank.hpp:39
static constexpr size_t L1_WORD_SIZE
Number of 64-bit words covered by an L1-block.
Definition: wide_rank.hpp:48
static constexpr size_t L2_WORD_SIZE
Number of 64-bit words covered by an L2-block.
Definition: wide_rank.hpp:46
static constexpr size_t L1_BIT_SIZE
Bits covered by an L1-block.
Definition: wide_rank.hpp:43
static constexpr size_t SELECT_SAMPLE_RATE
Sample rate of positions for faster select queries.
Definition: wide_rank.hpp:51
static constexpr size_t L2_BIT_SIZE
Bits covered by an L2-block.
Definition: wide_rank.hpp:41