bit_vector/flat__rank_8hpp_source.html

/*******************************************************************************

 * This file is part of pasta::bit_vector.

 *

 * Copyright (C) 2021 Florian Kurpicz <florian@kurpicz.org>

 *

 * pasta::bit_vector is free software: you can redistribute it and/or modify

 * it under the terms of the GNU General Public License as published by

 * the Free Software Foundation, either version 3 of the License, or

 * (at your option) any later version.

 *

 * pasta::bit_vector is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 * GNU General Public License for more details.

 *

 * You should have received a copy of the GNU General Public License

 * along with pasta::bit_vector.  If not, see <http://www.gnu.org/licenses/>.

 *

 ******************************************************************************/


#pragma once


#include "pasta/bit_vector/bit_vector.hpp"

#include "pasta/bit_vector/support/find_l2_flat_with.hpp"

#include "pasta/bit_vector/support/l12_type.hpp"

#include "pasta/bit_vector/support/optimized_for.hpp"

#include "pasta/bit_vector/support/popcount.hpp"


#include <numeric>

#include <pasta/utils/debug_asserts.hpp>

#include <tlx/container/simple_vector.hpp>


namespace pasta {


/*!

 * \ingroup pasta_bit_vector_configuration

 * \brief Static configuration for \c FlatRank and

 * \c FlatRankSelect

 */


struct FlatRankSelectConfig {
  // --- Block sizes measured in bits --------------------------------------

  //! Number of bits summarized by one L2-block.
  static constexpr size_t L2_BIT_SIZE = 512;
  //! Number of bits summarized by one L1-block (eight L2-blocks).
  static constexpr size_t L1_BIT_SIZE = L2_BIT_SIZE * 8;
  //! Number of bits summarized by one L0-block, i.e., 2^31.
  static constexpr size_t L0_BIT_SIZE =
      static_cast<size_t>(std::numeric_limits<int32_t>::max()) + 1;

  // --- Block sizes measured in 64-bit words ------------------------------

  //! Number of 64-bit words that make up one L2-block.
  static constexpr size_t L2_WORD_SIZE = L2_BIT_SIZE / (8 * sizeof(uint64_t));
  //! Number of 64-bit words that make up one L1-block.
  static constexpr size_t L1_WORD_SIZE = L1_BIT_SIZE / (8 * sizeof(uint64_t));
  //! Number of 64-bit words that make up one L0-block.
  static constexpr size_t L0_WORD_SIZE = L0_BIT_SIZE / (8 * sizeof(uint64_t));

  // --- Select support ----------------------------------------------------

  //! Distance between two sampled positions used to speed up select queries.
  static constexpr size_t SELECT_SAMPLE_RATE = 8192;
}; // struct FlatRankSelectConfig


//! \addtogroup pasta_bit_vector_rank

//! \{


/*!

 * \brief %Rank support for \ref BitVector that can be used as an alternative

 * to \ref Rank for bit vectors up to length 2^40.

 *

 * The rank support is an extended and engineered version of the popcount rank

 * support by Zhou et al. \cite ZhouAK2013PopcountRankSelect. This flat rank

 * support, however, removes the highest utility array (L0) and also groups

 * twice as many L2-blocks in a single L1-block. This allows us to store more

 * information---most importantly the number of ones w.r.t. the beginning of

 * the L1-block---in the L2-blocks. For the L1/2-block layout see

 * \ref BigL12Type.

 *

 * \tparam OptimizedFor Compile time option to optimize data structure for

 * either 0, 1, or no specific type of query.

 * \tparam VectorType Type of the vector the rank data structure is constructed

 * for, e.g., plain \c BitVector or a compressed bit vector.

 */

template <OptimizedFor optimized_for = OptimizedFor::DONT_CARE,
          typename VectorType = BitVector>
class FlatRank {
  //! Friend class, using internal information l12_.
  template <OptimizedFor o, FindL2FlatWith f, typename v>
  friend class FlatRankSelect;

protected:
  //! Number of 64-bit words of the bit vector the rank support is constructed
  //! for (taken from \c bv.data().size()).
  size_t data_size_ = 0;
  //! Pointer to the words of the uncompressed bit vector. It is only read
  //! while building the structure; for a compressible \c VectorType the
  //! vector is compressed right afterwards, so the pointer may be invalid
  //! from then on.
  uint64_t const* data_ = nullptr;
  //! Accessor to the data of the bit vector used by the queries. This is
  //! mutable to enable caching in the compressed bit vector variant.
  typename VectorType::RawDataConstAccess mutable data_access_{};
  //! Array containing the information about the L1- and L2-blocks.
  tlx::SimpleVector<BigL12Type, tlx::SimpleVectorMode::NoInitNoDestroy> l12_;
  //! Number of actual existing BigL12-blocks (important for scanning)
  size_t l12_end_ = 0;

public:
  //! Default constructor w/o parameter. Creates an empty rank structure that
  //! is not attached to any bit vector.
  FlatRank() = default;

  /*!
   * \brief Constructor. Creates the auxiliary information for efficient rank
   * queries.
   *
   * The block counters are computed from the uncompressed words of \c bv.
   * If \c VectorType is not \c BitVector, \c bv.compress() is called
   * afterwards and all queries go through the compressed data accessor.
   * The rank structure only references the data of \c bv, therefore \c bv
   * has to outlive the rank structure.
   *
   * \param bv Vector of \c VectorType the rank structure is created for.
   */
  FlatRank(VectorType& bv)
      : data_size_(bv.data().size()),
        data_(bv.data().data()),
        data_access_(),
        l12_((data_size_ / FlatRankSelectConfig::L1_WORD_SIZE) + 1) {
    init();
    if constexpr (!std::is_same_v<VectorType, BitVector>) {
      bv.compress();
      data_access_ = bv.compresed_data();
    } else {
      data_access_ = data_;
    }
  }

  /*!
   * \brief Computes rank of zeros.
   * \param index Index the rank of zeros is computed for.
   * \return Number of zeros (rank) before position \c index.
   */
  [[nodiscard("rank0 computed but not used")]] size_t
  rank0(size_t index) const {
    return index - rank1(index);
  }

  /*!
   * \brief Computes rank of ones.
   *
   * The result is the sum of the L1-entry, the L2-entry and the popcounts
   * of the words between the start of the L2-block and \c index.
   *
   * \param index Index the rank of ones is computed for.
   * \return Number of ones (rank) before position \c index.
   */
  [[nodiscard("rank1 computed but not used")]] size_t
  rank1(size_t index) const {
    constexpr size_t word_bits = sizeof(uint64_t) * 8;

    // Position of the first data word of the L2-block containing index.
    size_t offset = (index / FlatRankSelectConfig::L2_BIT_SIZE) *
                    FlatRankSelectConfig::L2_WORD_SIZE;
    size_t const l1_pos = index / FlatRankSelectConfig::L1_BIT_SIZE;
    size_t const l2_pos = ((index % FlatRankSelectConfig::L1_BIT_SIZE) /
                           FlatRankSelectConfig::L2_BIT_SIZE);
    size_t result = l12_[l1_pos].l1() + l12_[l1_pos][l2_pos];

    // It is faster to not have a specialized rank0 function when
    // optimized for zero queries, because there is no popcount for
    // zero equivalent and for all popcounts in this code, the words
    // would have to be bit-wise negated, which is more expensive than
    // the computation below.
    if constexpr (!optimize_one_or_dont_care(optimized_for)) {
      // The blocks store zero counts in this configuration. Convert them to
      // the number of ones before the start of the current L2-block.
      result = ((l1_pos * FlatRankSelectConfig::L1_BIT_SIZE) +
                (l2_pos * FlatRankSelectConfig::L2_BIT_SIZE)) -
               result;
    }

    // From here on index is the position relative to the L2-block start.
    index %= FlatRankSelectConfig::L2_BIT_SIZE;
    PASTA_ASSERT(index < FlatRankSelectConfig::L2_BIT_SIZE,
                 "Trying to access bits that should be "
                 "covered in an L1-block");
    // Count all words of the L2-block that lie completely before index.
    for (size_t i = 0; i < index / word_bits; ++i) {
      result += std::popcount(data_access_[offset++]);
    }
    // Count the remaining low bits of the word containing index. The shift
    // discards all bits at positions >= index.
    if (index %= word_bits; index > 0) [[likely]] {
      uint64_t const remaining = (data_access_[offset]) << (word_bits - index);
      result += std::popcount(remaining);
    }
    return result;
  }

  /*!
   * \brief Estimate for the space usage.
   * \return Number of bytes used by this data structure.
   */
  // NOTE(review): this class has a virtual member function but declares no
  // virtual destructor, so derived objects must not be deleted through a
  // pointer to FlatRank.
  [[nodiscard("space useage computed but not used")]] virtual size_t
  space_usage() const {
    return l12_.size() * sizeof(BigL12Type) + sizeof(*this);
  }

private:
  //! Counts the bits the block entries keep track of in \c Words consecutive
  //! words starting at \c words: ones if the structure is optimized for one
  //! queries (or no specific query type), zeros otherwise.
  template <size_t Words>
  [[nodiscard]] static auto count_tracked_bits(uint64_t const* const words) {
    if constexpr (optimize_one_or_dont_care(optimized_for)) {
      return popcount<Words>(words);
    } else {
      return popcount_zeros<Words>(words);
    }
  }

  //! Function used for initializing data structure to reduce LOCs of
  //! constructor.
  void init() {
    constexpr size_t l1_words = FlatRankSelectConfig::L1_WORD_SIZE;
    constexpr size_t l2_words = FlatRankSelectConfig::L2_WORD_SIZE;

    uint64_t const* data = data_;
    uint64_t const* const data_end = data_ + data_size_;
    // Number of words that have not been processed yet. Comparing counts
    // instead of pointers avoids forming pointers beyond the end of the data.
    auto const words_left = [&]() {
      return static_cast<size_t>(data_end - data);
    };

    uint64_t l1_entry = 0ULL;
    // Prefix counts for the first seven L2-blocks of an L1-block. The eighth
    // L2-block only contributes to the L1-entry of the next L1-block.
    std::array<uint16_t, 7> l2_entries = {0, 0, 0, 0, 0, 0, 0};

    // Process all L1-blocks that are followed by at least one more word.
    while (words_left() > l1_words) {
      l2_entries[0] = count_tracked_bits<l2_words>(data);
      data += l2_words;
      for (size_t i = 1; i < 7; ++i) {
        l2_entries[i] =
            l2_entries[i - 1] + count_tracked_bits<l2_words>(data);
        data += l2_words;
      }
      l12_[l12_end_++] = BigL12Type(l1_entry, l2_entries);
      l1_entry += l2_entries.back() + count_tracked_bits<l2_words>(data);
      data += l2_words;
    }

    // Process the last (possibly incomplete) L1-block. First collect the
    // individual counts of its L2-blocks that are followed by more words ...
    size_t l2_pos = 0;
    l2_entries = {0, 0, 0, 0, 0, 0, 0};
    while (words_left() > l2_words) {
      l2_entries[l2_pos++] = count_tracked_bits<l2_words>(data);
      data += l2_words;
    }
    // ... then add the remaining words one at a time, unless they belong to
    // the eighth L2-block, which has no entry of its own.
    if (l2_pos < 7) {
      while (data < data_end) {
        l2_entries[l2_pos] += count_tracked_bits<1>(data++);
      }
    }
    // Turn the individual counts into prefix sums, as expected by the block.
    std::partial_sum(l2_entries.begin(), l2_entries.end(), l2_entries.begin());
    l12_[l12_end_++] = BigL12Type(l1_entry, l2_entries);
  }
}; // class FlatRank


//! \}


} // namespace pasta


/******************************************************************************/

pasta::BitVector
Uncompressed, highly tuned, fixed size bit vector.
Definition bit_vector.hpp:178

pasta::FlatRankSelect
Select support for BitVector that can be used as an alternative to RankSelect for bit vectors up to l...
Definition flat_rank_select.hpp:73

pasta::FlatRank
Rank support for BitVector that can be used as an alternative to Rank for bit vectors up to length 2^...
Definition flat_rank.hpp:82

pasta::FlatRank::space_usage
virtual size_t space_usage() const
Estimate for the space usage.
Definition flat_rank.hpp:177

pasta::FlatRank::FlatRank
FlatRank()=default
Default constructor w/o parameter.

pasta::FlatRank::l12_
tlx::SimpleVector< BigL12Type, tlx::SimpleVectorMode::NoInitNoDestroy > l12_
Array containing the information about the L1- and L2-blocks.
Definition flat_rank.hpp:97

pasta::FlatRank::rank1
size_t rank1(size_t index) const
Computes rank of ones.
Definition flat_rank.hpp:140

pasta::FlatRank::data_access_
VectorType::RawDataConstAccess data_access_
Definition flat_rank.hpp:95

pasta::FlatRank::rank0
size_t rank0(size_t index) const
Computes rank of zeros.
Definition flat_rank.hpp:130

pasta::FlatRank::FlatRank
FlatRank(VectorType &bv)
Constructor. Creates the auxiliary information for efficient rank queries.
Definition flat_rank.hpp:110

pasta::FlatRank::l12_end_
size_t l12_end_
Number of actual existing BigL12-blocks (important for scanning)
Definition flat_rank.hpp:99

pasta::FlatRank::data_
uint64_t const  * data_
Definition flat_rank.hpp:92

pasta::FlatRank::data_size_
size_t data_size_
Size of the bit vector the rank support is constructed for.
Definition flat_rank.hpp:89

pasta::OptimizedFor
OptimizedFor
Enum used to specify which queries the rank (and select) data structures should be optimized for.
Definition optimized_for.hpp:40

pasta::optimize_one_or_dont_care
constexpr bool optimize_one_or_dont_care(OptimizedFor const optimized_for)
Helper function indicating if queries should be optimized for one queries or the caller does not care...
Definition optimized_for.hpp:59

pasta::OptimizedFor::DONT_CARE
@ DONT_CARE
It does not matter (both types are called equally often).

optimized_for.hpp

popcount.hpp

pasta::BigL12Type
Check that L12Type requires only 64 bits.
Definition l12_type.hpp:94

pasta::FlatRankSelectConfig
Static configuration for FlatRank and FlatRankSelect.
Definition flat_rank.hpp:40

pasta::FlatRankSelectConfig::L2_WORD_SIZE
static constexpr size_t L2_WORD_SIZE
Number of 64-bit words covered by an L2-block.
Definition flat_rank.hpp:50

pasta::FlatRankSelectConfig::L1_BIT_SIZE
static constexpr size_t L1_BIT_SIZE
Bits covered by an L1-block.
Definition flat_rank.hpp:44

pasta::FlatRankSelectConfig::L0_WORD_SIZE
static constexpr size_t L0_WORD_SIZE
Number of 64-bit words covered by an L0-block.
Definition flat_rank.hpp:54

pasta::FlatRankSelectConfig::L2_BIT_SIZE
static constexpr size_t L2_BIT_SIZE
Bits covered by an L2-block.
Definition flat_rank.hpp:42

pasta::FlatRankSelectConfig::SELECT_SAMPLE_RATE
static constexpr size_t SELECT_SAMPLE_RATE
Sample rate of positions for faster select queries.
Definition flat_rank.hpp:57

pasta::FlatRankSelectConfig::L0_BIT_SIZE
static constexpr size_t L0_BIT_SIZE
Bits covered by an L0-block.
Definition flat_rank.hpp:46

pasta::FlatRankSelectConfig::L1_WORD_SIZE
static constexpr size_t L1_WORD_SIZE
Number of 64-bit words covered by an L1-block.
Definition flat_rank.hpp:52