16 #ifndef GRAPHSCOPE_PTHASH_UTILS_EF_SEQUENCE_VIEW_VIEW_H_
17 #define GRAPHSCOPE_PTHASH_UTILS_EF_SEQUENCE_VIEW_VIEW_H_
23 #include <type_traits>
26 #include "encoders/util.hpp"
32 static_assert(std::is_pod<T>::value,
"T must be POD type");
33 ref_vector() : buffer_(nullptr), size_(0) {}
36 void init(
const T* buffer,
size_t size) {
41 const T* data()
const {
return buffer_; }
42 const T& operator[](
size_t idx)
const {
return buffer_[idx]; }
50 struct bit_vector_view {
51 const uint64_t* data()
const {
return m_bits.data(); }
53 template <
typename Visitor>
54 void visit(Visitor& visitor) {
55 visitor.visit(m_size);
56 visitor.visit_vec(m_bits);
60 ref_vector<uint64_t> m_bits;
66 inline uint64_t select(
const bit_vector_view& bv, uint64_t idx)
const {
67 assert(idx < m_positions);
68 uint64_t block = idx / block_size;
69 int64_t block_pos = m_block_inventory[block];
71 uint64_t overflow_pos = uint64_t(-block_pos - 1);
72 return m_overflow_positions[overflow_pos + (idx & (block_size - 1))];
75 size_t subblock = idx / subblock_size;
76 size_t start_pos = uint64_t(block_pos) + m_subblock_inventory[subblock];
77 size_t reminder = idx & (subblock_size - 1);
82 const uint64_t* data = bv.data();
83 size_t word_idx = start_pos >> 6;
84 size_t word_shift = start_pos & 63;
85 uint64_t word = data[word_idx] & (uint64_t(-1) << word_shift);
88 size_t popcnt = pthash::util::popcount(word);
89 if (reminder < popcnt) {
93 word = data[++word_idx];
96 return (word_idx << 6) + pthash::util::select_in_word(word, reminder);
99 template <
typename Visitor>
100 void visit(Visitor& visitor) {
101 visitor.visit(m_positions);
102 visitor.visit_vec(m_block_inventory);
103 visitor.visit_vec(m_subblock_inventory);
104 visitor.visit_vec(m_overflow_positions);
107 static const size_t block_size = 1024;
108 static const size_t subblock_size = 32;
109 static const size_t max_in_block_distance = 1 << 16;
112 ref_vector<int64_t> m_block_inventory;
113 ref_vector<uint16_t> m_subblock_inventory;
114 ref_vector<uint64_t> m_overflow_positions;
119 struct compact_vector_view {
120 inline uint64_t size()
const {
return m_size; }
121 inline uint64_t width()
const {
return m_width; }
122 inline uint64_t access(uint64_t pos)
const {
123 assert(pos < size());
124 uint64_t i = pos * m_width;
125 const char* ptr =
reinterpret_cast<const char*
>(m_bits.data());
126 return (*(
reinterpret_cast<uint64_t const*
>(ptr + (i >> 3))) >> (i & 7)) &
130 template <
typename Visitor>
131 void visit(Visitor& visitor) {
132 visitor.visit(m_size);
133 visitor.visit(m_width);
134 visitor.visit(m_mask);
135 visitor.visit_vec(m_bits);
141 ref_vector<uint64_t> m_bits;
146 struct ef_sequence_view {
147 uint64_t access(uint64_t i)
const {
148 assert(i < m_low_bits.size());
149 return ((m_high_bits_d1.select(m_high_bits, i) - i) << m_low_bits.width()) |
150 m_low_bits.access(i);
153 template <
typename Visitor>
154 void visit(Visitor& visitor) {
155 visitor.visit(m_high_bits);
156 visitor.visit(m_high_bits_d1);
157 visitor.visit(m_low_bits);
160 bit_vector_view m_high_bits;
161 darray1_view m_high_bits_d1;
162 compact_vector_view m_low_bits;
169 #endif // GRAPHSCOPE_PTHASH_UTILS_EF_SEQUENCE_VIEW_VIEW_H_