Flex  0.17.9
ef_sequence_view.h
Go to the documentation of this file.
1 
16 #ifndef GRAPHSCOPE_PTHASH_UTILS_EF_SEQUENCE_VIEW_VIEW_H_
17 #define GRAPHSCOPE_PTHASH_UTILS_EF_SEQUENCE_VIEW_VIEW_H_
18 
19 #include <assert.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 
23 #include <type_traits>
24 
25 #ifdef USE_PTHASH
26 #include "encoders/util.hpp"
27 
28 namespace gs {
29 
30 template <typename T>
31 struct ref_vector {
32  static_assert(std::is_pod<T>::value, "T must be POD type");
33  ref_vector() : buffer_(nullptr), size_(0) {}
34  ~ref_vector() {}
35 
36  void init(const T* buffer, size_t size) {
37  buffer_ = buffer;
38  size_ = size;
39  }
40 
41  const T* data() const { return buffer_; }
42  const T& operator[](size_t idx) const { return buffer_[idx]; }
43 
44  const T* buffer_;
45  size_t size_;
46 };
47 
48 // This code is an adaptation from
49 // https://github.com/jermp/pthash/blob/master/include/encoders/bit_vector.hpp
50 struct bit_vector_view {
51  const uint64_t* data() const { return m_bits.data(); }
52 
53  template <typename Visitor>
54  void visit(Visitor& visitor) {
55  visitor.visit(m_size);
56  visitor.visit_vec(m_bits);
57  }
58 
59  size_t m_size;
60  ref_vector<uint64_t> m_bits;
61 };
62 
63 // This code is an adaptation from
64 // https://github.com/jermp/pthash/blob/master/include/encoders/darray.hpp
65 struct darray1_view {
66  inline uint64_t select(const bit_vector_view& bv, uint64_t idx) const {
67  assert(idx < m_positions);
68  uint64_t block = idx / block_size;
69  int64_t block_pos = m_block_inventory[block];
70  if (block_pos < 0) { // sparse super-block
71  uint64_t overflow_pos = uint64_t(-block_pos - 1);
72  return m_overflow_positions[overflow_pos + (idx & (block_size - 1))];
73  }
74 
75  size_t subblock = idx / subblock_size;
76  size_t start_pos = uint64_t(block_pos) + m_subblock_inventory[subblock];
77  size_t reminder = idx & (subblock_size - 1);
78  if (!reminder) {
79  return start_pos;
80  }
81 
82  const uint64_t* data = bv.data();
83  size_t word_idx = start_pos >> 6;
84  size_t word_shift = start_pos & 63;
85  uint64_t word = data[word_idx] & (uint64_t(-1) << word_shift);
86 
87  while (true) {
88  size_t popcnt = pthash::util::popcount(word);
89  if (reminder < popcnt) {
90  break;
91  }
92  reminder -= popcnt;
93  word = data[++word_idx];
94  }
95 
96  return (word_idx << 6) + pthash::util::select_in_word(word, reminder);
97  }
98 
99  template <typename Visitor>
100  void visit(Visitor& visitor) {
101  visitor.visit(m_positions);
102  visitor.visit_vec(m_block_inventory);
103  visitor.visit_vec(m_subblock_inventory);
104  visitor.visit_vec(m_overflow_positions);
105  }
106 
107  static const size_t block_size = 1024; // 2048
108  static const size_t subblock_size = 32;
109  static const size_t max_in_block_distance = 1 << 16;
110 
111  size_t m_positions;
112  ref_vector<int64_t> m_block_inventory;
113  ref_vector<uint16_t> m_subblock_inventory;
114  ref_vector<uint64_t> m_overflow_positions;
115 };
116 
117 // This code is an adaptation from
118 // https://github.com/jermp/pthash/blob/master/include/encoders/compact_vector.hpp
119 struct compact_vector_view {
120  inline uint64_t size() const { return m_size; }
121  inline uint64_t width() const { return m_width; }
122  inline uint64_t access(uint64_t pos) const {
123  assert(pos < size());
124  uint64_t i = pos * m_width;
125  const char* ptr = reinterpret_cast<const char*>(m_bits.data());
126  return (*(reinterpret_cast<uint64_t const*>(ptr + (i >> 3))) >> (i & 7)) &
127  m_mask;
128  }
129 
130  template <typename Visitor>
131  void visit(Visitor& visitor) {
132  visitor.visit(m_size);
133  visitor.visit(m_width);
134  visitor.visit(m_mask);
135  visitor.visit_vec(m_bits);
136  }
137 
138  uint64_t m_size;
139  uint64_t m_width;
140  uint64_t m_mask;
141  ref_vector<uint64_t> m_bits;
142 };
143 
144 // This code is an adaptation from
145 // https://github.com/jermp/pthash/blob/master/include/encoders/ef_sequence.hpp
146 struct ef_sequence_view {
147  uint64_t access(uint64_t i) const {
148  assert(i < m_low_bits.size());
149  return ((m_high_bits_d1.select(m_high_bits, i) - i) << m_low_bits.width()) |
150  m_low_bits.access(i);
151  }
152 
153  template <typename Visitor>
154  void visit(Visitor& visitor) {
155  visitor.visit(m_high_bits);
156  visitor.visit(m_high_bits_d1);
157  visitor.visit(m_low_bits);
158  }
159 
160  bit_vector_view m_high_bits;
161  darray1_view m_high_bits_d1;
162  compact_vector_view m_low_bits;
163 };
164 
165 } // namespace gs
166 
167 #endif // USE_PTHASH
168 
169 #endif // GRAPHSCOPE_PTHASH_UTILS_EF_SEQUENCE_VIEW_VIEW_H_
gs
Definition: adj_list.h:23