Flex  0.17.9
id_indexer.h
Go to the documentation of this file.
1 
16 #ifndef GRAPHSCOPE_GRAPH_ID_INDEXER_H_
17 #define GRAPHSCOPE_GRAPH_ID_INDEXER_H_
18 
19 #include <atomic>
20 #include <cassert>
21 #include <cmath>
22 #include <memory>
23 #include <string_view>
24 #include <thread>
25 #include <type_traits>
26 #include <vector>
27 
28 #include "flat_hash_map/flat_hash_map.hpp"
29 #include "flex/utils/mmap_array.h"
33 #include "glog/logging.h"
34 #include "grape/io/local_io_adaptor.h"
35 #include "grape/serialization/in_archive.h"
36 #include "grape/serialization/out_archive.h"
37 
38 namespace gs {
39 
40 namespace id_indexer_impl {
41 
42 static constexpr int8_t min_lookups = 4;
43 static constexpr double max_load_factor = 0.5f;
44 
45 inline int8_t log2(size_t value) {
46  static constexpr int8_t table[64] = {
47  63, 0, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54, 33, 42, 3,
48  61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4,
49  62, 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21,
50  56, 45, 25, 31, 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5};
51  value |= value >> 1;
52  value |= value >> 2;
53  value |= value >> 4;
54  value |= value >> 8;
55  value |= value >> 16;
56  value |= value >> 32;
57  return table[((value - (value >> 1)) * 0x07EDD5E59A4E28C2) >> 58];
58 }
59 
60 template <typename T>
61 struct KeyBuffer {
62  using type = std::vector<T>;
63 
64  template <typename IOADAPTOR_T>
65  static void serialize(std::unique_ptr<IOADAPTOR_T>& writer,
66  const type& buffer) {
67  size_t size = buffer.size();
68  CHECK(writer->Write(&size, sizeof(size_t)));
69  if (size > 0) {
70  CHECK(writer->Write(buffer.data(), size * sizeof(T)));
71  }
72  }
73 
74  template <typename IOADAPTOR_T>
75  static void deserialize(std::unique_ptr<IOADAPTOR_T>& reader, type& buffer) {
76  size_t size;
77  CHECK(reader->Read(&size, sizeof(size_t)));
78  if (size > 0) {
79  buffer.resize(size);
80  CHECK(reader->Read(buffer.data(), size * sizeof(T)));
81  }
82  }
83 };
84 
85 template <>
86 struct KeyBuffer<std::string> {
87  using type = std::vector<std::string>;
88 
89  template <typename IOADAPTOR_T>
90  static void serialize(std::unique_ptr<IOADAPTOR_T>& writer,
91  const type& buffer) {
92  grape::InArchive arc;
93  arc << buffer;
94  CHECK(writer->WriteArchive(arc));
95  }
96 
97  template <typename IOADAPTOR_T>
98  static void deserialize(std::unique_ptr<IOADAPTOR_T>& reader, type& buffer) {
99  grape::OutArchive arc;
100  CHECK(reader->ReadArchive(arc));
101  arc >> buffer;
102  }
103 };
104 
105 template <>
106 struct KeyBuffer<std::string_view> {
108 
109  template <typename IOADAPTOR_T>
110  static void serialize(std::unique_ptr<IOADAPTOR_T>& writer,
111  const type& buffer) {
112  size_t content_buffer_size = buffer.content_buffer().size();
113  CHECK(writer->Write(&content_buffer_size, sizeof(size_t)));
114  if (content_buffer_size > 0) {
115  CHECK(writer->Write(buffer.content_buffer().data(),
116  content_buffer_size * sizeof(char)));
117  }
118  size_t offset_buffer_size = buffer.offset_buffer().size();
119  CHECK(writer->Write(&offset_buffer_size, sizeof(size_t)));
120  if (offset_buffer_size > 0) {
121  CHECK(writer->Write(buffer.offset_buffer().data(),
122  offset_buffer_size * sizeof(size_t)));
123  }
124  }
125 
126  template <typename IOADAPTOR_T>
127  static void deserialize(std::unique_ptr<IOADAPTOR_T>& reader, type& buffer) {
128  size_t content_buffer_size;
129  CHECK(reader->Read(&content_buffer_size, sizeof(size_t)));
130  if (content_buffer_size > 0) {
131  buffer.content_buffer().resize(content_buffer_size);
132  CHECK(reader->Read(buffer.content_buffer().data(),
133  content_buffer_size * sizeof(char)));
134  }
135  size_t offset_buffer_size;
136  CHECK(reader->Read(&offset_buffer_size, sizeof(size_t)));
137  if (offset_buffer_size > 0) {
138  buffer.offset_buffer().resize(offset_buffer_size);
139  CHECK(reader->Read(buffer.offset_buffer().data(),
140  offset_buffer_size * sizeof(size_t)));
141  }
142  }
143 };
144 
145 } // namespace id_indexer_impl
146 
147 template <typename T>
148 struct GHash {
149  size_t operator()(const T& val) const { return std::hash<T>()(val); }
150 };
151 
152 template <>
153 struct GHash<int64_t> {
154  size_t operator()(const int64_t& val) const {
155  uint64_t x = static_cast<uint64_t>(val);
156  x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
157  x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb);
158  x = x ^ (x >> 31);
159  return x;
160  }
161 };
162 
163 template <>
164 struct GHash<Any> {
165  size_t operator()(const Any& val) const {
166  if (val.type == PropertyType::kInt64) {
167  return GHash<int64_t>()(val.AsInt64());
168  } else if (val.type == PropertyType::kInt32) {
169  return GHash<int32_t>()(val.AsInt32());
170  } else if (val.type == PropertyType::kUInt64) {
171  return GHash<uint64_t>()(val.AsUInt64());
172  } else if (val.type == PropertyType::kUInt32) {
173  return GHash<uint32_t>()(val.AsUInt32());
174  } else {
175  return GHash<std::string_view>()(val.AsStringView());
176  }
177  }
178 };
179 
180 template <typename KEY_T, typename INDEX_T>
181 class IdIndexer;
182 
183 template <typename INDEX_T>
184 class LFIndexer;
185 
186 template <typename KEY_T, typename INDEX_T>
188  const std::string& filename, LFIndexer<INDEX_T>& lf,
189  const std::string& snapshot_dir,
190  const std::string& work_dir, PropertyType type);
191 
192 template <typename INDEX_T>
193 class LFIndexer {
194  public:
196  : indices_(),
197  indices_size_(0),
198  num_elements_(0),
200  keys_(nullptr),
201  hasher_() {}
203  : indices_(std::move(rhs.indices_)),
205  num_elements_(rhs.num_elements_.load()),
207  hasher_(rhs.hasher_) {
208  if (keys_ != rhs.keys_) {
209  if (keys_ != nullptr) {
210  delete keys_;
211  }
212  keys_ = rhs.keys_;
213  }
214  hash_policy_.set_mod_function_by_index(
215  rhs.hash_policy_.get_mod_function_index());
216  }
217 
219  if (keys_ != nullptr) {
220  delete keys_;
221  }
222  }
223 
224  static std::string prefix() { return "indexer"; }
225 
226  void init(const PropertyType& type) {
227  if (keys_ != nullptr) {
228  delete keys_;
229  }
230  keys_ = nullptr;
231  if (type == PropertyType::kInt64) {
233  } else if (type == PropertyType::kInt32) {
235  } else if (type == PropertyType::kUInt64) {
237  } else if (type == PropertyType::kUInt32) {
239  } else if (type.type_enum == impl::PropertyTypeImpl::kVarChar) {
242  } else if (type.type_enum == impl::PropertyTypeImpl::kStringView) {
243  LOG(WARNING) << "String type is a deprecated type, use varchar instead.";
244  LOG(WARNING) << "Use default max length"
246  << " for varchar type.";
249  } else {
250  LOG(FATAL) << "Not support type [" << type << "] as pk type ..";
251  }
252  }
253 
254  void build_empty_LFIndexer(const std::string& filename,
255  const std::string& snapshot_dir,
256  const std::string& work_dir) {
257  keys_->open(filename + ".keys", "", work_dir);
258  indices_.open(work_dir + "/" + filename + ".indices", true);
259 
260  num_elements_.store(0);
261  indices_size_ = 0;
262  dump_meta(work_dir + "/" + filename + ".meta");
263  indices_.reset();
264  keys_->close();
265  }
266 
267  void reserve(size_t size) { rehash(std::max(size, num_elements_.load())); }
268 
269  void rehash(size_t size) {
270  size = std::max(size, 4ul);
271  keys_->resize(size);
272  size =
273  static_cast<size_t>(std::ceil(size / id_indexer_impl::max_load_factor));
274  if (size == indices_size_) {
275  return;
276  }
277 
278  auto new_prime_index = hash_policy_.next_size_over(size);
279  hash_policy_.commit(new_prime_index);
280  size_t num_elements = num_elements_.load();
283  for (size_t k = 0; k != size; ++k) {
284  indices_[k] = std::numeric_limits<INDEX_T>::max();
285  }
287  for (INDEX_T idx = 0; idx < num_elements; ++idx) {
288  const auto& oid = keys_->get(idx);
289  size_t index =
290  hash_policy_.index_for_hash(hasher_(oid), num_slots_minus_one_);
291  static constexpr INDEX_T sentinel = std::numeric_limits<INDEX_T>::max();
292  while (true) {
293  if (indices_[index] == sentinel) {
294  indices_[index] = idx;
295  break;
296  }
297  index = (index + 1) % (num_slots_minus_one_ + 1);
298  }
299  }
300  }
301 
302  size_t capacity() const { return keys_->size(); }
303 
304  size_t size() const { return num_elements_.load(); }
305  PropertyType get_type() const { return keys_->type(); }
306 
307  INDEX_T insert(const Any& oid) {
308  assert(oid.type == get_type());
309  INDEX_T ind = static_cast<INDEX_T>(num_elements_.fetch_add(1));
310  keys_->set_any(ind, oid);
311  size_t index =
312  hash_policy_.index_for_hash(hasher_(oid), num_slots_minus_one_);
313  static constexpr INDEX_T sentinel = std::numeric_limits<INDEX_T>::max();
314  while (true) {
315  if (__sync_bool_compare_and_swap(&indices_.data()[index], sentinel,
316  ind)) {
317  break;
318  }
319  index = (index + 1) % (num_slots_minus_one_ + 1);
320  }
321  return ind;
322  }
323 
324  INDEX_T get_index(const Any& oid) const {
325  assert(oid.type == get_type());
326  size_t index =
327  hash_policy_.index_for_hash(hasher_(oid), num_slots_minus_one_);
328  static constexpr INDEX_T sentinel = std::numeric_limits<INDEX_T>::max();
329  while (true) {
330  INDEX_T ind = indices_.get(index);
331  if (ind == sentinel) {
332  VLOG(10) << "cannot find " << oid.to_string() << " in lf_indexer";
333  return ind;
334  } else if (keys_->get(ind) == oid) {
335  return ind;
336  } else {
337  index = (index + 1) % (num_slots_minus_one_ + 1);
338  }
339  }
340  }
341 
342  bool get_index(const Any& oid, INDEX_T& ret) const {
343  if (oid.type != get_type()) {
344  return false;
345  }
346  size_t index =
347  hash_policy_.index_for_hash(hasher_(oid), num_slots_minus_one_);
348  static constexpr INDEX_T sentinel = std::numeric_limits<INDEX_T>::max();
349  while (true) {
350  INDEX_T ind = indices_.get(index);
351  if (ind == sentinel) {
352  return false;
353  } else if (keys_->get(ind) == oid) {
354  ret = ind;
355  return true;
356  } else {
357  index = (index + 1) % (num_slots_minus_one_ + 1);
358  }
359  }
360  return false;
361  }
362 
363  Any get_key(const INDEX_T& index) const { return keys_->get(index); }
364 
365  void copy_to_tmp(const std::string& cur_path, const std::string& tmp_path) {
366  copy_file(cur_path + ".meta", tmp_path + ".meta");
367  load_meta(tmp_path + ".meta");
368  keys_->copy_to_tmp(cur_path + ".keys", tmp_path + ".keys");
369  copy_file(cur_path + ".indices", tmp_path + ".indices");
370  }
371 
372  void open(const std::string& name, const std::string& snapshot_dir,
373  const std::string& work_dir) {
374  if (std::filesystem::exists(snapshot_dir + "/" + name + ".meta")) {
375  copy_to_tmp(snapshot_dir + "/" + name, work_dir + "/" + name);
376  } else {
377  build_empty_LFIndexer(name, "", work_dir);
378  }
379 
380  load_meta(work_dir + "/" + name + ".meta");
381  keys_->open(name + ".keys", "", work_dir);
382  indices_.open(work_dir + "/" + name + ".indices", true);
383  size_t num_elements = num_elements_.load();
384 
385  keys_->resize(num_elements + (num_elements >> 2));
386 
388  }
389 
390  void open_in_memory(const std::string& name) {
391  if (std::filesystem::exists(name + ".meta")) {
392  load_meta(name + ".meta");
393  } else {
394  num_elements_.store(0);
395  }
396  keys_->open_in_memory(name + ".keys");
397  indices_.open(name + ".indices", false);
399  size_t num_elements = num_elements_.load();
400  keys_->resize(num_elements + (num_elements >> 2));
401  }
402 
403  void open_with_hugepages(const std::string& name, bool hugepage_table) {
404  if (std::filesystem::exists(name + ".meta")) {
405  load_meta(name + ".meta");
406  } else {
407  num_elements_.store(0);
408  }
409  keys_->open_with_hugepages(name + ".keys", true);
410  if (hugepage_table) {
411  indices_.open_with_hugepages(name + ".indices");
412  } else {
413  indices_.open(name + ".indices", false);
414  }
416  size_t num_elements = num_elements_.load();
417  keys_->resize(num_elements + (num_elements >> 2));
418  }
419 
420  void dump(const std::string& name, const std::string& snapshot_dir) {
421  keys_->resize(num_elements_.load());
422  keys_->dump(snapshot_dir + "/" + name + ".keys");
423  indices_.dump(snapshot_dir + "/" + name + ".indices");
424  dump_meta(snapshot_dir + "/" + name + ".meta");
425  close();
426  }
427 
428  void close() {
429  keys_->close();
430  indices_.reset();
431  }
432 
433  void dump_meta(const std::string& filename) const {
434  grape::InArchive arc;
435  arc << get_type() << num_elements_.load() << num_slots_minus_one_
436  << hash_policy_.get_mod_function_index();
437  FILE* fout = fopen(filename.c_str(), "wb");
438  fwrite(arc.GetBuffer(), sizeof(char), arc.GetSize(), fout);
439  fflush(fout);
440  fclose(fout);
441  }
442 
443  void load_meta(const std::string& filename) {
444  grape::OutArchive arc;
445  FILE* fin = fopen(filename.c_str(), "r");
446  size_t meta_file_size = std::filesystem::file_size(filename);
447  std::vector<char> buf(meta_file_size);
448  CHECK_EQ(fread(buf.data(), sizeof(char), meta_file_size, fin),
449  meta_file_size);
450  arc.SetSlice(buf.data(), meta_file_size);
451  size_t mod_function_index;
452  PropertyType type;
453  arc >> type;
454  size_t num_elements;
455  arc >> num_elements;
456 
457  num_elements_.store(num_elements);
458  arc >> num_slots_minus_one_ >> mod_function_index;
459  init(type);
460  hash_policy_.set_mod_function_by_index(mod_function_index);
461  fclose(fin);
462  }
463 
464  // get keys
465  const ColumnBase& get_keys() const { return *keys_; }
466  void warmup(int thread_num) const {
467  size_t keys_size = num_elements_.load();
468  size_t indices_size = indices_.size();
469  std::atomic<size_t> k_i(0), i_i(0);
470  std::atomic<size_t> output(0);
471  size_t chunk = 4096;
472  std::vector<std::thread> threads;
473  for (int i = 0; i < thread_num; ++i) {
474  threads.emplace_back([&]() {
475  size_t ret = 0;
476  while (true) {
477  size_t begin = std::min(k_i.fetch_add(chunk), keys_size);
478  size_t end = std::min(begin + chunk, keys_size);
479  if (begin == end) {
480  break;
481  }
482  while (begin < end) {
483  keys_->get(begin);
484  ++begin;
485  }
486  }
487  while (true) {
488  size_t begin = std::min(i_i.fetch_add(chunk), indices_size);
489  size_t end = std::min(begin + chunk, indices_size);
490  if (begin >= end) {
491  break;
492  }
493  while (begin < end) {
494  ret += indices_.get(begin);
495  ++begin;
496  }
497  }
498  output.fetch_add(ret);
499  });
500  }
501  for (auto& thrd : threads) {
502  thrd.join();
503  }
504  (void) output.load();
505  }
506 
507  private:
509  indices_; // size() == indices_size_ == num_slots_minus_one_ +
510  // log(num_slots_minus_one_)
512  std::atomic<size_t> num_elements_;
515 
516  ska::ska::prime_number_hash_policy hash_policy_;
518 
519  template <typename _KEY_T, typename _INDEX_T>
520  friend void build_lf_indexer(const IdIndexer<_KEY_T, _INDEX_T>& input,
521  const std::string& filename,
522  LFIndexer<_INDEX_T>& output,
523  const std::string& snapshot_dir,
524  const std::string& work_dir, PropertyType type);
525 };
526 
527 template <typename INDEX_T>
529  public:
530  IdIndexerBase() = default;
531  virtual ~IdIndexerBase() = default;
532  virtual PropertyType get_type() const = 0;
533  virtual void _add(const Any& oid) = 0;
534  virtual bool add(const Any& oid, INDEX_T& lid) = 0;
535  virtual bool get_key(const INDEX_T& lid, Any& oid) const = 0;
536  virtual bool get_index(const Any& oid, INDEX_T& lid) const = 0;
537  virtual size_t size() const = 0;
538 };
539 
540 template <typename KEY_T, typename INDEX_T>
541 class IdIndexer : public IdIndexerBase<INDEX_T> {
542  public:
544  using ind_buffer_t = std::vector<INDEX_T>;
545  using dist_buffer_t = std::vector<int8_t>;
546 
549 
550  PropertyType get_type() const override { return AnyConverter<KEY_T>::type(); }
551 
552  void _add(const Any& oid) override {
553  assert(get_type() == oid.type);
554  KEY_T oid_;
555  ConvertAny<KEY_T>::to(oid, oid_);
556  _add(oid_);
557  }
558 
559  bool add(const Any& oid, INDEX_T& lid) override {
560  assert(get_type() == oid.type);
561  KEY_T oid_;
562  ConvertAny<KEY_T>::to(oid, oid_);
563  return add(oid_, lid);
564  }
565 
566  bool get_key(const INDEX_T& lid, Any& oid) const override {
567  KEY_T oid_;
568  bool flag = get_key(lid, oid_);
569  if (flag) {
570  oid = Any::From(oid_);
571  }
572  return flag;
573  }
574 
575  bool get_index(const Any& oid, INDEX_T& lid) const override {
576  assert(get_type() == oid.type);
577  KEY_T oid_;
578  ConvertAny<KEY_T>::to(oid, oid_);
579  return get_index(oid_, lid);
580  }
581  void Clear() {
582  keys_.clear();
583  indices_.clear();
584  distances_.clear();
585  num_elements_ = 0;
587  hash_policy_.reset();
588  }
589 
590  size_t entry_num() const { return distances_.size(); }
591 
592  bool add(const KEY_T& oid, INDEX_T& lid) {
593  size_t index =
594  hash_policy_.index_for_hash(hasher_(oid), num_slots_minus_one_);
595 
596  int8_t distance_from_desired = 0;
597  for (; distances_[index] >= distance_from_desired;
598  ++index, ++distance_from_desired) {
599  INDEX_T cur_lid = indices_[index];
600  if (keys_[cur_lid] == oid) {
601  lid = cur_lid;
602  return false;
603  }
604  }
605 
606  lid = static_cast<INDEX_T>(keys_.size());
607  keys_.push_back(oid);
608  assert(keys_.size() == num_elements_ + 1);
609  emplace_new_value(distance_from_desired, index, lid);
610  assert(keys_.size() == num_elements_);
611  return true;
612  }
613 
614  bool add(KEY_T&& oid, INDEX_T& lid) {
615  size_t index =
616  hash_policy_.index_for_hash(hasher_(oid), num_slots_minus_one_);
617 
618  int8_t distance_from_desired = 0;
619  for (; distances_[index] >= distance_from_desired;
620  ++index, ++distance_from_desired) {
621  INDEX_T cur_lid = indices_[index];
622  if (keys_[cur_lid] == oid) {
623  lid = cur_lid;
624  return false;
625  }
626  }
627 
628  lid = static_cast<INDEX_T>(keys_.size());
629  keys_.push_back(std::move(oid));
630  assert(keys_.size() == num_elements_ + 1);
631  emplace_new_value(distance_from_desired, index, lid);
632  assert(keys_.size() == num_elements_);
633  return true;
634  }
635 
636  bool _add(const KEY_T& oid, size_t hash_value, INDEX_T& lid) {
637  size_t index =
639 
640  int8_t distance_from_desired = 0;
641  for (; distances_[index] >= distance_from_desired;
642  ++index, ++distance_from_desired) {
643  INDEX_T cur_lid = indices_[index];
644  if (keys_[cur_lid] == oid) {
645  lid = cur_lid;
646  return false;
647  }
648  }
649 
650  lid = static_cast<INDEX_T>(keys_.size());
651  keys_.push_back(oid);
652  assert(keys_.size() == num_elements_ + 1);
653  emplace_new_value(distance_from_desired, index, lid);
654  assert(keys_.size() == num_elements_);
655  return true;
656  }
657 
658  bool _add(KEY_T&& oid, size_t hash_value, INDEX_T& lid) {
659  size_t index =
661 
662  int8_t distance_from_desired = 0;
663  for (; distances_[index] >= distance_from_desired;
664  ++index, ++distance_from_desired) {
665  INDEX_T cur_lid = indices_[index];
666  if (keys_[cur_lid] == oid) {
667  lid = cur_lid;
668  return false;
669  }
670  }
671 
672  lid = static_cast<INDEX_T>(keys_.size());
673  keys_.push_back(std::move(oid));
674  assert(keys_.size() == num_elements_ + 1);
675  emplace_new_value(distance_from_desired, index, lid);
676  assert(keys_.size() == num_elements_);
677  return true;
678  }
679 
680  void _add(const KEY_T& oid) {
681  size_t index =
682  hash_policy_.index_for_hash(hasher_(oid), num_slots_minus_one_);
683 
684  int8_t distance_from_desired = 0;
685  for (; distances_[index] >= distance_from_desired;
686  ++index, ++distance_from_desired) {
687  if (keys_[indices_[index]] == oid) {
688  return;
689  }
690  }
691 
692  INDEX_T lid = static_cast<INDEX_T>(keys_.size());
693  keys_.push_back(oid);
694  assert(keys_.size() == num_elements_ + 1);
695  emplace_new_value(distance_from_desired, index, lid);
696  assert(keys_.size() == num_elements_);
697  }
698 
699  void _add(KEY_T&& oid) {
700  size_t index =
701  hash_policy_.index_for_hash(hasher_(oid), num_slots_minus_one_);
702 
703  int8_t distance_from_desired = 0;
704  for (; distances_[index] >= distance_from_desired;
705  ++index, ++distance_from_desired) {
706  if (keys_[indices_[index]] == oid) {
707  return;
708  }
709  }
710 
711  INDEX_T lid = static_cast<INDEX_T>(keys_.size());
712  keys_.push_back(std::move(oid));
713  assert(keys_.size() == num_elements_ + 1);
714  emplace_new_value(distance_from_desired, index, lid);
715  assert(keys_.size() == num_elements_);
716  }
717 
718  size_t bucket_count() const {
720  }
721 
722  bool empty() const { return (num_elements_ == 0); }
723 
724  size_t size() const override { return num_elements_; }
725 
726  bool get_key(INDEX_T lid, KEY_T& oid) const {
727  if (static_cast<size_t>(lid) >= num_elements_) {
728  return false;
729  }
730  oid = keys_[lid];
731  return true;
732  }
733 
734  bool get_index(const KEY_T& oid, INDEX_T& lid) const {
735  size_t index =
736  hash_policy_.index_for_hash(hasher_(oid), num_slots_minus_one_);
737  for (int8_t distance = 0; distances_[index] >= distance;
738  ++distance, ++index) {
739  INDEX_T ret = indices_[index];
740  if (keys_[ret] == oid) {
741  lid = ret;
742  return true;
743  }
744  }
745  return false;
746  }
747 
748  bool _get_index(const KEY_T& oid, size_t hash, INDEX_T& lid) const {
749  size_t index = hash_policy_.index_for_hash(hash, num_slots_minus_one_);
750  for (int8_t distance = 0; distances_[index] >= distance;
751  ++distance, ++index) {
752  INDEX_T ret = indices_[index];
753  if (keys_[ret] == oid) {
754  lid = ret;
755  return true;
756  }
757  }
758  return false;
759  }
760 
762  keys_.swap(rhs.keys_);
763  indices_.swap(rhs.indices_);
764  distances_.swap(rhs.distances_);
765 
766  hash_policy_.swap(rhs.hash_policy_);
767  std::swap(max_lookups_, rhs.max_lookups_);
768  std::swap(num_elements_, rhs.num_elements_);
770 
771  std::swap(hasher_, rhs.hasher_);
772  }
773 
774  const key_buffer_t& keys() const { return keys_; }
775 
776  key_buffer_t& keys() { return keys_; }
777 
778  void Serialize(std::unique_ptr<grape::LocalIOAdaptor>& writer) const {
780  grape::InArchive arc;
781  arc << hash_policy_.get_mod_function_index() << max_lookups_
783  << distances_.size();
784  CHECK(writer->WriteArchive(arc));
785  arc.Clear();
786 
787  if (indices_.size() > 0) {
788  CHECK(writer->Write(const_cast<uint8_t*>(indices_.data()),
789  indices_.size() * sizeof(INDEX_T)));
790  }
791  if (distances_.size() > 0) {
792  CHECK(writer->Write(const_cast<int8_t*>(distances_.data()),
793  distances_.size() * sizeof(int8_t)));
794  }
795  }
796 
797  void Deserialize(std::unique_ptr<grape::LocalIOAdaptor>& reader) {
799  grape::OutArchive arc;
800  CHECK(reader->ReadArchive(arc));
801  size_t mod_function_index;
802  size_t indices_size, distances_size;
803  arc >> mod_function_index >> max_lookups_ >> num_elements_ >>
804  num_slots_minus_one_ >> indices_size >> distances_size;
805  arc.Clear();
806 
807  hash_policy_.set_mod_function_by_index(mod_function_index);
808  indices_.resize(indices_size);
809  distances_.resize(distances_size);
810  if (indices_size > 0) {
811  CHECK(reader->Read(indices_.data(), indices_.size() * sizeof(INDEX_T)));
812  }
813  if (distances_size > 0) {
814  CHECK(
815  reader->Read(distances_.data(), distances_.size() * sizeof(int8_t)));
816  }
817  }
818 
819  void _rehash(size_t num) { rehash(num); }
820 
821  private:
822  void emplace(INDEX_T lid) {
823  KEY_T key = keys_[lid];
824  size_t index =
825  hash_policy_.index_for_hash(hasher_(key), num_slots_minus_one_);
826  int8_t distance_from_desired = 0;
827  for (; distances_[index] >= distance_from_desired;
828  ++index, ++distance_from_desired) {
829  if (indices_[index] == lid) {
830  return;
831  }
832  }
833 
834  emplace_new_value(distance_from_desired, index, lid);
835  }
836 
837  void emplace_new_value(int8_t distance_from_desired, size_t index,
838  INDEX_T lid) {
839  if (num_slots_minus_one_ == 0 || distance_from_desired == max_lookups_ ||
840  num_elements_ + 1 >
842  grow();
843  return;
844  } else if (distances_[index] < 0) {
845  indices_[index] = lid;
846  distances_[index] = distance_from_desired;
847  ++num_elements_;
848  return;
849  }
850  INDEX_T to_insert = lid;
851  std::swap(distance_from_desired, distances_[index]);
852  std::swap(to_insert, indices_[index]);
853  for (++distance_from_desired, ++index;; ++index) {
854  if (distances_[index] < 0) {
855  indices_[index] = to_insert;
856  distances_[index] = distance_from_desired;
857  ++num_elements_;
858  return;
859  } else if (distances_[index] < distance_from_desired) {
860  std::swap(distance_from_desired, distances_[index]);
861  std::swap(to_insert, indices_[index]);
862  ++distance_from_desired;
863  } else {
864  ++distance_from_desired;
865  if (distance_from_desired == max_lookups_) {
866  grow();
867  return;
868  }
869  }
870  }
871  }
872 
873  void grow() { rehash(std::max(size_t(4), 2 * bucket_count())); }
874 
875  void rehash(size_t num_buckets) {
876  num_buckets = std::max(
877  num_buckets, static_cast<size_t>(std::ceil(
879 
880  if (num_buckets == 0) {
882  return;
883  }
884 
885  auto new_prime_index = hash_policy_.next_size_over(num_buckets);
886  if (num_buckets == bucket_count()) {
887  return;
888  }
889 
890  int8_t new_max_lookups = compute_max_lookups(num_buckets);
891 
892  dist_buffer_t new_distances(num_buckets + new_max_lookups);
893  ind_buffer_t new_indices(num_buckets + new_max_lookups,
894  std::numeric_limits<INDEX_T>::max());
895 
896  size_t special_end_index = num_buckets + new_max_lookups - 1;
897  for (size_t i = 0; i != special_end_index; ++i) {
898  new_distances[i] = -1;
899  }
900  new_distances[special_end_index] = 0;
901 
902  new_indices.swap(indices_);
903  new_distances.swap(distances_);
904 
905  std::swap(num_slots_minus_one_, num_buckets);
907  hash_policy_.commit(new_prime_index);
908 
909  max_lookups_ = new_max_lookups;
910 
911  num_elements_ = 0;
912  INDEX_T elem_num = static_cast<INDEX_T>(keys_.size());
913  for (INDEX_T lid = 0; lid < elem_num; ++lid) {
914  emplace(lid);
915  }
916  }
917 
919  keys_.clear();
920 
921  indices_.clear();
922  distances_.clear();
924  std::numeric_limits<INDEX_T>::max());
927 
929  hash_policy_.reset();
931  num_elements_ = 0;
932  }
933 
934  static int8_t compute_max_lookups(size_t num_buckets) {
935  int8_t desired = id_indexer_impl::log2(num_buckets);
936  return std::max(id_indexer_impl::min_lookups, desired);
937  }
938 
942 
943  ska::ska::prime_number_hash_policy hash_policy_;
945  size_t num_elements_ = 0;
947 
949 
950  template <typename _KEY_T, typename _INDEX_T>
951  friend void build_lf_indexer(const IdIndexer<_KEY_T, _INDEX_T>& input,
952  const std::string& filename,
953  LFIndexer<_INDEX_T>& output,
954  const std::string& snapshot_dir,
955  const std::string& work_dir, PropertyType type);
956 };
957 
958 template <typename KEY_T, typename INDEX_T>
959 struct _move_data {
961  void operator()(const key_buffer_t& input, ColumnBase& col, size_t size) {
962  auto& keys = dynamic_cast<TypedColumn<KEY_T>&>(col);
963  for (size_t idx = 0; idx < size; ++idx) {
964  keys.set_value(idx, input[idx]);
965  }
966  }
967 };
968 
969 template <typename INDEX_T>
970 struct _move_data<std::string_view, INDEX_T> {
971  using key_buffer_t =
973  void operator()(const key_buffer_t& input, ColumnBase& col, size_t size) {
974  auto& keys = dynamic_cast<StringColumn&>(col);
975  for (size_t idx = 0; idx < size; ++idx) {
976  keys.set_value(idx, input[idx]);
977  }
978  }
979 };
980 
981 template <typename KEY_T, typename INDEX_T>
983  const std::string& filename, LFIndexer<INDEX_T>& lf,
984  const std::string& snapshot_dir,
985  const std::string& work_dir, PropertyType type) {
986  size_t size = input.keys_.size();
987  lf.init(type);
988  lf.keys_->open(filename + ".keys", "", work_dir);
989  lf.keys_->resize(size);
990  _move_data<KEY_T, INDEX_T>()(input.keys_, *lf.keys_, size);
991  lf.num_elements_.store(size);
992 
993  lf.indices_.open(snapshot_dir + "/" + filename + ".indices", true);
994  lf.indices_.resize(input.num_slots_minus_one_ + 1);
995 
996  lf.indices_size_ = input.indices_.size();
997 
998  lf.hash_policy_.set_mod_function_by_index(
999  input.hash_policy_.get_mod_function_index());
1001  memcpy(lf.indices_.data(), input.indices_.data(),
1002  lf.indices_.size() * sizeof(INDEX_T));
1003  static constexpr INDEX_T sentinel = std::numeric_limits<INDEX_T>::max();
1004 
1005  std::vector<INDEX_T> residuals;
1006  for (size_t idx = lf.indices_.size(); idx < lf.indices_size_; ++idx) {
1007  if (input.indices_[idx] != sentinel) {
1008  residuals.push_back(input.indices_[idx]);
1009  }
1010  }
1011  for (const auto& lid : residuals) {
1012  auto oid = input.keys_[lid];
1013  size_t index = input.hash_policy_.index_for_hash(
1014  input.hasher_(oid), input.num_slots_minus_one_);
1015  while (true) {
1016  if (lf.indices_[index] == lid) {
1017  break;
1018  } else if (lf.indices_[index] == sentinel) {
1019  lf.indices_[index] = lid;
1020  break;
1021  }
1022  index = (index + 1) % (input.num_slots_minus_one_ + 1);
1023  }
1024  }
1025  lf.dump_meta(snapshot_dir + "/" + filename + ".meta");
1026 
1027  lf.keys_->dump(snapshot_dir + "/" + filename + ".keys");
1028  std::filesystem::remove(work_dir + "/" + filename + ".meta");
1029  lf.keys_->close();
1030  lf.keys_->open(filename + ".keys", snapshot_dir, "");
1031 }
1032 
1033 } // namespace gs
1034 
1035 #endif // GRAPHSCOPE_GRAPH_ID_INDEXER_H_
gs::IdIndexer::distances_
dist_buffer_t distances_
Definition: id_indexer.h:941
gs::IdIndexer::indices_
ind_buffer_t indices_
Definition: id_indexer.h:940
gs::_move_data< std::string_view, INDEX_T >::key_buffer_t
typename id_indexer_impl::KeyBuffer< std::string_view >::type key_buffer_t
Definition: id_indexer.h:972
gs::IdIndexer::rehash
void rehash(size_t num_buckets)
Definition: id_indexer.h:875
gs::LFIndexer::open_with_hugepages
void open_with_hugepages(const std::string &name, bool hugepage_table)
Definition: id_indexer.h:403
gs::GHash::operator()
size_t operator()(const T &val) const
Definition: id_indexer.h:149
gs::IdIndexer::num_elements_
size_t num_elements_
Definition: id_indexer.h:945
gs::IdIndexer::_add
void _add(KEY_T &&oid)
Definition: id_indexer.h:699
gs::Any::AsUInt64
uint64_t AsUInt64() const
Definition: types.h:611
gs::build_lf_indexer
void build_lf_indexer(const IdIndexer< KEY_T, INDEX_T > &input, const std::string &filename, LFIndexer< INDEX_T > &lf, const std::string &snapshot_dir, const std::string &work_dir, PropertyType type)
Definition: id_indexer.h:982
gs::Any
Definition: types.h:383
gs::IdIndexer::emplace
void emplace(INDEX_T lid)
Definition: id_indexer.h:822
gs::IdIndexer::bucket_count
size_t bucket_count() const
Definition: id_indexer.h:718
gs::ColumnBase::open
virtual void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir)=0
gs::LFIndexer::open
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir)
Definition: id_indexer.h:372
gs::LFIndexer::num_elements_
std::atomic< size_t > num_elements_
Definition: id_indexer.h:512
gs::id_indexer_impl::KeyBuffer< std::string >::deserialize
static void deserialize(std::unique_ptr< IOADAPTOR_T > &reader, type &buffer)
Definition: id_indexer.h:98
gs::GHash
Definition: id_indexer.h:148
gs::id_indexer_impl::KeyBuffer< KEY_T >::type
std::vector< KEY_T > type
Definition: id_indexer.h:62
gs::id_indexer_impl::KeyBuffer< std::string >::type
std::vector< std::string > type
Definition: id_indexer.h:87
gs::IdIndexer::compute_max_lookups
static int8_t compute_max_lookups(size_t num_buckets)
Definition: id_indexer.h:934
gs::IdIndexer::build_lf_indexer
friend void build_lf_indexer(const IdIndexer< _KEY_T, _INDEX_T > &input, const std::string &filename, LFIndexer< _INDEX_T > &output, const std::string &snapshot_dir, const std::string &work_dir, PropertyType type)
gs::IdIndexer::swap
void swap(IdIndexer< KEY_T, INDEX_T > &rhs)
Definition: id_indexer.h:761
gs::TypedColumn::set_value
void set_value(size_t index, const T &val)
Definition: column.h:186
gs::ColumnBase::type
virtual PropertyType type() const =0
types.h
gs::IdIndexer::_add
bool _add(KEY_T &&oid, size_t hash_value, INDEX_T &lid)
Definition: id_indexer.h:658
gs::IdIndexer::~IdIndexer
~IdIndexer()
Definition: id_indexer.h:548
gs::IdIndexer< std::string, int >::key_buffer_t
typename id_indexer_impl::KeyBuffer< std::string >::type key_buffer_t
Definition: id_indexer.h:543
gs::Any::From
static Any From(const T &value)
Definition: types.h:681
gs::IdIndexer::hash_policy_
ska::ska::prime_number_hash_policy hash_policy_
Definition: id_indexer.h:943
column.h
gs::GHash< int64_t >::operator()
size_t operator()(const int64_t &val) const
Definition: id_indexer.h:154
gs::LFIndexer::get_index
INDEX_T get_index(const Any &oid) const
Definition: id_indexer.h:324
gs::IdIndexer::entry_num
size_t entry_num() const
Definition: id_indexer.h:590
gs::mmap_array::get
const T & get(size_t idx) const
Definition: mmap_array.h:410
gs::LFIndexer::copy_to_tmp
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path)
Definition: id_indexer.h:365
gs::LFIndexer::LFIndexer
LFIndexer(LFIndexer &&rhs)
Definition: id_indexer.h:202
gs::LFIndexer::prefix
static std::string prefix()
Definition: id_indexer.h:224
gs::LFIndexer::get_keys
const ColumnBase & get_keys() const
Definition: id_indexer.h:465
gs::IdIndexer::_rehash
void _rehash(size_t num)
Definition: id_indexer.h:819
gs::mmap_array::resize
void resize(size_t size)
Definition: mmap_array.h:319
gs::IdIndexerBase::IdIndexerBase
IdIndexerBase()=default
gs::IdIndexer::Serialize
void Serialize(std::unique_ptr< grape::LocalIOAdaptor > &writer) const
Definition: id_indexer.h:778
gs::IdIndexerBase
Definition: id_indexer.h:528
gs::LFIndexer::dump_meta
void dump_meta(const std::string &filename) const
Definition: id_indexer.h:433
gs::IdIndexerBase::size
virtual size_t size() const =0
gs::GHash< int64_t >
Definition: id_indexer.h:153
gs::IdIndexerBase::get_type
virtual PropertyType get_type() const =0
gs::IdIndexer::_add
void _add(const Any &oid) override
Definition: id_indexer.h:552
gs::IdIndexer::hasher_
GHash< KEY_T > hasher_
Definition: id_indexer.h:948
gs::ConvertAny::to
static void to(const Any &value, T &out)
Definition: types.h:799
gs::id_indexer_impl::min_lookups
static constexpr int8_t min_lookups
Definition: id_indexer.h:42
gs
Definition: adj_list.h:23
gs::TypedColumn
Definition: column.h:64
gs::IdIndexer::keys
key_buffer_t & keys()
Definition: id_indexer.h:776
gs::StringViewVector
Definition: string_view_vector.h:27
gs::LFIndexer::insert
INDEX_T insert(const Any &oid)
Definition: id_indexer.h:307
gs::IdIndexer::_get_index
bool _get_index(const KEY_T &oid, size_t hash, INDEX_T &lid) const
Definition: id_indexer.h:748
gs::IdIndexer::Deserialize
void Deserialize(std::unique_ptr< grape::LocalIOAdaptor > &reader)
Definition: id_indexer.h:797
gs::PropertyType::kUInt64
static const PropertyType kUInt64
Definition: types.h:141
gs::IdIndexer::keys_
key_buffer_t keys_
Definition: id_indexer.h:939
gs::IdIndexer::grow
void grow()
Definition: id_indexer.h:873
gs::Any::AsInt32
int32_t AsInt32() const
Definition: types.h:616
gs::Any::AsInt64
int64_t AsInt64() const
Definition: types.h:606
gs::PropertyType::type_enum
impl::PropertyTypeImpl type_enum
Definition: types.h:97
gs::IdIndexer::get_index
bool get_index(const KEY_T &oid, INDEX_T &lid) const
Definition: id_indexer.h:734
gs::IdIndexer::Clear
void Clear()
Definition: id_indexer.h:581
gs::LFIndexer::hash_policy_
ska::ska::prime_number_hash_policy hash_policy_
Definition: id_indexer.h:516
gs::id_indexer_impl::KeyBuffer
Definition: id_indexer.h:61
gs::id_indexer_impl::KeyBuffer::serialize
static void serialize(std::unique_ptr< IOADAPTOR_T > &writer, const type &buffer)
Definition: id_indexer.h:65
gs::IdIndexer::size
size_t size() const override
Definition: id_indexer.h:724
gs::Any::AsUInt32
uint32_t AsUInt32() const
Definition: types.h:621
gs::mmap_array::open
void open(const std::string &filename, bool sync_to_file=false)
Definition: mmap_array.h:129
gs::IdIndexerBase::~IdIndexerBase
virtual ~IdIndexerBase()=default
gs::id_indexer_impl::KeyBuffer< std::string_view >::deserialize
static void deserialize(std::unique_ptr< IOADAPTOR_T > &reader, type &buffer)
Definition: id_indexer.h:127
gs::PropertyType::STRING_DEFAULT_MAX_LENGTH
static constexpr const uint16_t STRING_DEFAULT_MAX_LENGTH
Definition: types.h:96
gs::id_indexer_impl::KeyBuffer::deserialize
static void deserialize(std::unique_ptr< IOADAPTOR_T > &reader, type &buffer)
Definition: id_indexer.h:75
gs::TypedColumn< std::string_view >::set_value
void set_value(size_t idx, const std::string_view &val)
Definition: column.h:501
gs::IdIndexer::reset_to_empty_state
void reset_to_empty_state()
Definition: id_indexer.h:918
gs::LFIndexer::init
void init(const PropertyType &type)
Definition: id_indexer.h:226
gs::mmap_array::data
T * data()
Definition: mmap_array.h:405
gs::LFIndexer::build_empty_LFIndexer
void build_empty_LFIndexer(const std::string &filename, const std::string &snapshot_dir, const std::string &work_dir)
Definition: id_indexer.h:254
gs::LFIndexer::hasher_
GHash< Any > hasher_
Definition: id_indexer.h:517
gs::TypedColumn< std::string_view >
Definition: column.h:338
gs::GHash< Any >::operator()
size_t operator()(const Any &val) const
Definition: id_indexer.h:165
gs::mmap_array::size
size_t size() const
Definition: mmap_array.h:415
gs::PropertyType::kUInt32
static const PropertyType kUInt32
Definition: types.h:138
gs::_move_data< std::string_view, INDEX_T >::operator()
void operator()(const key_buffer_t &input, ColumnBase &col, size_t size)
Definition: id_indexer.h:973
gs::GHash< Any >
Definition: id_indexer.h:164
gs::LFIndexer::close
void close()
Definition: id_indexer.h:428
gs::IdIndexer::add
bool add(const KEY_T &oid, INDEX_T &lid)
Definition: id_indexer.h:592
gs::ColumnBase::copy_to_tmp
virtual void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path)=0
gs::IdIndexerBase::get_index
virtual bool get_index(const Any &oid, INDEX_T &lid) const =0
gs::LFIndexer::capacity
size_t capacity() const
Definition: id_indexer.h:302
gs::IdIndexer::_add
bool _add(const KEY_T &oid, size_t hash_value, INDEX_T &lid)
Definition: id_indexer.h:636
gs::ColumnBase::set_any
virtual void set_any(size_t index, const Any &value)=0
gs::LFIndexer::get_index
bool get_index(const Any &oid, INDEX_T &ret) const
Definition: id_indexer.h:342
gs::StringViewVector::content_buffer
std::vector< char > & content_buffer()
Definition: string_view_vector.h:57
gs::LFIndexer::load_meta
void load_meta(const std::string &filename)
Definition: id_indexer.h:443
gs::_move_data
Definition: id_indexer.h:959
gs::IdIndexerBase::_add
virtual void _add(const Any &oid)=0
gs::IdIndexer
Definition: id_indexer.h:181
gs::IdIndexer< std::string, int >::dist_buffer_t
std::vector< int8_t > dist_buffer_t
Definition: id_indexer.h:545
gs::IdIndexer::get_type
PropertyType get_type() const override
Definition: id_indexer.h:550
gs::ColumnBase::dump
virtual void dump(const std::string &filename)=0
gs::IdIndexerBase::get_key
virtual bool get_key(const INDEX_T &lid, Any &oid) const =0
gs::IdIndexerBase::add
virtual bool add(const Any &oid, INDEX_T &lid)=0
gs::impl::PropertyTypeImpl::kStringView
@ kStringView
mmap_array.h
gs::IdIndexer::emplace_new_value
void emplace_new_value(int8_t distance_from_desired, size_t index, INDEX_T lid)
Definition: id_indexer.h:837
gs::ColumnBase::get
virtual Any get(size_t index) const =0
gs::mmap_array::dump
void dump(const std::string &filename)
Definition: mmap_array.h:259
gs::impl::PropertyTypeImpl::kVarChar
@ kVarChar
gs::IdIndexer::empty
bool empty() const
Definition: id_indexer.h:722
gs::IdIndexer::max_lookups_
int8_t max_lookups_
Definition: id_indexer.h:944
gs::ColumnBase::size
virtual size_t size() const =0
gs::_move_data::operator()
void operator()(const key_buffer_t &input, ColumnBase &col, size_t size)
Definition: id_indexer.h:961
gs::ColumnBase::open_in_memory
virtual void open_in_memory(const std::string &name)=0
gs::PropertyType::kInt64
static const PropertyType kInt64
Definition: types.h:140
gs::copy_file
void copy_file(const std::string &src, const std::string &dst)
Definition: file_names.h:80
gs::id_indexer_impl::KeyBuffer< std::string_view >::serialize
static void serialize(std::unique_ptr< IOADAPTOR_T > &writer, const type &buffer)
Definition: id_indexer.h:110
gs::LFIndexer::get_type
PropertyType get_type() const
Definition: id_indexer.h:305
gs::StringViewVector::offset_buffer
std::vector< size_t > & offset_buffer()
Definition: string_view_vector.h:61
gs::IdIndexer::add
bool add(const Any &oid, INDEX_T &lid) override
Definition: id_indexer.h:559
gs::LFIndexer::keys_
ColumnBase * keys_
Definition: id_indexer.h:514
gs::ColumnBase::close
virtual void close()=0
gs::mmap_array< INDEX_T >
gs::id_indexer_impl::KeyBuffer< std::string >::serialize
static void serialize(std::unique_ptr< IOADAPTOR_T > &writer, const type &buffer)
Definition: id_indexer.h:90
gs::IdIndexer::get_key
bool get_key(const INDEX_T &lid, Any &oid) const override
Definition: id_indexer.h:566
gs::snapshot_dir
std::string snapshot_dir(const std::string &work_dir, uint32_t version)
Definition: file_names.h:192
std
Definition: loading_config.h:232
gs::LFIndexer::dump
void dump(const std::string &name, const std::string &snapshot_dir)
Definition: id_indexer.h:420
gs::IdIndexer::num_slots_minus_one_
size_t num_slots_minus_one_
Definition: id_indexer.h:946
gs::ColumnBase::open_with_hugepages
virtual void open_with_hugepages(const std::string &name, bool force)=0
gs::IdIndexer::_add
void _add(const KEY_T &oid)
Definition: id_indexer.h:680
string_view_vector.h
boost::hash_value
std::size_t hash_value(const grape::EmptyType &value)
Definition: types.h:1318
gs::IdIndexer::get_key
bool get_key(INDEX_T lid, KEY_T &oid) const
Definition: id_indexer.h:726
gs::IdIndexer::add
bool add(KEY_T &&oid, INDEX_T &lid)
Definition: id_indexer.h:614
gs::IdIndexer::IdIndexer
IdIndexer()
Definition: id_indexer.h:547
gs::AnyConverter
Definition: types.h:381
gs::Any::type
PropertyType type
Definition: types.h:793
gs::IdIndexer::get_index
bool get_index(const Any &oid, INDEX_T &lid) const override
Definition: id_indexer.h:575
gs::mmap_array::open_with_hugepages
void open_with_hugepages(const std::string &filename, size_t capacity=0)
Definition: mmap_array.h:214
gs::LFIndexer::LFIndexer
LFIndexer()
Definition: id_indexer.h:195
gs::LFIndexer::build_lf_indexer
friend void build_lf_indexer(const IdIndexer< _KEY_T, _INDEX_T > &input, const std::string &filename, LFIndexer< _INDEX_T > &output, const std::string &snapshot_dir, const std::string &work_dir, PropertyType type)
gs::LFIndexer::get_key
Any get_key(const INDEX_T &index) const
Definition: id_indexer.h:363
gs::PropertyType::additional_type_info
impl::AdditionalTypeInfo additional_type_info
Definition: types.h:98
gs::_move_data::key_buffer_t
typename id_indexer_impl::KeyBuffer< KEY_T >::type key_buffer_t
Definition: id_indexer.h:960
gs::PropertyType
Definition: types.h:95
gs::LFIndexer::size
size_t size() const
Definition: id_indexer.h:304
gs::impl::AdditionalTypeInfo::max_length
uint16_t max_length
Definition: types.h:91
gs::StringColumn
TypedColumn< std::string_view > StringColumn
Definition: column.h:576
gs::ColumnBase::resize
virtual void resize(size_t size)=0
gs::mmap_array::reset
void reset()
Definition: mmap_array.h:84
gs::Any::to_string
std::string to_string() const
Definition: types.h:560
gs::LFIndexer
Definition: id_indexer.h:184
gs::LFIndexer::~LFIndexer
~LFIndexer()
Definition: id_indexer.h:218
gs::id_indexer_impl::log2
int8_t log2(size_t value)
Definition: id_indexer.h:45
gs::PropertyType::kInt32
static const PropertyType kInt32
Definition: types.h:137
gs::ColumnBase
Definition: column.h:29
gs::LFIndexer::rehash
void rehash(size_t size)
Definition: id_indexer.h:269
gs::StorageStrategy::kMem
@ kMem
gs::Any::AsStringView
std::string_view AsStringView() const
Definition: types.h:641
gs::id_indexer_impl::max_load_factor
static constexpr double max_load_factor
Definition: id_indexer.h:43
gs::IdIndexer< std::string, int >::ind_buffer_t
std::vector< int > ind_buffer_t
Definition: id_indexer.h:544
gs::LFIndexer::num_slots_minus_one_
size_t num_slots_minus_one_
Definition: id_indexer.h:513
gs::LFIndexer::indices_size_
size_t indices_size_
Definition: id_indexer.h:511
gs::LFIndexer::indices_
mmap_array< INDEX_T > indices_
Definition: id_indexer.h:509
gs::LFIndexer::reserve
void reserve(size_t size)
Definition: id_indexer.h:267
gs::IdIndexer::keys
const key_buffer_t & keys() const
Definition: id_indexer.h:774
gs::LFIndexer::open_in_memory
void open_in_memory(const std::string &name)
Definition: id_indexer.h:390
gs::LFIndexer::warmup
void warmup(int thread_num) const
Definition: id_indexer.h:466