Flex  0.17.9
column.h
Go to the documentation of this file.
1 
16 #ifndef GRAPHSCOPE_PROPERTY_COLUMN_H_
17 #define GRAPHSCOPE_PROPERTY_COLUMN_H_
18 
19 #include <string>
20 #include <string_view>
21 #include "grape/utils/concurrent_queue.h"
22 
23 #include "flex/utils/mmap_array.h"
25 #include "grape/serialization/out_archive.h"
26 
27 namespace gs {
28 
29 std::string_view truncate_utf8(std::string_view str, size_t length);
30 
31 class ColumnBase {
32  public:
33  virtual ~ColumnBase() {}
34 
35  virtual void open(const std::string& name, const std::string& snapshot_dir,
36  const std::string& work_dir) = 0;
37 
38  virtual void open_in_memory(const std::string& name) = 0;
39 
40  virtual void open_with_hugepages(const std::string& name, bool force) = 0;
41 
42  virtual void close() = 0;
43 
44  virtual void touch(const std::string& filename) = 0;
45 
46  virtual void dump(const std::string& filename) = 0;
47 
48  virtual size_t size() const = 0;
49 
50  virtual void copy_to_tmp(const std::string& cur_path,
51  const std::string& tmp_path) = 0;
52  virtual void resize(size_t size) = 0;
53 
54  virtual PropertyType type() const = 0;
55 
56  virtual void set_any(size_t index, const Any& value) = 0;
57 
58  virtual Any get(size_t index) const = 0;
59 
60  virtual void ingest(uint32_t index, grape::OutArchive& arc) = 0;
61 
62  virtual StorageStrategy storage_strategy() const = 0;
63 };
64 
65 template <typename T>
66 class TypedColumn : public ColumnBase {
67  public:
68  TypedColumn(StorageStrategy strategy) : strategy_(strategy) {}
70 
71  void open(const std::string& name, const std::string& snapshot_dir,
72  const std::string& work_dir) override {
73  std::string basic_path = snapshot_dir + "/" + name;
74  if (std::filesystem::exists(basic_path)) {
75  basic_buffer_.open(basic_path, false);
76  basic_size_ = basic_buffer_.size();
77  } else {
78  basic_size_ = 0;
79  }
80  if (work_dir == "") {
81  extra_size_ = 0;
82  } else {
83  extra_buffer_.open(work_dir + "/" + name, true);
84  extra_size_ = extra_buffer_.size();
85  }
86  }
87 
88  void open_in_memory(const std::string& name) override {
89  if (!name.empty() && std::filesystem::exists(name)) {
90  basic_buffer_.open(name, false);
91  basic_size_ = basic_buffer_.size();
92  } else {
93  basic_buffer_.reset();
94  basic_size_ = 0;
95  }
96  extra_buffer_.reset();
97  extra_size_ = 0;
98  }
99 
100  void open_with_hugepages(const std::string& name, bool force) override {
101  if (strategy_ == StorageStrategy::kMem || force) {
102  if (!name.empty() && std::filesystem::exists(name)) {
103  basic_buffer_.open_with_hugepages(name);
104  basic_size_ = basic_buffer_.size();
105  } else {
106  basic_buffer_.reset();
107  basic_buffer_.set_hugepage_prefered(true);
108  basic_size_ = 0;
109  }
110  extra_buffer_.reset();
111  extra_buffer_.set_hugepage_prefered(true);
112  extra_size_ = 0;
113  } else if (strategy_ == StorageStrategy::kDisk) {
114  LOG(INFO) << "Open " << name << " with normal mmap pages";
115  open_in_memory(name);
116  }
117  }
118 
119  void touch(const std::string& filename) override {
120  mmap_array<T> tmp;
121  tmp.open(filename, true);
123  for (size_t k = 0; k < basic_size_; ++k) {
124  tmp.set(k, basic_buffer_.get(k));
125  }
126  for (size_t k = 0; k < extra_size_; ++k) {
127  tmp.set(k + basic_size_, extra_buffer_.get(k));
128  }
129  basic_size_ = 0;
130  basic_buffer_.reset();
131  extra_size_ = tmp.size();
132  extra_buffer_.swap(tmp);
133  tmp.reset();
134  }
135 
136  void close() override {
137  basic_buffer_.reset();
138  extra_buffer_.reset();
139  }
140 
141  void copy_to_tmp(const std::string& cur_path,
142  const std::string& tmp_path) override {
143  mmap_array<T> tmp;
144  if (!std::filesystem::exists(cur_path)) {
145  return;
146  }
147  copy_file(cur_path, tmp_path);
149  basic_size_ = 0;
150  tmp.open(tmp_path, true);
151  basic_buffer_.reset();
152  extra_buffer_.swap(tmp);
153  tmp.reset();
154  }
155 
156  void dump(const std::string& filename) override {
157  if (basic_size_ != 0 && extra_size_ == 0) {
158  basic_buffer_.dump(filename);
159  } else if (basic_size_ == 0 && extra_size_ != 0) {
160  extra_buffer_.dump(filename);
161  } else {
162  mmap_array<T> tmp;
163  tmp.open(filename, true);
164  for (size_t k = 0; k < basic_size_; ++k) {
165  tmp.set(k, basic_buffer_.get(k));
166  }
167  for (size_t k = 0; k < extra_size_; ++k) {
168  tmp.set(k + basic_size_, extra_buffer_.get(k));
169  }
170  }
171  }
172 
173  size_t size() const override { return basic_size_ + extra_size_; }
174 
175  void resize(size_t size) override {
176  if (size < basic_buffer_.size()) {
177  basic_size_ = size;
178  extra_size_ = 0;
179  } else {
180  basic_size_ = basic_buffer_.size();
182  extra_buffer_.resize(extra_size_);
183  }
184  }
185 
186  PropertyType type() const override { return AnyConverter<T>::type(); }
187 
188  void set_value(size_t index, const T& val) {
189  if (index >= basic_size_ && index < basic_size_ + extra_size_) {
190  extra_buffer_.set(index - basic_size_, val);
191  } else if (index < basic_size_) {
192  basic_buffer_.set(index, val);
193  } else {
194  throw std::runtime_error("Index out of range");
195  }
196  }
197 
198  void set_any(size_t index, const Any& value) override {
199  set_value(index, AnyConverter<T>::from_any(value));
200  }
201 
202  inline T get_view(size_t index) const {
203  return index < basic_size_ ? basic_buffer_.get(index)
204  : extra_buffer_.get(index - basic_size_);
205  }
206 
207  Any get(size_t index) const override {
208  return AnyConverter<T>::to_any(get_view(index));
209  }
210 
211  void ingest(uint32_t index, grape::OutArchive& arc) override {
212  T val;
213  arc >> val;
214  set_value(index, val);
215  }
216 
217  StorageStrategy storage_strategy() const override { return strategy_; }
218 
219  const mmap_array<T>& basic_buffer() const { return basic_buffer_; }
220  size_t basic_buffer_size() const { return basic_size_; }
221  const mmap_array<T>& extra_buffer() const { return extra_buffer_; }
222  size_t extra_buffer_size() const { return extra_size_; }
223 
224  private:
226  size_t basic_size_;
228  size_t extra_size_;
230 };
231 
232 template <>
234  public:
235  TypedColumn(const std::vector<PropertyType>& types) : types_(types) {
236  if (types.size() == 0) {
237  LOG(FATAL) << "RecordView column must have sub types.";
238  }
239  }
240 
242 
243  void open(const std::string& name, const std::string& snapshot_dir,
244  const std::string& work_dir) override {
245  LOG(FATAL) << "RecordView column does not support open.";
246  }
247 
248  void open_in_memory(const std::string& name) override;
249 
250  void open_with_hugepages(const std::string& name, bool force) override {
251  LOG(FATAL) << "RecordView column does not support open with hugepages.";
252  }
253 
254  void touch(const std::string& filename) override {
255  LOG(FATAL) << "RecordView column does not support touch.";
256  }
257 
258  void dump(const std::string& filename) override {
259  LOG(FATAL) << "RecordView column does not support dump.";
260  }
261 
262  void copy_to_tmp(const std::string& cur_path,
263  const std::string& tmp_path) override {
264  LOG(FATAL) << "RecordView column does not support copy_to_tmp.";
265  }
266  void close() override;
267 
268  size_t size() const override;
269  void resize(size_t size) override;
270 
271  PropertyType type() const override { return PropertyType::kRecordView; }
272 
273  void set_any(size_t index, const Any& value) override;
274 
275  void set_value(size_t index, const RecordView& val);
276 
277  RecordView get_view(size_t index) const;
278 
279  Any get(size_t index) const override;
280 
281  void ingest(uint32_t index, grape::OutArchive& arc) override {
282  LOG(FATAL) << "RecordView column does not support ingest.";
283  }
284 
286  LOG(ERROR) << "RecordView column does not have storage strategy.";
287  return StorageStrategy::kMem;
288  }
289 
290  std::vector<PropertyType> sub_types() const { return types_; }
291 
292  private:
293  std::vector<PropertyType> types_;
294  std::shared_ptr<Table> table_;
295 };
296 
309 
310 template <>
311 class TypedColumn<grape::EmptyType> : public ColumnBase {
312  public:
313  TypedColumn(StorageStrategy strategy) : strategy_(strategy) {}
315 
316  void open(const std::string& name, const std::string& snapshot_dir,
317  const std::string& work_dir) override {}
318  void open_in_memory(const std::string& name) override {}
319  void open_with_hugepages(const std::string& name, bool force) override {}
320  void touch(const std::string& filename) override {}
321  void dump(const std::string& filename) override {}
322  void copy_to_tmp(const std::string& cur_path,
323  const std::string& tmp_path) override {}
324  void close() override {}
325  size_t size() const override { return 0; }
326  void resize(size_t size) override {}
327 
328  PropertyType type() const override { return PropertyType::kEmpty; }
329 
330  void set_any(size_t index, const Any& value) override {}
331 
332  void set_value(size_t index, const grape::EmptyType& value) {}
333 
334  Any get(size_t index) const override { return Any(); }
335 
336  grape::EmptyType get_view(size_t index) const { return grape::EmptyType(); }
337 
338  void ingest(uint32_t index, grape::OutArchive& arc) override {}
339 
340  StorageStrategy storage_strategy() const override { return strategy_; }
341 
342  private:
344 };
345 template <>
346 class TypedColumn<std::string_view> : public ColumnBase {
347  public:
350  : strategy_(strategy), width_(width) {}
352 
353  void open(const std::string& name, const std::string& snapshot_dir,
354  const std::string& work_dir) override {
355  std::string basic_path = snapshot_dir + "/" + name;
356  if (std::filesystem::exists(basic_path + ".items")) {
357  basic_buffer_.open(basic_path, false);
358  basic_size_ = basic_buffer_.size();
359  basic_pos_ = basic_buffer_.data_size();
360  } else {
361  basic_size_ = 0;
362  basic_pos_ = 0;
363  }
364  if (work_dir == "") {
365  extra_size_ = 0;
366  pos_.store(0);
367  } else {
368  extra_buffer_.open(work_dir + "/" + name, true);
369  extra_size_ = extra_buffer_.size();
370  pos_.store(extra_buffer_.data_size());
371  }
372  }
373 
374  void open_in_memory(const std::string& prefix) override {
375  basic_buffer_.open(prefix, false);
376  basic_size_ = basic_buffer_.size();
377  basic_pos_ = basic_buffer_.data_size();
378 
379  extra_buffer_.reset();
380  extra_size_ = 0;
381  pos_.store(0);
382  }
383 
384  void open_with_hugepages(const std::string& prefix, bool force) override {
385  if (strategy_ == StorageStrategy::kMem || force) {
386  basic_buffer_.open_with_hugepages(prefix);
387  basic_size_ = basic_buffer_.size();
388  basic_pos_ = basic_buffer_.data_size();
389 
390  extra_buffer_.reset();
391  extra_buffer_.set_hugepage_prefered(true);
392  extra_size_ = 0;
393  pos_.store(0);
394  } else if (strategy_ == StorageStrategy::kDisk) {
395  LOG(INFO) << "Open " << prefix << " with normal mmap pages";
396  open_in_memory(prefix);
397  }
398  }
399 
400  void touch(const std::string& filename) override {
402  tmp.open(filename, true);
404  size_t offset = 0;
405  for (size_t k = 0; k < basic_size_; ++k) {
406  std::string_view val = basic_buffer_.get(k);
407  tmp.set(k, offset, val);
408  offset += val.size();
409  }
410  for (size_t k = 0; k < extra_size_; ++k) {
411  std::string_view val = extra_buffer_.get(k);
412  tmp.set(k + basic_size_, offset, val);
413  offset += val.size();
414  }
415 
416  basic_size_ = 0;
417  basic_pos_ = 0;
418  basic_buffer_.reset();
419  extra_size_ = tmp.size();
420  extra_buffer_.swap(tmp);
421  tmp.reset();
422 
423  pos_.store(offset);
424  }
425 
426  void close() override {
427  basic_buffer_.reset();
428  extra_buffer_.reset();
429  }
430 
431  void copy_to_tmp(const std::string& cur_path,
432  const std::string& tmp_path) override {
434  if (!std::filesystem::exists(cur_path + ".data")) {
435  return;
436  }
437  copy_file(cur_path + ".data", tmp_path + ".data");
438  copy_file(cur_path + ".items", tmp_path + ".items");
439 
441  basic_size_ = 0;
442  basic_pos_ = 0;
443  basic_buffer_.reset();
444  tmp.open(tmp_path, true);
445  extra_buffer_.swap(tmp);
446  tmp.reset();
447  pos_.store(extra_buffer_.data_size());
448  }
449 
450  void dump(const std::string& filename) override {
451  if (basic_size_ != 0 && extra_size_ == 0) {
452  basic_buffer_.resize(basic_size_, basic_pos_.load());
453  basic_buffer_.dump(filename);
454  } else if (basic_size_ == 0 && extra_size_ != 0) {
455  extra_buffer_.resize(extra_size_, pos_.load());
456  extra_buffer_.dump(filename);
457  } else {
459  tmp.open(filename, true);
461  (basic_size_ + extra_size_) * width_);
462  size_t offset = 0;
463  for (size_t k = 0; k < basic_size_; ++k) {
464  std::string_view val = basic_buffer_.get(k);
465  tmp.set(k, offset, val);
466  offset += val.size();
467  }
468  for (size_t k = 0; k < extra_size_; ++k) {
469  std::string_view val = extra_buffer_.get(k);
470  tmp.set(k + basic_size_, offset, extra_buffer_.get(k));
471  offset += val.size();
472  }
473  tmp.resize(basic_size_ + extra_size_, offset);
474  tmp.reset();
475  }
476  }
477 
478  size_t size() const override { return basic_size_ + extra_size_; }
479 
480  void resize(size_t size) override {
481  if (size < basic_buffer_.size()) {
482  basic_size_ = size;
483  extra_size_ = 0;
484  } else {
485  basic_size_ = basic_buffer_.size();
487  if (basic_buffer_.size() != 0) {
488  size_t basic_avg_width =
489  (basic_buffer_.data_size() + basic_buffer_.size() - 1) /
490  basic_buffer_.size();
491  // extra_size_ * basic_avg_width may be smaller than pos_.load()
492  extra_buffer_.resize(
493  extra_size_, std::max(extra_size_ * basic_avg_width, pos_.load()));
494  } else {
495  extra_buffer_.resize(extra_size_,
496  std::max(extra_size_ * width_, pos_.load()));
497  }
498  }
499  // resize `data` of basic_buffer
500  {
501  size_t pos = basic_pos_.load();
502  pos = pos + (pos + 4) / 5;
503  basic_buffer_.resize(basic_size_, pos);
504  }
505  }
506 
507  PropertyType type() const override { return PropertyType::Varchar(width_); }
508 
509  void set_value(size_t idx, const std::string_view& val) {
510  auto copied_val = val;
511  if (copied_val.size() >= width_) {
512  VLOG(1) << "String length" << copied_val.size()
513  << " exceeds the maximum length: " << width_ << ", cut off.";
514  copied_val = truncate_utf8(copied_val, width_);
515  }
516  if (idx >= basic_size_ && idx < basic_size_ + extra_size_) {
517  size_t offset = pos_.fetch_add(copied_val.size());
518  extra_buffer_.set(idx - basic_size_, offset, copied_val);
519  } else if (idx < basic_size_) {
520  size_t offset = basic_pos_.fetch_add(copied_val.size());
521  basic_buffer_.set(idx, offset, copied_val);
522  } else {
523  LOG(FATAL) << "Index out of range";
524  }
525  }
526 
527  void set_any(size_t idx, const Any& value) override {
528  set_value(idx, value.AsStringView());
529  }
530 
531  // make sure there is enough space for the value
532  void set_value_with_check(size_t idx, const std::string_view& value) {
533  if (idx >= basic_size_ && idx < basic_size_ + extra_size_) {
534  size_t offset = pos_.fetch_add(value.size());
535  if (pos_.load() > extra_buffer_.data_size()) {
536  extra_buffer_.resize(extra_buffer_.size(), pos_.load());
537  }
538  extra_buffer_.set(idx - basic_size_, offset, value);
539  } else if (idx < basic_size_) {
540  size_t offset = basic_pos_.fetch_add(value.size());
541  if (basic_pos_.load() > basic_buffer_.data_size()) {
542  basic_buffer_.resize(basic_buffer_.size(), basic_pos_.load());
543  }
544  basic_buffer_.set(idx, offset, value);
545  } else {
546  LOG(FATAL) << "Index out of range";
547  }
548  }
549 
550  inline std::string_view get_view(size_t idx) const {
551  return idx < basic_size_ ? basic_buffer_.get(idx)
552  : extra_buffer_.get(idx - basic_size_);
553  }
554 
555  Any get(size_t idx) const override {
557  }
558 
559  void ingest(uint32_t index, grape::OutArchive& arc) override {
560  std::string_view val;
561  arc >> val;
562  set_value(index, val);
563  }
564 
566  return basic_buffer_;
567  }
568 
569  StorageStrategy storage_strategy() const override { return strategy_; }
570 
571  size_t basic_buffer_size() const { return basic_size_; }
572 
574  return extra_buffer_;
575  }
576 
577  size_t extra_buffer_size() const { return extra_size_; }
578 
579  private:
581  size_t basic_size_;
583  size_t extra_size_;
584  std::atomic<size_t> pos_;
585  std::atomic<size_t> basic_pos_;
587  uint16_t width_;
588 };
589 
591 template <typename INDEX_T>
592 class LFIndexer;
593 
594 template <typename INDEX_T>
595 class StringMapColumn : public ColumnBase {
596  public:
598  : index_col_(strategy), meta_map_(nullptr) {
600  meta_map_->init(
602  }
603 
605  if (meta_map_) {
606  meta_map_->close();
607  delete meta_map_;
608  }
609  index_col_.close();
610  }
611 
612  void copy_to_tmp(const std::string& cur_path,
613  const std::string& tmp_path) override {
614  meta_map_->copy_to_tmp(cur_path + ".map_meta", tmp_path + ".map_meta");
615  index_col_.copy_to_tmp(cur_path, tmp_path);
616  }
617  void open(const std::string& name, const std::string& snapshot_dir,
618  const std::string& work_dir) override;
619  void open_in_memory(const std::string& name) override;
620  void open_with_hugepages(const std::string& name, bool force) override;
621  void dump(const std::string& filename) override;
622 
623  void touch(const std::string& filename) override {
624  index_col_.touch(filename);
625  }
626 
627  void close() override {
628  if (meta_map_ != nullptr) {
629  meta_map_->close();
630  }
631  index_col_.close();
632  }
633 
634  size_t size() const override { return index_col_.size(); }
635  void resize(size_t size) override { index_col_.resize(size); }
636 
637  PropertyType type() const override { return PropertyType::kStringMap; }
638 
639  void set_value(size_t idx, const std::string_view& val);
640 
641  void set_any(size_t idx, const Any& value) override {
642  set_value(idx, value.AsStringView());
643  }
644 
645  std::string_view get_view(size_t idx) const;
646 
647  Any get(size_t idx) const override {
649  }
650 
651  void ingest(uint32_t index, grape::OutArchive& arc) override {
652  std::string_view val;
653  arc >> val;
654  set_value(index, val);
655  }
656 
658  return index_col_.storage_strategy();
659  }
660 
661  const TypedColumn<INDEX_T>& get_index_col() const { return index_col_; }
662  const LFIndexer<INDEX_T>& get_meta_map() const { return *meta_map_; }
663 
664  private:
667  grape::SpinLock lock_;
668 };
669 
670 template <typename INDEX_T>
671 void StringMapColumn<INDEX_T>::open(const std::string& name,
672  const std::string& snapshot_dir,
673  const std::string& work_dir) {
674  index_col_.open(name, snapshot_dir, work_dir);
675  meta_map_->open(name + ".map_meta", snapshot_dir, work_dir);
676  meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
677 }
678 
679 template <typename INDEX_T>
680 void StringMapColumn<INDEX_T>::open_in_memory(const std::string& name) {
681  index_col_.open_in_memory(name);
682  meta_map_->open_in_memory(name + ".map_meta");
683  meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
684 }
685 
686 template <typename INDEX_T>
688  bool force) {
689  index_col_.open_with_hugepages(name, force);
690  meta_map_->open_with_hugepages(name + ".map_meta", true);
691  meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
692 }
693 
694 template <typename INDEX_T>
695 void StringMapColumn<INDEX_T>::dump(const std::string& filename) {
696  index_col_.dump(filename);
697  meta_map_->dump(filename + ".map_meta", "");
698 }
699 
700 template <typename INDEX_T>
701 std::string_view StringMapColumn<INDEX_T>::get_view(size_t idx) const {
702  INDEX_T ind = index_col_.get_view(idx);
703  return meta_map_->get_key(ind).AsStringView();
704 }
705 
706 template <typename INDEX_T>
708  const std::string_view& val) {
709  INDEX_T lid;
710  if (!meta_map_->get_index(val, lid)) {
711  lock_.lock();
712  if (!meta_map_->get_index(val, lid)) {
713  lid = meta_map_->insert(val);
714  }
715  lock_.unlock();
716  }
717  index_col_.set_value(idx, lid);
718 }
719 
721 
722 std::shared_ptr<ColumnBase> CreateColumn(
724  const std::vector<PropertyType>& sub_types = {});
725 
726 #ifdef USE_PTHASH
727 template <typename EDATA_T>
728 class ConcatColumn : public ColumnBase {
729  public:
730  ~ConcatColumn() {}
731 
732  ConcatColumn(const TypedColumn<EDATA_T>& basic_column,
733  const TypedColumn<EDATA_T>& extra_column)
734  : basic_column_(basic_column),
735  extra_column_(extra_column),
736  basic_size_(basic_column.size()) {}
737 
738  void open(const std::string& name, const std::string& snapshot_dir,
739  const std::string& work_dir) {
740  LOG(FATAL) << "not implemented";
741  }
742 
743  void open_in_memory(const std::string& name) {
744  LOG(FATAL) << "not implemented";
745  }
746 
747  void open_with_hugepages(const std::string& name, bool force) {
748  LOG(FATAL) << "not implemented";
749  }
750 
751  void close() { LOG(FATAL) << "not implemented"; }
752 
753  EDATA_T get_view(size_t index) const {
754  return index < basic_size_ ? basic_column_.get(index)
755  : extra_column_.get(index - basic_size_);
756  }
757 
758  void touch(const std::string& filename) { LOG(FATAL) << "not implemented"; }
759 
760  virtual void dump(const std::string& filename) {
761  LOG(FATAL) << "not implemented";
762  }
763 
764  size_t size() const { return basic_size_ + extra_column_.size(); }
765 
766  void copy_to_tmp(const std::string& cur_path, const std::string& tmp_path) {
767  LOG(FATAL) << "not implemented";
768  }
769  void resize(size_t size) { LOG(FATAL) << "not implemented"; }
770 
771  PropertyType type() const { return AnyConverter<EDATA_T>::type(); }
772 
773  void set_any(size_t index, const Any& value) {
774  LOG(FATAL) << "not implemented";
775  }
776 
777  Any get(size_t index) const {
778  if (index < basic_size_) {
779  return basic_column_.get(index);
780  } else {
781  return extra_column_.get(index - basic_size_);
782  }
783  }
784 
785  void ingest(uint32_t index, grape::OutArchive& arc) {
786  LOG(FATAL) << "not implemented";
787  }
788 
789  StorageStrategy storage_strategy() const {
790  return basic_column_.storage_strategy();
791  }
792 
793  private:
794  const TypedColumn<EDATA_T>& basic_column_;
795  const TypedColumn<EDATA_T>& extra_column_;
796  size_t basic_size_;
797 };
798 #endif
799 
802  public:
803  virtual ~RefColumnBase() {}
804  virtual Any get(size_t index) const = 0;
805 };
806 
807 // Different from TypedColumn, RefColumn is a wrapper of mmap_array
808 template <typename T>
810  public:
811  using value_type = T;
812 
814  : basic_buffer(buffer),
815  basic_size(0),
816  extra_buffer(buffer),
817  extra_size(buffer.size()),
818  strategy_(strategy) {}
820  : basic_buffer(column.basic_buffer()),
821  basic_size(column.basic_buffer_size()),
822  extra_buffer(column.extra_buffer()),
823  extra_size(column.extra_buffer_size()),
824  strategy_(column.storage_strategy()) {}
826 
827  inline T get_view(size_t index) const {
828  return index < basic_size ? basic_buffer.get(index)
829  : extra_buffer.get(index - basic_size);
830  }
831 
832  size_t size() const { return basic_size + extra_size; }
833 
834  Any get(size_t index) const override {
835  return AnyConverter<T>::to_any(get_view(index));
836  }
837 
838  private:
840  size_t basic_size;
842  size_t extra_size;
843 
845 };
846 
847 template <>
849  public:
850  TypedRefColumn(LabelKey label_key) : label_key_(label_key) {}
851 
853 
854  inline LabelKey get_view(size_t index) const { return label_key_; }
855 
856  Any get(size_t index) const override {
857  LOG(ERROR) << "LabelKeyColumn does not support get() to Any";
858  return Any();
859  }
860 
861  private:
863 };
864 
865 template <>
867  public:
869  TypedRefColumn(label_t label_key) : label_key_(label_key) {}
870 
872 
873  inline GlobalId get_view(size_t index) const {
874  return GlobalId(label_key_, index);
875  }
876 
877  Any get(size_t index) const override {
878  LOG(ERROR) << "GlobalId Column does not support get() to Any";
879  return Any();
880  }
881 
882  private:
884 };
885 
886 // Create a reference column from a ColumnBase that contains a const reference
887 // to the actual column storage, offering a column-based store interface for
888 // vertex properties.
889 std::shared_ptr<RefColumnBase> CreateRefColumn(
890  std::shared_ptr<ColumnBase> column);
891 
892 } // namespace gs
893 
894 #endif // GRAPHSCOPE_PROPERTY_COLUMN_H_
gs::TypedColumn< std::string_view >::extra_buffer
const mmap_array< std::string_view > & extra_buffer() const
Definition: column.h:573
gs::TypedColumn< std::string_view >::extra_buffer_size
size_t extra_buffer_size() const
Definition: column.h:577
grape
Definition: types.h:33
gs::TypedColumn::touch
void touch(const std::string &filename) override
Definition: column.h:119
gs::TypedColumn::resize
void resize(size_t size) override
Definition: column.h:175
gs::TypedRefColumn::get_view
T get_view(size_t index) const
Definition: column.h:827
gs::TypedRefColumn::size
size_t size() const
Definition: column.h:832
gs::TypedColumn< std::string_view >::basic_buffer_size
size_t basic_buffer_size() const
Definition: column.h:571
gs::TypedRefColumn< LabelKey >::get
Any get(size_t index) const override
Definition: column.h:856
gs::TypedColumn::open
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:71
gs::mmap_array< std::string_view >::resize
void resize(size_t size, size_t data_size)
Definition: mmap_array.h:483
gs::mmap_array< std::string_view >::size
size_t size() const
Definition: mmap_array.h:498
gs::StringMapColumn::get_meta_map
const LFIndexer< INDEX_T > & get_meta_map() const
Definition: column.h:662
gs::StringMapColumn::dump
void dump(const std::string &filename) override
Definition: column.h:695
gs::Any
Definition: types.h:395
gs::TypedColumn< std::string_view >::type
PropertyType type() const override
Definition: column.h:507
gs::StringMapColumn::get_view
std::string_view get_view(size_t idx) const
Definition: column.h:701
gs::ColumnBase::open
virtual void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir)=0
gs::TypedColumn< RecordView >::types_
std::vector< PropertyType > types_
Definition: column.h:293
gs::TypedColumn< std::string_view >::resize
void resize(size_t size) override
Definition: column.h:480
gs::TypedColumn::TypedColumn
TypedColumn(StorageStrategy strategy)
Definition: column.h:68
gs::TypedColumn< std::string_view >::dump
void dump(const std::string &filename) override
Definition: column.h:450
gs::TypedColumn::set_value
void set_value(size_t index, const T &val)
Definition: column.h:188
gs::TypedColumn< RecordView >::touch
void touch(const std::string &filename) override
Definition: column.h:254
gs::ColumnBase::type
virtual PropertyType type() const =0
types.h
gs::TypedColumn< RecordView >::type
PropertyType type() const override
Definition: column.h:271
gs::mmap_array< std::string_view >::open
void open(const std::string &filename, bool sync_to_file)
Definition: mmap_array.h:463
gs::StringMapColumn::lock_
grape::SpinLock lock_
Definition: column.h:667
gs::TypedColumn< std::string_view >::basic_buffer
const mmap_array< std::string_view > & basic_buffer() const
Definition: column.h:565
gs::TypedColumn< RecordView >::table_
std::shared_ptr< Table > table_
Definition: column.h:294
gs::TypedColumn< std::string_view >::width_
uint16_t width_
Definition: column.h:587
gs::TypedRefColumn< GlobalId >::~TypedRefColumn
~TypedRefColumn()
Definition: column.h:871
gs::TypedRefColumn< LabelKey >::label_key_
LabelKey label_key_
Definition: column.h:862
gs::TypedColumn< grape::EmptyType >::open_with_hugepages
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:319
gs::TypedColumn::basic_buffer_
mmap_array< T > basic_buffer_
Definition: column.h:225
gs::TypedColumn< grape::EmptyType >::set_any
void set_any(size_t index, const Any &value) override
Definition: column.h:330
gs::RefColumnBase
Create RefColumn for ease of usage for hqps.
Definition: column.h:801
gs::TypedColumn< RecordView >::storage_strategy
StorageStrategy storage_strategy() const override
Definition: column.h:285
gs::TypedColumn::close
void close() override
Definition: column.h:136
gs::StringMapColumn::get_index_col
const TypedColumn< INDEX_T > & get_index_col() const
Definition: column.h:661
gs::TypedColumn::size
size_t size() const override
Definition: column.h:173
gs::StringMapColumn::ingest
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:651
gs::TypedRefColumn::extra_buffer
const mmap_array< T > & extra_buffer
Definition: column.h:841
gs::TypedColumn< std::string_view >::size
size_t size() const override
Definition: column.h:478
gs::StringMapColumn::set_any
void set_any(size_t idx, const Any &value) override
Definition: column.h:641
gs::StringMapColumn::StringMapColumn
StringMapColumn(StorageStrategy strategy)
Definition: column.h:597
gs::TypedColumn< RecordView >::open
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:243
gs::TypedColumn< grape::EmptyType >::storage_strategy
StorageStrategy storage_strategy() const override
Definition: column.h:340
gs::StringMapColumn::close
void close() override
Definition: column.h:627
gs::PropertyType::Varchar
static PropertyType Varchar(uint16_t max_length)
Definition: types.cc:335
gs::TypedColumn< std::string_view >::~TypedColumn
~TypedColumn()
Definition: column.h:351
gs::TypedColumn::open_with_hugepages
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:100
gs::mmap_array::resize
void resize(size_t size)
Definition: mmap_array.h:319
gs::StringMapColumn::open_with_hugepages
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:687
gs::GlobalId
Definition: types.h:163
gs::StorageStrategy::kDisk
@ kDisk
gs::TypedColumn::type
PropertyType type() const override
Definition: column.h:186
gs::StringMapColumn::size
size_t size() const override
Definition: column.h:634
gs::LabelKey::label_data_type
uint8_t label_data_type
Definition: types.h:280
gs::TypedColumn< std::string_view >::open_in_memory
void open_in_memory(const std::string &prefix) override
Definition: column.h:374
gs
Definition: adj_list.h:23
gs::TypedColumn
Definition: column.h:66
gs::StringMapColumn::~StringMapColumn
~StringMapColumn()
Definition: column.h:604
gs::TypedColumn< RecordView >::dump
void dump(const std::string &filename) override
Definition: column.h:258
gs::TypedColumn< grape::EmptyType >::get
Any get(size_t index) const override
Definition: column.h:334
gs::PropertyType::kEmpty
static const PropertyType kEmpty
Definition: types.h:133
gs::StorageStrategy
StorageStrategy
Definition: types.h:58
gs::StringMapColumn::index_col_
TypedColumn< INDEX_T > index_col_
Definition: column.h:665
gs::TypedColumn< std::string_view >::close
void close() override
Definition: column.h:426
gs::TypedRefColumn::get
Any get(size_t index) const override
Definition: column.h:834
gs::CreateRefColumn
std::shared_ptr< RefColumnBase > CreateRefColumn(std::shared_ptr< ColumnBase > column)
Definition: column.cc:221
gs::TypedColumn::ingest
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:211
gs::TypedColumn::get
Any get(size_t index) const override
Definition: column.h:207
gs::mmap_array< std::string_view >::reset
void reset()
Definition: mmap_array.h:453
gs::TypedRefColumn< LabelKey >::get_view
LabelKey get_view(size_t index) const
Definition: column.h:854
gs::TypedColumn::storage_strategy
StorageStrategy storage_strategy() const override
Definition: column.h:217
gs::TypedColumn< std::string_view >::extra_buffer_
mmap_array< std::string_view > extra_buffer_
Definition: column.h:582
gs::mmap_array::open
void open(const std::string &filename, bool sync_to_file=false)
Definition: mmap_array.h:129
gs::StringMapColumn::resize
void resize(size_t size) override
Definition: column.h:635
gs::TypedRefColumn::basic_size
size_t basic_size
Definition: column.h:840
gs::TypedColumn::extra_buffer
const mmap_array< T > & extra_buffer() const
Definition: column.h:221
gs::StringMapColumn::open_in_memory
void open_in_memory(const std::string &name) override
Definition: column.h:680
gs::TypedColumn< grape::EmptyType >::strategy_
StorageStrategy strategy_
Definition: column.h:343
gs::TypedRefColumn::extra_size
size_t extra_size
Definition: column.h:842
gs::ColumnBase::storage_strategy
virtual StorageStrategy storage_strategy() const =0
gs::TypedColumn< grape::EmptyType >::TypedColumn
TypedColumn(StorageStrategy strategy)
Definition: column.h:313
gs::PropertyType::STRING_DEFAULT_MAX_LENGTH
static constexpr const uint16_t STRING_DEFAULT_MAX_LENGTH
Definition: types.h:96
gs::TypedRefColumn< GlobalId >::get
Any get(size_t index) const override
Definition: column.h:877
gs::TypedColumn< std::string_view >::strategy_
StorageStrategy strategy_
Definition: column.h:586
gs::TypedColumn::get_view
T get_view(size_t index) const
Definition: column.h:202
gs::TypedRefColumn::TypedRefColumn
TypedRefColumn(const TypedColumn< T > &column)
Definition: column.h:819
gs::CreateColumn
std::shared_ptr< ColumnBase > CreateColumn(PropertyType type, StorageStrategy strategy, const std::vector< PropertyType > &sub_types)
Definition: column.cc:141
gs::TypedColumn< std::string_view >::pos_
std::atomic< size_t > pos_
Definition: column.h:584
gs::TypedColumn< grape::EmptyType >::ingest
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:338
gs::TypedColumn< std::string_view >::set_value
void set_value(size_t idx, const std::string_view &val)
Definition: column.h:509
gs::TypedColumn::basic_buffer_size
size_t basic_buffer_size() const
Definition: column.h:220
gs::TypedRefColumn::TypedRefColumn
TypedRefColumn(const mmap_array< T > &buffer, StorageStrategy strategy)
Definition: column.h:813
gs::TypedColumn< std::string_view >::storage_strategy
StorageStrategy storage_strategy() const override
Definition: column.h:569
gs::TypedColumn< std::string_view >
Definition: column.h:346
gs::TypedColumn< std::string_view >::TypedColumn
TypedColumn(StorageStrategy strategy, uint16_t width=PropertyType::STRING_DEFAULT_MAX_LENGTH)
Definition: column.h:348
gs::TypedColumn< std::string_view >::basic_pos_
std::atomic< size_t > basic_pos_
Definition: column.h:585
gs::TypedColumn< RecordView >::~TypedColumn
~TypedColumn()
Definition: column.h:241
gs::TypedColumn< std::string_view >::basic_size_
size_t basic_size_
Definition: column.h:581
gs::TypedRefColumn< LabelKey >::~TypedRefColumn
~TypedRefColumn()
Definition: column.h:852
gs::TypedColumn::open_in_memory
void open_in_memory(const std::string &name) override
Definition: column.h:88
gs::TypedColumn< std::string_view >::open_with_hugepages
void open_with_hugepages(const std::string &prefix, bool force) override
Definition: column.h:384
gs::mmap_array::size
size_t size() const
Definition: mmap_array.h:415
gs::TypedColumn< RecordView >::TypedColumn
TypedColumn(const std::vector< PropertyType > &types)
Definition: column.h:235
gs::ColumnBase::copy_to_tmp
virtual void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path)=0
gs::StringMapColumn::open
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:671
gs::PropertyType::kRecordView
static const PropertyType kRecordView
Definition: types.h:149
gs::ColumnBase::set_any
virtual void set_any(size_t index, const Any &value)=0
gs::StringMapColumn::touch
void touch(const std::string &filename) override
Definition: column.h:623
gs::mmap_array< std::string_view >
Definition: mmap_array.h:447
gs::TypedColumn::strategy_
StorageStrategy strategy_
Definition: column.h:229
gs::TypedColumn< std::string_view >::extra_size_
size_t extra_size_
Definition: column.h:583
gs::RefColumnBase::~RefColumnBase
virtual ~RefColumnBase()
Definition: column.h:803
gs::LabelKey
Definition: types.h:279
gs::TypedColumn< RecordView >::ingest
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:281
gs::TypedColumn< std::string_view >::touch
void touch(const std::string &filename) override
Definition: column.h:400
gs::TypedColumn::~TypedColumn
~TypedColumn()
Definition: column.h:69
gs::TypedColumn< grape::EmptyType >::size
size_t size() const override
Definition: column.h:325
gs::TypedRefColumn< GlobalId >::TypedRefColumn
TypedRefColumn(label_t label_key)
Definition: column.h:869
gs::ColumnBase::dump
virtual void dump(const std::string &filename)=0
gs::TypedColumn::dump
void dump(const std::string &filename) override
Definition: column.h:156
gs::TypedColumn::extra_buffer_
mmap_array< T > extra_buffer_
Definition: column.h:227
gs::TypedColumn< grape::EmptyType >::~TypedColumn
~TypedColumn()
Definition: column.h:314
gs::TypedRefColumn::value_type
T value_type
Definition: column.h:811
gs::TypedColumn< grape::EmptyType >::open
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:316
mmap_array.h
gs::ColumnBase::get
virtual Any get(size_t index) const =0
gs::StringMapColumn
Definition: column.h:595
gs::ColumnBase::~ColumnBase
virtual ~ColumnBase()
Definition: column.h:33
gs::ColumnBase::size
virtual size_t size() const =0
gs::StringMapColumn::type
PropertyType type() const override
Definition: column.h:637
gs::ColumnBase::open_in_memory
virtual void open_in_memory(const std::string &name)=0
gs::TypedColumn< std::string_view >::ingest
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:559
gs::copy_file
void copy_file(const std::string &src, const std::string &dst)
Definition: file_names.h:80
gs::TypedRefColumn< GlobalId >::get_view
GlobalId get_view(size_t index) const
Definition: column.h:873
gs::TypedColumn::extra_size_
size_t extra_size_
Definition: column.h:228
gs::TypedColumn::set_any
void set_any(size_t index, const Any &value) override
Definition: column.h:198
gs::TypedRefColumn
Definition: column.h:809
gs::truncate_utf8
std::string_view truncate_utf8(std::string_view str, size_t length)
Definition: column.cc:25
gs::ColumnBase::close
virtual void close()=0
gs::TypedColumn< grape::EmptyType >::touch
void touch(const std::string &filename) override
Definition: column.h:320
gs::mmap_array
Definition: mmap_array.h:65
gs::StringMapColumn::get
Any get(size_t idx) const override
Definition: column.h:647
gs::StringMapColumn::storage_strategy
StorageStrategy storage_strategy() const override
Definition: column.h:657
gs::snapshot_dir
std::string snapshot_dir(const std::string &work_dir, uint32_t version)
Definition: file_names.h:192
std
Definition: loading_config.h:232
gs::TypedRefColumn::strategy_
StorageStrategy strategy_
Definition: column.h:844
gs::ColumnBase::open_with_hugepages
virtual void open_with_hugepages(const std::string &name, bool force)=0
gs::TypedColumn< grape::EmptyType >::set_value
void set_value(size_t index, const grape::EmptyType &value)
Definition: column.h:332
gs::TypedColumn< RecordView >::copy_to_tmp
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:262
gs::TypedColumn< std::string_view >::open
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:353
gs::TypedColumn< grape::EmptyType >::close
void close() override
Definition: column.h:324
gs::TypedColumn< std::string_view >::get_view
std::string_view get_view(size_t idx) const
Definition: column.h:550
gs::StringMapColumn::copy_to_tmp
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:612
gs::RefColumnBase::get
virtual Any get(size_t index) const =0
gs::TypedColumn< RecordView >::sub_types
std::vector< PropertyType > sub_types() const
Definition: column.h:290
gs::AnyConverter
Definition: types.h:393
gs::TypedRefColumn::basic_buffer
const mmap_array< T > & basic_buffer
Definition: column.h:839
gs::ColumnBase::ingest
virtual void ingest(uint32_t index, grape::OutArchive &arc)=0
gs::TypedColumn< grape::EmptyType >::dump
void dump(const std::string &filename) override
Definition: column.h:321
gs::TypedColumn< std::string_view >::get
Any get(size_t idx) const override
Definition: column.h:555
gs::mmap_array< std::string_view >::set
void set(size_t idx, size_t offset, const std::string_view &val)
Definition: mmap_array.h:488
gs::TypedColumn::extra_buffer_size
size_t extra_buffer_size() const
Definition: column.h:222
gs::TypedRefColumn::~TypedRefColumn
~TypedRefColumn()
Definition: column.h:825
gs::TypedRefColumn< GlobalId >::label_key_
label_t label_key_
Definition: column.h:883
gs::TypedColumn< grape::EmptyType >::resize
void resize(size_t size) override
Definition: column.h:326
gs::TypedColumn< grape::EmptyType >::open_in_memory
void open_in_memory(const std::string &name) override
Definition: column.h:318
gs::TypedColumn< grape::EmptyType >::get_view
grape::EmptyType get_view(size_t index) const
Definition: column.h:336
gs::TypedColumn< std::string_view >::copy_to_tmp
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:431
gs::StringMapColumn::meta_map_
LFIndexer< INDEX_T > * meta_map_
Definition: column.h:666
gs::PropertyType
Definition: types.h:95
gs::RecordView
Definition: types.h:288
gs::TypedColumn< RecordView >
Definition: column.h:233
gs::TypedRefColumn< LabelKey >::TypedRefColumn
TypedRefColumn(LabelKey label_key)
Definition: column.h:850
gs::StringMapColumn::set_value
void set_value(size_t idx, const std::string_view &val)
Definition: column.h:707
gs::TypedColumn< grape::EmptyType >::type
PropertyType type() const override
Definition: column.h:328
gs::ColumnBase::resize
virtual void resize(size_t size)=0
gs::mmap_array::reset
void reset()
Definition: mmap_array.h:84
gs::mmap_array::set
void set(size_t idx, const T &val)
Definition: mmap_array.h:408
gs::ColumnBase::touch
virtual void touch(const std::string &filename)=0
gs::TypedColumn::copy_to_tmp
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:141
gs::TypedColumn::basic_buffer
const mmap_array< T > & basic_buffer() const
Definition: column.h:219
gs::LFIndexer
Definition: id_indexer.h:184
gs::PropertyType::kStringMap
static const PropertyType kStringMap
Definition: types.h:146
gs::TypedColumn< std::string_view >::set_any
void set_any(size_t idx, const Any &value) override
Definition: column.h:527
gs::TypedRefColumn< GlobalId >::label_t
typename LabelKey::label_data_type label_t
Definition: column.h:868
gs::ColumnBase
Definition: column.h:31
gs::StorageStrategy::kMem
@ kMem
gs::Any::AsStringView
std::string_view AsStringView() const
Definition: types.h:653
gs::TypedColumn::basic_size_
size_t basic_size_
Definition: column.h:226
gs::TypedColumn< std::string_view >::basic_buffer_
mmap_array< std::string_view > basic_buffer_
Definition: column.h:580
gs::TypedColumn< std::string_view >::set_value_with_check
void set_value_with_check(size_t idx, const std::string_view &value)
Definition: column.h:532
gs::TypedColumn< RecordView >::open_with_hugepages
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:250
gs::TypedColumn< grape::EmptyType >::copy_to_tmp
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:322