Flex  0.17.9
column.h
Go to the documentation of this file.
1 
16 #ifndef GRAPHSCOPE_PROPERTY_COLUMN_H_
17 #define GRAPHSCOPE_PROPERTY_COLUMN_H_
18 
19 #include <shared_mutex>
20 #include <string>
21 #include <string_view>
22 #include "grape/utils/concurrent_queue.h"
23 
24 #include "flex/utils/mmap_array.h"
26 #include "grape/serialization/out_archive.h"
27 
28 namespace gs {
29 
30 std::string_view truncate_utf8(std::string_view str, size_t length);
31 
32 class ColumnBase {
33  public:
34  virtual ~ColumnBase() {}
35 
36  virtual void open(const std::string& name, const std::string& snapshot_dir,
37  const std::string& work_dir) = 0;
38 
39  virtual void open_in_memory(const std::string& name) = 0;
40 
41  virtual void open_with_hugepages(const std::string& name, bool force) = 0;
42 
43  virtual void close() = 0;
44 
45  virtual void touch(const std::string& filename) = 0;
46 
47  virtual void dump(const std::string& filename) = 0;
48 
49  virtual size_t size() const = 0;
50 
51  virtual void copy_to_tmp(const std::string& cur_path,
52  const std::string& tmp_path) = 0;
53  virtual void resize(size_t size) = 0;
54 
55  virtual PropertyType type() const = 0;
56 
57  virtual void set_any(size_t index, const Any& value) = 0;
58 
59  virtual Any get(size_t index) const = 0;
60 
61  virtual void ingest(uint32_t index, grape::OutArchive& arc) = 0;
62 
63  virtual StorageStrategy storage_strategy() const = 0;
64 };
65 
66 template <typename T>
67 class TypedColumn : public ColumnBase {
68  public:
69  TypedColumn(StorageStrategy strategy) : strategy_(strategy) {}
71 
72  void open(const std::string& name, const std::string& snapshot_dir,
73  const std::string& work_dir) override {
74  std::string basic_path = snapshot_dir + "/" + name;
75  if (std::filesystem::exists(basic_path)) {
76  basic_buffer_.open(basic_path, false);
77  basic_size_ = basic_buffer_.size();
78  } else {
79  basic_size_ = 0;
80  }
81  if (work_dir == "") {
82  extra_size_ = 0;
83  } else {
84  extra_buffer_.open(work_dir + "/" + name, true);
85  extra_size_ = extra_buffer_.size();
86  }
87  }
88 
89  void open_in_memory(const std::string& name) override {
90  if (!name.empty() && std::filesystem::exists(name)) {
91  basic_buffer_.open(name, false);
92  basic_size_ = basic_buffer_.size();
93  } else {
94  basic_buffer_.reset();
95  basic_size_ = 0;
96  }
97  extra_buffer_.reset();
98  extra_size_ = 0;
99  }
100 
101  void open_with_hugepages(const std::string& name, bool force) override {
102  if (strategy_ == StorageStrategy::kMem || force) {
103  if (!name.empty() && std::filesystem::exists(name)) {
104  basic_buffer_.open_with_hugepages(name);
105  basic_size_ = basic_buffer_.size();
106  } else {
107  basic_buffer_.reset();
108  basic_buffer_.set_hugepage_prefered(true);
109  basic_size_ = 0;
110  }
111  extra_buffer_.reset();
112  extra_buffer_.set_hugepage_prefered(true);
113  extra_size_ = 0;
114  } else if (strategy_ == StorageStrategy::kDisk) {
115  LOG(INFO) << "Open " << name << " with normal mmap pages";
116  open_in_memory(name);
117  }
118  }
119 
// Consolidates the column into one array backed by `filename`.
// Rows of basic_buffer_ are copied first, then rows of extra_buffer_ (shifted
// by basic_size_). Afterwards the new array becomes extra_buffer_ and the
// basic part is emptied, so all rows live in the extra part.
120  void touch(const std::string& filename) override {
121  mmap_array<T> tmp;
// Second argument of mmap_array::open is sync_to_file.
122  tmp.open(filename, true);
// NOTE(review): this listing skips source line 123 here; it presumably sizes
// `tmp` before the copy loops below - confirm against the real header.
124  for (size_t k = 0; k < basic_size_; ++k) {
125  tmp.set(k, basic_buffer_.get(k));
126  }
127  for (size_t k = 0; k < extra_size_; ++k) {
128  tmp.set(k + basic_size_, extra_buffer_.get(k));
129  }
// Drop the basic part; from here on every row belongs to the extra part.
130  basic_size_ = 0;
131  basic_buffer_.reset();
132  extra_size_ = tmp.size();
// After the swap `tmp` holds the previous extra buffer, which is then reset.
133  extra_buffer_.swap(tmp);
134  tmp.reset();
135  }
136 
137  void close() override {
138  basic_buffer_.reset();
139  extra_buffer_.reset();
140  }
141 
// Copies the file at `cur_path` to `tmp_path` and re-points the column at the
// copy: the copy is opened (sync_to_file = true) and swapped in as
// extra_buffer_, while the basic part is emptied.
// Does nothing when `cur_path` does not exist.
142  void copy_to_tmp(const std::string& cur_path,
143  const std::string& tmp_path) override {
144  mmap_array<T> tmp;
145  if (!std::filesystem::exists(cur_path)) {
146  return;
147  }
148  copy_file(cur_path, tmp_path);
// NOTE(review): this listing skips source line 149 here; no assignment to
// extra_size_ is visible in this method - confirm against the real header.
150  basic_size_ = 0;
151  tmp.open(tmp_path, true);
152  basic_buffer_.reset();
// After the swap `tmp` holds the previous extra buffer, which is then reset.
153  extra_buffer_.swap(tmp);
154  tmp.reset();
155  }
156 
157  void dump(const std::string& filename) override {
158  if (basic_size_ != 0 && extra_size_ == 0) {
159  basic_buffer_.dump(filename);
160  } else if (basic_size_ == 0 && extra_size_ != 0) {
161  extra_buffer_.dump(filename);
162  } else {
163  mmap_array<T> tmp;
164  tmp.open(filename, true);
165  for (size_t k = 0; k < basic_size_; ++k) {
166  tmp.set(k, basic_buffer_.get(k));
167  }
168  for (size_t k = 0; k < extra_size_; ++k) {
169  tmp.set(k + basic_size_, extra_buffer_.get(k));
170  }
171  }
172  }
173 
174  size_t size() const override { return basic_size_ + extra_size_; }
175 
// Changes the logical row count to `size`.
// If `size` fits inside the basic buffer, only basic_size_ shrinks and the
// extra part becomes empty. Otherwise the whole basic buffer is used and the
// extra buffer is resized.
176  void resize(size_t size) override {
177  if (size < basic_buffer_.size()) {
178  basic_size_ = size;
179  extra_size_ = 0;
180  } else {
181  basic_size_ = basic_buffer_.size();
// NOTE(review): this listing skips source line 182 here; it presumably sets
// extra_size_ to the rows beyond the basic part - confirm against the real
// header, since extra_size_ is not assigned in the visible lines.
183  extra_buffer_.resize(extra_size_);
184  }
185  }
186 
187  PropertyType type() const override { return AnyConverter<T>::type(); }
188 
189  void set_value(size_t index, const T& val) {
190  if (index >= basic_size_ && index < basic_size_ + extra_size_) {
191  extra_buffer_.set(index - basic_size_, val);
192  } else if (index < basic_size_) {
193  basic_buffer_.set(index, val);
194  } else {
195  throw std::runtime_error("Index out of range");
196  }
197  }
198 
199  void set_any(size_t index, const Any& value) override {
200  set_value(index, AnyConverter<T>::from_any(value));
201  }
202 
203  inline T get_view(size_t index) const {
204  return index < basic_size_ ? basic_buffer_.get(index)
205  : extra_buffer_.get(index - basic_size_);
206  }
207 
208  Any get(size_t index) const override {
209  return AnyConverter<T>::to_any(get_view(index));
210  }
211 
212  void ingest(uint32_t index, grape::OutArchive& arc) override {
213  T val;
214  arc >> val;
215  set_value(index, val);
216  }
217 
218  StorageStrategy storage_strategy() const override { return strategy_; }
219 
220  const mmap_array<T>& basic_buffer() const { return basic_buffer_; }
221  size_t basic_buffer_size() const { return basic_size_; }
222  const mmap_array<T>& extra_buffer() const { return extra_buffer_; }
223  size_t extra_buffer_size() const { return extra_size_; }
224 
225  private:
227  size_t basic_size_;
229  size_t extra_size_;
231 };
232 
233 template <>
235  public:
236  TypedColumn(const std::vector<PropertyType>& types) : types_(types) {
237  if (types.size() == 0) {
238  LOG(FATAL) << "RecordView column must have sub types.";
239  }
240  }
241 
243 
244  void open(const std::string& name, const std::string& snapshot_dir,
245  const std::string& work_dir) override {
246  LOG(FATAL) << "RecordView column does not support open.";
247  }
248 
249  void open_in_memory(const std::string& name) override;
250 
251  void open_with_hugepages(const std::string& name, bool force) override {
252  LOG(FATAL) << "RecordView column does not support open with hugepages.";
253  }
254 
255  void touch(const std::string& filename) override {
256  LOG(FATAL) << "RecordView column does not support touch.";
257  }
258 
259  void dump(const std::string& filename) override {
260  LOG(FATAL) << "RecordView column does not support dump.";
261  }
262 
263  void copy_to_tmp(const std::string& cur_path,
264  const std::string& tmp_path) override {
265  LOG(FATAL) << "RecordView column does not support copy_to_tmp.";
266  }
267  void close() override;
268 
269  size_t size() const override;
270  void resize(size_t size) override;
271 
272  PropertyType type() const override { return PropertyType::kRecordView; }
273 
274  void set_any(size_t index, const Any& value) override;
275 
276  void set_value(size_t index, const RecordView& val);
277 
278  RecordView get_view(size_t index) const;
279 
280  Any get(size_t index) const override;
281 
282  void ingest(uint32_t index, grape::OutArchive& arc) override {
283  LOG(FATAL) << "RecordView column does not support ingest.";
284  }
285 
287  LOG(ERROR) << "RecordView column does not have storage strategy.";
288  return StorageStrategy::kMem;
289  }
290 
291  std::vector<PropertyType> sub_types() const { return types_; }
292 
293  private:
294  std::vector<PropertyType> types_;
295  std::shared_ptr<Table> table_;
296 };
297 
310 
// Specialization for columns that carry no data (grape::EmptyType).
// Every storage operation is an empty body, size() is always 0, and reads
// return a default-constructed value; only the storage strategy is kept.
311 template <>
312 class TypedColumn<grape::EmptyType> : public ColumnBase {
313  public:
314  TypedColumn(StorageStrategy strategy) : strategy_(strategy) {}
316 
// Open/persist operations: nothing to load or store.
317  void open(const std::string& name, const std::string& snapshot_dir,
318  const std::string& work_dir) override {}
319  void open_in_memory(const std::string& name) override {}
320  void open_with_hugepages(const std::string& name, bool force) override {}
321  void touch(const std::string& filename) override {}
322  void dump(const std::string& filename) override {}
323  void copy_to_tmp(const std::string& cur_path,
324  const std::string& tmp_path) override {}
325  void close() override {}
// Always reports zero rows; resize() is ignored.
326  size_t size() const override { return 0; }
327  void resize(size_t size) override {}
328 
329  PropertyType type() const override { return PropertyType::kEmpty; }
330 
// Writes are accepted and discarded.
331  void set_any(size_t index, const Any& value) override {}
332 
333  void set_value(size_t index, const grape::EmptyType& value) {}
334 
// Reads ignore `index` and return default-constructed values.
335  Any get(size_t index) const override { return Any(); }
336 
337  grape::EmptyType get_view(size_t index) const { return grape::EmptyType(); }
338 
// Nothing is read from the archive.
339  void ingest(uint32_t index, grape::OutArchive& arc) override {}
340 
341  StorageStrategy storage_strategy() const override { return strategy_; }
342 
343  private:
// NOTE(review): this listing skips source line 344 (the strategy_ member
// declaration, per the cross-reference index) - confirm against the header.
345 };
346 template <>
347 class TypedColumn<std::string_view> : public ColumnBase {
348  public:
349  TypedColumn(StorageStrategy strategy, uint16_t width)
350  : strategy_(strategy),
351  width_(width),
352  type_(PropertyType::Varchar(width_)) {}
354  : strategy_(strategy),
355  width_(PropertyType::GetStringDefaultMaxLength()),
356  type_(PropertyType::kStringView) {}
358 
359  void open(const std::string& name, const std::string& snapshot_dir,
360  const std::string& work_dir) override {
361  std::string basic_path = snapshot_dir + "/" + name;
362  if (std::filesystem::exists(basic_path + ".items")) {
363  basic_buffer_.open(basic_path, false);
364  basic_size_ = basic_buffer_.size();
365  basic_pos_ = basic_buffer_.data_size();
366  } else {
367  basic_size_ = 0;
368  basic_pos_ = 0;
369  }
370  if (work_dir == "") {
371  extra_size_ = 0;
372  pos_.store(0);
373  } else {
374  extra_buffer_.open(work_dir + "/" + name, true);
375  extra_size_ = extra_buffer_.size();
376  pos_.store(extra_buffer_.data_size());
377  }
378  }
379 
380  void open_in_memory(const std::string& prefix) override {
381  basic_buffer_.open(prefix, false);
382  basic_size_ = basic_buffer_.size();
383  basic_pos_ = basic_buffer_.data_size();
384 
385  extra_buffer_.reset();
386  extra_size_ = 0;
387  pos_.store(0);
388  }
389 
390  void open_with_hugepages(const std::string& prefix, bool force) override {
391  if (strategy_ == StorageStrategy::kMem || force) {
392  basic_buffer_.open_with_hugepages(prefix);
393  basic_size_ = basic_buffer_.size();
394  basic_pos_ = basic_buffer_.data_size();
395 
396  extra_buffer_.reset();
397  extra_buffer_.set_hugepage_prefered(true);
398  extra_size_ = 0;
399  pos_.store(0);
400  } else if (strategy_ == StorageStrategy::kDisk) {
401  LOG(INFO) << "Open " << prefix << " with normal mmap pages";
402  open_in_memory(prefix);
403  }
404  }
405 
// Consolidates the string column into one array backed by `filename`.
// Basic rows are copied first, then extra rows (shifted by basic_size_); the
// bytes are packed back to back, with `offset` tracking the next free byte.
// Afterwards the new array becomes extra_buffer_, the basic part is emptied,
// and pos_ points just past the packed data.
406  void touch(const std::string& filename) override {
// NOTE(review): this listing skips source lines 407 and 409 around here
// (presumably the declaration of `tmp` and its initial sizing) - confirm
// against the real header.
408  tmp.open(filename, true);
410  size_t offset = 0;
411  for (size_t k = 0; k < basic_size_; ++k) {
412  std::string_view val = basic_buffer_.get(k);
413  tmp.set(k, offset, val);
414  offset += val.size();
415  }
416  for (size_t k = 0; k < extra_size_; ++k) {
417  std::string_view val = extra_buffer_.get(k);
418  tmp.set(k + basic_size_, offset, val);
419  offset += val.size();
420  }
421 
// Drop the basic part; from here on every row belongs to the extra part.
422  basic_size_ = 0;
423  basic_pos_ = 0;
424  basic_buffer_.reset();
425  extra_size_ = tmp.size();
// After the swap `tmp` holds the previous extra buffer, which is then reset.
426  extra_buffer_.swap(tmp);
427  tmp.reset();
428 
// Next write into the extra buffer starts after the last packed byte.
429  pos_.store(offset);
430  }
431 
432  void close() override {
433  basic_buffer_.reset();
434  extra_buffer_.reset();
435  }
436 
437  void copy_to_tmp(const std::string& cur_path,
438  const std::string& tmp_path) override {
440  if (!std::filesystem::exists(cur_path + ".data")) {
441  return;
442  }
443  copy_file(cur_path + ".data", tmp_path + ".data");
444  copy_file(cur_path + ".items", tmp_path + ".items");
445 
447  basic_size_ = 0;
448  basic_pos_ = 0;
449  basic_buffer_.reset();
450  tmp.open(tmp_path, true);
451  extra_buffer_.swap(tmp);
452  tmp.reset();
453  pos_.store(extra_buffer_.data_size());
454  }
455 
// Writes the string column to `filename`.
// When only one buffer holds rows, that buffer is first resized to its row
// count and current write position, then dumped directly. Otherwise (both
// hold rows, or both are empty) the rows are packed into a temporary array
// opened on `filename`, basic rows first and extra rows after them.
456  void dump(const std::string& filename) override {
457  if (basic_size_ != 0 && extra_size_ == 0) {
458  basic_buffer_.resize(basic_size_, basic_pos_.load());
459  basic_buffer_.dump(filename);
460  } else if (basic_size_ == 0 && extra_size_ != 0) {
461  extra_buffer_.resize(extra_size_, pos_.load());
462  extra_buffer_.dump(filename);
463  } else {
// NOTE(review): this listing skips source lines 464 and 466 in this branch
// (presumably the declaration of `tmp` and the start of a resize call whose
// argument continues on line 467) - confirm against the real header.
465  tmp.open(filename, true);
467  (basic_size_ + extra_size_) * width_);
// `offset` is the next free byte in tmp's data region.
468  size_t offset = 0;
469  for (size_t k = 0; k < basic_size_; ++k) {
470  std::string_view val = basic_buffer_.get(k);
471  tmp.set(k, offset, val);
472  offset += val.size();
473  }
474  for (size_t k = 0; k < extra_size_; ++k) {
475  std::string_view val = extra_buffer_.get(k);
476  tmp.set(k + basic_size_, offset, extra_buffer_.get(k));
477  offset += val.size();
478  }
// Resize to the row count and the number of bytes actually written.
479  tmp.resize(basic_size_ + extra_size_, offset);
480  tmp.reset();
481  }
482  }
483 
484  size_t size() const override { return basic_size_ + extra_size_; }
485 
// Changes the logical row count to `size` while holding rw_mutex_
// exclusively.
// If `size` fits inside the basic buffer, only basic_size_ shrinks and the
// extra part becomes empty. Otherwise the extra buffer is resized, with its
// data capacity estimated from the basic buffer's average string width (or
// from width_ when the basic buffer is empty), never below pos_.
// In both cases the basic buffer's data region is then resized.
486  void resize(size_t size) override {
487  std::unique_lock<std::shared_mutex> lock(rw_mutex_);
488  if (size < basic_buffer_.size()) {
489  basic_size_ = size;
490  extra_size_ = 0;
491  } else {
492  basic_size_ = basic_buffer_.size();
// NOTE(review): this listing skips source line 493 here; it presumably sets
// extra_size_ to the rows beyond the basic part - confirm against the real
// header, since extra_size_ is not assigned in the visible lines.
494  if (basic_buffer_.size() != 0) {
// Average bytes per basic row, rounded up.
495  size_t basic_avg_width =
496  (basic_buffer_.data_size() + basic_buffer_.size() - 1) /
497  basic_buffer_.size();
498  // extra_size_ * basic_avg_width may be smaller than pos_.load()
499  extra_buffer_.resize(
500  extra_size_, std::max(extra_size_ * basic_avg_width, pos_.load()));
501  } else {
502  extra_buffer_.resize(extra_size_,
503  std::max(extra_size_ * width_, pos_.load()));
504  }
505  }
506  // resize `data` of basic_buffer
507  {
508  size_t pos = basic_pos_.load();
// Adds ceil(pos / 5) to the current write position, i.e. about 20% more.
509  pos = pos + (pos + 4) / 5;
510  basic_buffer_.resize(basic_size_, pos);
511  }
512  }
513 
514  PropertyType type() const override { return type_; }
515 
516  void set_value(size_t idx, const std::string_view& val) {
517  auto copied_val = val;
518  if (copied_val.size() >= width_) {
519  VLOG(1) << "String length" << copied_val.size()
520  << " exceeds the maximum length: " << width_ << ", cut off.";
521  copied_val = truncate_utf8(copied_val, width_);
522  }
523  if (idx >= basic_size_ && idx < basic_size_ + extra_size_) {
524  size_t offset = pos_.fetch_add(copied_val.size());
525  extra_buffer_.set(idx - basic_size_, offset, copied_val);
526  } else if (idx < basic_size_) {
527  size_t offset = basic_pos_.fetch_add(copied_val.size());
528  basic_buffer_.set(idx, offset, copied_val);
529  } else {
530  LOG(FATAL) << "Index out of range";
531  }
532  }
533 
534  void set_any(size_t idx, const Any& value) override {
535  set_value(idx, value.AsStringView());
536  }
537 
538  // make sure there is enough space for the value
539  void set_value_with_check(size_t idx, const std::string_view& value) {
540  if (idx >= basic_size_ && idx < basic_size_ + extra_size_) {
541  size_t offset = pos_.fetch_add(value.size());
542  if (pos_.load() > extra_buffer_.data_size()) {
543  extra_buffer_.resize(extra_buffer_.size(), pos_.load());
544  }
545  extra_buffer_.set(idx - basic_size_, offset, value);
546  } else if (idx < basic_size_) {
547  size_t offset = basic_pos_.fetch_add(value.size());
548  if (basic_pos_.load() > basic_buffer_.data_size()) {
549  basic_buffer_.resize(basic_buffer_.size(), basic_pos_.load());
550  }
551  basic_buffer_.set(idx, offset, value);
552  } else {
553  LOG(FATAL) << "Index out of range";
554  }
555  }
556 
557  void set_value_safe(size_t idx, const std::string_view& value);
558 
559  inline std::string_view get_view(size_t idx) const {
560  return idx < basic_size_ ? basic_buffer_.get(idx)
561  : extra_buffer_.get(idx - basic_size_);
562  }
563 
564  Any get(size_t idx) const override {
566  }
567 
568  void ingest(uint32_t index, grape::OutArchive& arc) override {
569  std::string_view val;
570  arc >> val;
571  set_value(index, val);
572  }
573 
575  return basic_buffer_;
576  }
577 
578  StorageStrategy storage_strategy() const override { return strategy_; }
579 
580  size_t basic_buffer_size() const { return basic_size_; }
581 
583  return extra_buffer_;
584  }
585 
586  size_t extra_buffer_size() const { return extra_size_; }
587 
588  private:
590  size_t basic_size_;
592  size_t extra_size_;
593  std::atomic<size_t> pos_;
594  std::atomic<size_t> basic_pos_;
596  std::shared_mutex rw_mutex_;
597  uint16_t width_;
599 };
600 
602 template <typename INDEX_T>
603 class LFIndexer;
604 
605 template <typename INDEX_T>
606 class StringMapColumn : public ColumnBase {
607  public:
609  : index_col_(strategy), meta_map_(nullptr) {
611  meta_map_->init(
613  }
614 
616  if (meta_map_) {
617  meta_map_->close();
618  delete meta_map_;
619  }
620  index_col_.close();
621  }
622 
623  void copy_to_tmp(const std::string& cur_path,
624  const std::string& tmp_path) override {
625  meta_map_->copy_to_tmp(cur_path + ".map_meta", tmp_path + ".map_meta");
626  index_col_.copy_to_tmp(cur_path, tmp_path);
627  }
628  void open(const std::string& name, const std::string& snapshot_dir,
629  const std::string& work_dir) override;
630  void open_in_memory(const std::string& name) override;
631  void open_with_hugepages(const std::string& name, bool force) override;
632  void dump(const std::string& filename) override;
633 
634  void touch(const std::string& filename) override {
635  index_col_.touch(filename);
636  }
637 
638  void close() override {
639  if (meta_map_ != nullptr) {
640  meta_map_->close();
641  }
642  index_col_.close();
643  }
644 
645  size_t size() const override { return index_col_.size(); }
646  void resize(size_t size) override { index_col_.resize(size); }
647 
648  PropertyType type() const override { return PropertyType::kStringMap; }
649 
650  void set_value(size_t idx, const std::string_view& val);
651 
652  void set_any(size_t idx, const Any& value) override {
653  set_value(idx, value.AsStringView());
654  }
655 
656  std::string_view get_view(size_t idx) const;
657 
658  Any get(size_t idx) const override {
660  }
661 
662  void ingest(uint32_t index, grape::OutArchive& arc) override {
663  std::string_view val;
664  arc >> val;
665  set_value(index, val);
666  }
667 
669  return index_col_.storage_strategy();
670  }
671 
672  const TypedColumn<INDEX_T>& get_index_col() const { return index_col_; }
673  const LFIndexer<INDEX_T>& get_meta_map() const { return *meta_map_; }
674 
675  private:
678  grape::SpinLock lock_;
679 };
680 
681 template <typename INDEX_T>
682 void StringMapColumn<INDEX_T>::open(const std::string& name,
683  const std::string& snapshot_dir,
684  const std::string& work_dir) {
685  index_col_.open(name, snapshot_dir, work_dir);
686  meta_map_->open(name + ".map_meta", snapshot_dir, work_dir);
687  meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
688 }
689 
690 template <typename INDEX_T>
691 void StringMapColumn<INDEX_T>::open_in_memory(const std::string& name) {
692  index_col_.open_in_memory(name);
693  meta_map_->open_in_memory(name + ".map_meta");
694  meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
695 }
696 
697 template <typename INDEX_T>
699  bool force) {
700  index_col_.open_with_hugepages(name, force);
701  meta_map_->open_with_hugepages(name + ".map_meta", true);
702  meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
703 }
704 
705 template <typename INDEX_T>
706 void StringMapColumn<INDEX_T>::dump(const std::string& filename) {
707  index_col_.dump(filename);
708  meta_map_->dump(filename + ".map_meta", "");
709 }
710 
711 template <typename INDEX_T>
712 std::string_view StringMapColumn<INDEX_T>::get_view(size_t idx) const {
713  INDEX_T ind = index_col_.get_view(idx);
714  return meta_map_->get_key(ind).AsStringView();
715 }
716 
// Stores the string `val` at row `idx` by its map index.
// If `val` has no index yet, lock_ is taken, the lookup is repeated, and the
// string is inserted only if it is still missing. The resulting index is
// then written to the index column.
// NOTE(review): this listing skips source line 718 (the first line of the
// function signature) - confirm against the real header.
717 template <typename INDEX_T>
719  const std::string_view& val) {
720  INDEX_T lid;
// First lookup happens without holding lock_.
721  if (!meta_map_->get_index(val, lid)) {
722  lock_.lock();
// Look up again under the lock before inserting.
723  if (!meta_map_->get_index(val, lid)) {
724  lid = meta_map_->insert(val);
725  }
726  lock_.unlock();
727  }
728  index_col_.set_value(idx, lid);
729 }
730 
732 
733 std::shared_ptr<ColumnBase> CreateColumn(
735  const std::vector<PropertyType>& sub_types = {});
736 
737 #ifdef USE_PTHASH
738 template <typename EDATA_T>
739 class ConcatColumn : public ColumnBase {
740  public:
741  ~ConcatColumn() {}
742 
743  ConcatColumn(const TypedColumn<EDATA_T>& basic_column,
744  const TypedColumn<EDATA_T>& extra_column)
745  : basic_column_(basic_column),
746  extra_column_(extra_column),
747  basic_size_(basic_column.size()) {}
748 
749  void open(const std::string& name, const std::string& snapshot_dir,
750  const std::string& work_dir) {
751  LOG(FATAL) << "not implemented";
752  }
753 
754  void open_in_memory(const std::string& name) {
755  LOG(FATAL) << "not implemented";
756  }
757 
758  void open_with_hugepages(const std::string& name, bool force) {
759  LOG(FATAL) << "not implemented";
760  }
761 
762  void close() { LOG(FATAL) << "not implemented"; }
763 
764  EDATA_T get_view(size_t index) const {
765  return index < basic_size_ ? basic_column_.get(index)
766  : extra_column_.get(index - basic_size_);
767  }
768 
769  void touch(const std::string& filename) { LOG(FATAL) << "not implemented"; }
770 
771  virtual void dump(const std::string& filename) {
772  LOG(FATAL) << "not implemented";
773  }
774 
775  size_t size() const { return basic_size_ + extra_column_.size(); }
776 
777  void copy_to_tmp(const std::string& cur_path, const std::string& tmp_path) {
778  LOG(FATAL) << "not implemented";
779  }
780  void resize(size_t size) { LOG(FATAL) << "not implemented"; }
781 
782  PropertyType type() const { return AnyConverter<EDATA_T>::type(); }
783 
784  void set_any(size_t index, const Any& value) {
785  LOG(FATAL) << "not implemented";
786  }
787 
788  Any get(size_t index) const {
789  if (index < basic_size_) {
790  return basic_column_.get(index);
791  } else {
792  return extra_column_.get(index - basic_size_);
793  }
794  }
795 
796  void ingest(uint32_t index, grape::OutArchive& arc) {
797  LOG(FATAL) << "not implemented";
798  }
799 
800  StorageStrategy storage_strategy() const {
801  return basic_column_.storage_strategy();
802  }
803 
804  private:
805  const TypedColumn<EDATA_T>& basic_column_;
806  const TypedColumn<EDATA_T>& extra_column_;
807  size_t basic_size_;
808 };
809 #endif
810 
813  public:
814  virtual ~RefColumnBase() {}
815  virtual Any get(size_t index) const = 0;
816 };
817 
818 // Different from TypedColumn, RefColumn is a wrapper of mmap_array
819 template <typename T>
821  public:
822  using value_type = T;
823 
825  : basic_buffer(buffer),
826  basic_size(0),
827  extra_buffer(buffer),
828  extra_size(buffer.size()),
829  strategy_(strategy) {}
831  : basic_buffer(column.basic_buffer()),
832  basic_size(column.basic_buffer_size()),
833  extra_buffer(column.extra_buffer()),
834  extra_size(column.extra_buffer_size()),
835  strategy_(column.storage_strategy()) {}
837 
838  inline T get_view(size_t index) const {
839  return index < basic_size ? basic_buffer.get(index)
840  : extra_buffer.get(index - basic_size);
841  }
842 
843  size_t size() const { return basic_size + extra_size; }
844 
845  Any get(size_t index) const override {
846  return AnyConverter<T>::to_any(get_view(index));
847  }
848 
849  private:
851  size_t basic_size;
853  size_t extra_size;
854 
856 };
857 
858 template <>
860  public:
861  TypedRefColumn(LabelKey label_key) : label_key_(label_key) {}
862 
864 
865  inline LabelKey get_view(size_t index) const { return label_key_; }
866 
867  Any get(size_t index) const override {
868  LOG(ERROR) << "LabelKeyColumn does not support get() to Any";
869  return Any();
870  }
871 
872  private:
874 };
875 
876 template <>
878  public:
880  TypedRefColumn(label_t label_key) : label_key_(label_key) {}
881 
883 
884  inline GlobalId get_view(size_t index) const {
885  return GlobalId(label_key_, index);
886  }
887 
888  Any get(size_t index) const override {
889  LOG(ERROR) << "GlobalId Column does not support get() to Any";
890  return Any();
891  }
892 
893  private:
895 };
896 
897 // Create a reference column from a ColumnBase that contains a const reference
898 // to the actual column storage, offering a column-based store interface for
899 // vertex properties.
900 std::shared_ptr<RefColumnBase> CreateRefColumn(
901  std::shared_ptr<ColumnBase> column);
902 
903 } // namespace gs
904 
905 #endif // GRAPHSCOPE_PROPERTY_COLUMN_H_
gs::TypedColumn< std::string_view >::extra_buffer
const mmap_array< std::string_view > & extra_buffer() const
Definition: column.h:582
gs::TypedColumn< std::string_view >::extra_buffer_size
size_t extra_buffer_size() const
Definition: column.h:586
grape
Definition: types.h:33
gs::TypedColumn::touch
void touch(const std::string &filename) override
Definition: column.h:120
gs::TypedColumn::resize
void resize(size_t size) override
Definition: column.h:176
gs::TypedRefColumn::get_view
T get_view(size_t index) const
Definition: column.h:838
gs::TypedRefColumn::size
size_t size() const
Definition: column.h:843
gs::TypedColumn< std::string_view >::basic_buffer_size
size_t basic_buffer_size() const
Definition: column.h:580
gs::TypedRefColumn< LabelKey >::get
Any get(size_t index) const override
Definition: column.h:867
gs::TypedColumn::open
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:72
gs::mmap_array< std::string_view >::resize
void resize(size_t size, size_t data_size)
Definition: mmap_array.h:483
gs::PropertyType::GetStringDefaultMaxLength
static uint16_t GetStringDefaultMaxLength()
Definition: types.cc:103
gs::mmap_array< std::string_view >::size
size_t size() const
Definition: mmap_array.h:498
gs::StringMapColumn::get_meta_map
const LFIndexer< INDEX_T > & get_meta_map() const
Definition: column.h:673
gs::StringMapColumn::dump
void dump(const std::string &filename) override
Definition: column.h:706
gs::Any
Definition: types.h:399
gs::TypedColumn< std::string_view >::type
PropertyType type() const override
Definition: column.h:514
gs::StringMapColumn::get_view
std::string_view get_view(size_t idx) const
Definition: column.h:712
gs::ColumnBase::open
virtual void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir)=0
gs::TypedColumn< RecordView >::types_
std::vector< PropertyType > types_
Definition: column.h:294
gs::TypedColumn< std::string_view >::resize
void resize(size_t size) override
Definition: column.h:486
gs::TypedColumn::TypedColumn
TypedColumn(StorageStrategy strategy)
Definition: column.h:69
gs::TypedColumn< std::string_view >::dump
void dump(const std::string &filename) override
Definition: column.h:456
gs::TypedColumn::set_value
void set_value(size_t index, const T &val)
Definition: column.h:189
gs::TypedColumn< RecordView >::touch
void touch(const std::string &filename) override
Definition: column.h:255
gs::ColumnBase::type
virtual PropertyType type() const =0
types.h
gs::TypedColumn< RecordView >::type
PropertyType type() const override
Definition: column.h:272
gs::mmap_array< std::string_view >::open
void open(const std::string &filename, bool sync_to_file)
Definition: mmap_array.h:463
gs::StringMapColumn::lock_
grape::SpinLock lock_
Definition: column.h:678
gs::TypedColumn< std::string_view >::basic_buffer
const mmap_array< std::string_view > & basic_buffer() const
Definition: column.h:574
gs::TypedColumn< RecordView >::table_
std::shared_ptr< Table > table_
Definition: column.h:295
gs::TypedColumn< std::string_view >::width_
uint16_t width_
Definition: column.h:597
gs::TypedRefColumn< GlobalId >::~TypedRefColumn
~TypedRefColumn()
Definition: column.h:882
gs::TypedRefColumn< LabelKey >::label_key_
LabelKey label_key_
Definition: column.h:873
gs::TypedColumn< grape::EmptyType >::open_with_hugepages
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:320
gs::TypedColumn::basic_buffer_
mmap_array< T > basic_buffer_
Definition: column.h:226
gs::TypedColumn< grape::EmptyType >::set_any
void set_any(size_t index, const Any &value) override
Definition: column.h:331
gs::RefColumnBase
Create RefColumn for ease of usage for hqps.
Definition: column.h:812
gs::TypedColumn< RecordView >::storage_strategy
StorageStrategy storage_strategy() const override
Definition: column.h:286
gs::TypedColumn::close
void close() override
Definition: column.h:137
gs::StringMapColumn::get_index_col
const TypedColumn< INDEX_T > & get_index_col() const
Definition: column.h:672
gs::TypedColumn::size
size_t size() const override
Definition: column.h:174
gs::StringMapColumn::ingest
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:662
gs::TypedRefColumn::extra_buffer
const mmap_array< T > & extra_buffer
Definition: column.h:852
gs::TypedColumn< std::string_view >::size
size_t size() const override
Definition: column.h:484
gs::StringMapColumn::set_any
void set_any(size_t idx, const Any &value) override
Definition: column.h:652
gs::StringMapColumn::StringMapColumn
StringMapColumn(StorageStrategy strategy)
Definition: column.h:608
gs::TypedColumn< RecordView >::open
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:244
gs::TypedColumn< grape::EmptyType >::storage_strategy
StorageStrategy storage_strategy() const override
Definition: column.h:341
gs::StringMapColumn::close
void close() override
Definition: column.h:638
gs::PropertyType::Varchar
static PropertyType Varchar(uint16_t max_length)
Definition: types.cc:357
gs::TypedColumn< std::string_view >::~TypedColumn
~TypedColumn()
Definition: column.h:357
gs::TypedColumn::open_with_hugepages
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:101
gs::TypedColumn< std::string_view >::TypedColumn
TypedColumn(StorageStrategy strategy)
Definition: column.h:353
gs::mmap_array::resize
void resize(size_t size)
Definition: mmap_array.h:319
gs::StringMapColumn::open_with_hugepages
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:698
gs::GlobalId
Definition: types.h:167
gs::StorageStrategy::kDisk
@ kDisk
gs::TypedColumn::type
PropertyType type() const override
Definition: column.h:187
gs::StringMapColumn::size
size_t size() const override
Definition: column.h:645
gs::LabelKey::label_data_type
uint8_t label_data_type
Definition: types.h:284
gs::TypedColumn< std::string_view >::open_in_memory
void open_in_memory(const std::string &prefix) override
Definition: column.h:380
gs::TypedColumn< std::string_view >::TypedColumn
TypedColumn(StorageStrategy strategy, uint16_t width)
Definition: column.h:349
gs
Definition: adj_list.h:23
gs::TypedColumn
Definition: column.h:67
gs::StringMapColumn::~StringMapColumn
~StringMapColumn()
Definition: column.h:615
gs::TypedColumn< RecordView >::dump
void dump(const std::string &filename) override
Definition: column.h:259
gs::TypedColumn< grape::EmptyType >::get
Any get(size_t index) const override
Definition: column.h:335
gs::PropertyType::kEmpty
static const PropertyType kEmpty
Definition: types.h:137
gs::StorageStrategy
StorageStrategy
Definition: types.h:58
gs::StringMapColumn::index_col_
TypedColumn< INDEX_T > index_col_
Definition: column.h:676
gs::TypedColumn< std::string_view >::close
void close() override
Definition: column.h:432
gs::TypedRefColumn::get
Any get(size_t index) const override
Definition: column.h:845
gs::CreateRefColumn
std::shared_ptr< RefColumnBase > CreateRefColumn(std::shared_ptr< ColumnBase > column)
Definition: column.cc:260
gs::TypedColumn::ingest
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:212
gs::TypedColumn::get
Any get(size_t index) const override
Definition: column.h:208
gs::mmap_array< std::string_view >::reset
void reset()
Definition: mmap_array.h:453
gs::TypedRefColumn< LabelKey >::get_view
LabelKey get_view(size_t index) const
Definition: column.h:865
gs::TypedColumn::storage_strategy
StorageStrategy storage_strategy() const override
Definition: column.h:218
gs::TypedColumn< std::string_view >::extra_buffer_
mmap_array< std::string_view > extra_buffer_
Definition: column.h:591
gs::mmap_array::open
void open(const std::string &filename, bool sync_to_file=false)
Definition: mmap_array.h:129
gs::StringMapColumn::resize
void resize(size_t size) override
Definition: column.h:646
gs::TypedRefColumn::basic_size
size_t basic_size
Definition: column.h:851
gs::TypedColumn::extra_buffer
const mmap_array< T > & extra_buffer() const
Definition: column.h:222
gs::StringMapColumn::open_in_memory
void open_in_memory(const std::string &name) override
Definition: column.h:691
gs::TypedColumn< grape::EmptyType >::strategy_
StorageStrategy strategy_
Definition: column.h:344
gs::TypedRefColumn::extra_size
size_t extra_size
Definition: column.h:853
gs::ColumnBase::storage_strategy
virtual StorageStrategy storage_strategy() const =0
gs::TypedColumn< grape::EmptyType >::TypedColumn
TypedColumn(StorageStrategy strategy)
Definition: column.h:314
gs::TypedRefColumn< GlobalId >::get
Any get(size_t index) const override
Definition: column.h:888
gs::TypedColumn< std::string_view >::strategy_
StorageStrategy strategy_
Definition: column.h:595
gs::TypedColumn::get_view
T get_view(size_t index) const
Definition: column.h:203
gs::TypedRefColumn::TypedRefColumn
TypedRefColumn(const TypedColumn< T > &column)
Definition: column.h:830
gs::CreateColumn
std::shared_ptr< ColumnBase > CreateColumn(PropertyType type, StorageStrategy strategy, const std::vector< PropertyType > &sub_types)
Definition: column.cc:141
gs::TypedColumn< std::string_view >::type_
PropertyType type_
Definition: column.h:598
gs::TypedColumn< std::string_view >::pos_
std::atomic< size_t > pos_
Definition: column.h:593
gs::TypedColumn< grape::EmptyType >::ingest
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:339
gs::TypedColumn< std::string_view >::set_value
void set_value(size_t idx, const std::string_view &val)
Definition: column.h:516
gs::TypedColumn::basic_buffer_size
size_t basic_buffer_size() const
Definition: column.h:221
gs::TypedRefColumn::TypedRefColumn
TypedRefColumn(const mmap_array< T > &buffer, StorageStrategy strategy)
Definition: column.h:824
gs::TypedColumn< std::string_view >::storage_strategy
StorageStrategy storage_strategy() const override
Definition: column.h:578
gs::TypedColumn< std::string_view >
Definition: column.h:347
gs::TypedColumn< std::string_view >::rw_mutex_
std::shared_mutex rw_mutex_
Definition: column.h:596
gs::TypedColumn< std::string_view >::basic_pos_
std::atomic< size_t > basic_pos_
Definition: column.h:594
gs::TypedColumn< RecordView >::~TypedColumn
~TypedColumn()
Definition: column.h:242
gs::TypedColumn< std::string_view >::basic_size_
size_t basic_size_
Definition: column.h:590
gs::TypedRefColumn< LabelKey >::~TypedRefColumn
~TypedRefColumn()
Definition: column.h:863
gs::TypedColumn::open_in_memory
void open_in_memory(const std::string &name) override
Definition: column.h:89
gs::TypedColumn< std::string_view >::open_with_hugepages
void open_with_hugepages(const std::string &prefix, bool force) override
Definition: column.h:390
gs::mmap_array::size
size_t size() const
Definition: mmap_array.h:415
gs::TypedColumn< RecordView >::TypedColumn
TypedColumn(const std::vector< PropertyType > &types)
Definition: column.h:236
gs::ColumnBase::copy_to_tmp
virtual void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path)=0
gs::StringMapColumn::open
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:682
gs::PropertyType::kRecordView
static const PropertyType kRecordView
Definition: types.h:153
gs::ColumnBase::set_any
virtual void set_any(size_t index, const Any &value)=0
gs::StringMapColumn::touch
void touch(const std::string &filename) override
Definition: column.h:634
gs::mmap_array< std::string_view >
Definition: mmap_array.h:447
gs::TypedColumn::strategy_
StorageStrategy strategy_
Definition: column.h:230
gs::TypedColumn< std::string_view >::extra_size_
size_t extra_size_
Definition: column.h:592
gs::RefColumnBase::~RefColumnBase
virtual ~RefColumnBase()
Definition: column.h:814
gs::LabelKey
Definition: types.h:283
gs::TypedColumn< RecordView >::ingest
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:282
gs::TypedColumn< std::string_view >::touch
void touch(const std::string &filename) override
Definition: column.h:406
gs::TypedColumn::~TypedColumn
~TypedColumn()
Definition: column.h:70
gs::TypedColumn< grape::EmptyType >::size
size_t size() const override
Definition: column.h:326
gs::TypedRefColumn< GlobalId >::TypedRefColumn
TypedRefColumn(label_t label_key)
Definition: column.h:880
gs::ColumnBase::dump
virtual void dump(const std::string &filename)=0
gs::TypedColumn::dump
void dump(const std::string &filename) override
Definition: column.h:157
gs::TypedColumn::extra_buffer_
mmap_array< T > extra_buffer_
Definition: column.h:228
gs::TypedColumn< grape::EmptyType >::~TypedColumn
~TypedColumn()
Definition: column.h:315
gs::TypedRefColumn::value_type
T value_type
Definition: column.h:822
gs::TypedColumn< grape::EmptyType >::open
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:317
mmap_array.h
gs::ColumnBase::get
virtual Any get(size_t index) const =0
gs::StringMapColumn
Definition: column.h:606
gs::ColumnBase::~ColumnBase
virtual ~ColumnBase()
Definition: column.h:34
gs::ColumnBase::size
virtual size_t size() const =0
gs::StringMapColumn::type
PropertyType type() const override
Definition: column.h:648
gs::ColumnBase::open_in_memory
virtual void open_in_memory(const std::string &name)=0
gs::TypedColumn< std::string_view >::ingest
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:568
gs::copy_file
void copy_file(const std::string &src, const std::string &dst)
Definition: file_names.h:80
gs::TypedRefColumn< GlobalId >::get_view
GlobalId get_view(size_t index) const
Definition: column.h:884
gs::TypedColumn::extra_size_
size_t extra_size_
Definition: column.h:229
gs::TypedColumn::set_any
void set_any(size_t index, const Any &value) override
Definition: column.h:199
gs::TypedRefColumn
Definition: column.h:820
gs::truncate_utf8
std::string_view truncate_utf8(std::string_view str, size_t length)
Definition: column.cc:25
gs::ColumnBase::close
virtual void close()=0
gs::TypedColumn< grape::EmptyType >::touch
void touch(const std::string &filename) override
Definition: column.h:321
gs::mmap_array
Definition: mmap_array.h:65
gs::StringMapColumn::get
Any get(size_t idx) const override
Definition: column.h:658
gs::StringMapColumn::storage_strategy
StorageStrategy storage_strategy() const override
Definition: column.h:668
gs::snapshot_dir
std::string snapshot_dir(const std::string &work_dir, uint32_t version)
Definition: file_names.h:192
std
Definition: loading_config.h:232
gs::TypedRefColumn::strategy_
StorageStrategy strategy_
Definition: column.h:855
gs::ColumnBase::open_with_hugepages
virtual void open_with_hugepages(const std::string &name, bool force)=0
gs::TypedColumn< grape::EmptyType >::set_value
void set_value(size_t index, const grape::EmptyType &value)
Definition: column.h:333
gs::TypedColumn< RecordView >::copy_to_tmp
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:263
gs::TypedColumn< std::string_view >::open
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:359
gs::TypedColumn< grape::EmptyType >::close
void close() override
Definition: column.h:325
gs::TypedColumn< std::string_view >::get_view
std::string_view get_view(size_t idx) const
Definition: column.h:559
gs::StringMapColumn::copy_to_tmp
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:623
gs::RefColumnBase::get
virtual Any get(size_t index) const =0
gs::TypedColumn< RecordView >::sub_types
std::vector< PropertyType > sub_types() const
Definition: column.h:291
gs::AnyConverter
Definition: types.h:397
gs::TypedRefColumn::basic_buffer
const mmap_array< T > & basic_buffer
Definition: column.h:850
gs::ColumnBase::ingest
virtual void ingest(uint32_t index, grape::OutArchive &arc)=0
gs::TypedColumn< grape::EmptyType >::dump
void dump(const std::string &filename) override
Definition: column.h:322
gs::TypedColumn< std::string_view >::get
Any get(size_t idx) const override
Definition: column.h:564
gs::mmap_array< std::string_view >::set
void set(size_t idx, size_t offset, const std::string_view &val)
Definition: mmap_array.h:488
gs::TypedColumn::extra_buffer_size
size_t extra_buffer_size() const
Definition: column.h:223
gs::TypedRefColumn::~TypedRefColumn
~TypedRefColumn()
Definition: column.h:836
gs::TypedRefColumn< GlobalId >::label_key_
label_t label_key_
Definition: column.h:894
gs::TypedColumn< grape::EmptyType >::resize
void resize(size_t size) override
Definition: column.h:327
gs::TypedColumn< grape::EmptyType >::open_in_memory
void open_in_memory(const std::string &name) override
Definition: column.h:319
gs::TypedColumn< grape::EmptyType >::get_view
grape::EmptyType get_view(size_t index) const
Definition: column.h:337
gs::TypedColumn< std::string_view >::copy_to_tmp
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:437
gs::StringMapColumn::meta_map_
LFIndexer< INDEX_T > * meta_map_
Definition: column.h:677
gs::PropertyType
Definition: types.h:95
gs::RecordView
Definition: types.h:292
gs::TypedColumn< RecordView >
Definition: column.h:234
gs::TypedRefColumn< LabelKey >::TypedRefColumn
TypedRefColumn(LabelKey label_key)
Definition: column.h:861
gs::StringMapColumn::set_value
void set_value(size_t idx, const std::string_view &val)
Definition: column.h:718
gs::TypedColumn< grape::EmptyType >::type
PropertyType type() const override
Definition: column.h:329
gs::ColumnBase::resize
virtual void resize(size_t size)=0
gs::mmap_array::reset
void reset()
Definition: mmap_array.h:84
gs::mmap_array::set
void set(size_t idx, const T &val)
Definition: mmap_array.h:408
gs::ColumnBase::touch
virtual void touch(const std::string &filename)=0
gs::TypedColumn::copy_to_tmp
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:142
gs::TypedColumn::basic_buffer
const mmap_array< T > & basic_buffer() const
Definition: column.h:220
gs::LFIndexer
Definition: id_indexer.h:184
gs::PropertyType::kStringMap
static const PropertyType kStringMap
Definition: types.h:150
gs::TypedColumn< std::string_view >::set_any
void set_any(size_t idx, const Any &value) override
Definition: column.h:534
gs::TypedRefColumn< GlobalId >::label_t
typename LabelKey::label_data_type label_t
Definition: column.h:879
gs::ColumnBase
Definition: column.h:32
gs::StorageStrategy::kMem
@ kMem
gs::Any::AsStringView
std::string_view AsStringView() const
Definition: types.h:657
gs::TypedColumn::basic_size_
size_t basic_size_
Definition: column.h:227
gs::TypedColumn< std::string_view >::basic_buffer_
mmap_array< std::string_view > basic_buffer_
Definition: column.h:589
gs::TypedColumn< std::string_view >::set_value_with_check
void set_value_with_check(size_t idx, const std::string_view &value)
Definition: column.h:539
gs::TypedColumn< RecordView >::open_with_hugepages
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:251
gs::TypedColumn< grape::EmptyType >::copy_to_tmp
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:323