Go to the documentation of this file.
16 #ifndef GRAPHSCOPE_PROPERTY_COLUMN_H_
17 #define GRAPHSCOPE_PROPERTY_COLUMN_H_
20 #include <string_view>
21 #include "grape/utils/concurrent_queue.h"
25 #include "grape/serialization/out_archive.h"
29 std::string_view
truncate_utf8(std::string_view str,
size_t length);
36 const std::string& work_dir) = 0;
42 virtual void close() = 0;
44 virtual void touch(
const std::string& filename) = 0;
46 virtual void dump(
const std::string& filename) = 0;
48 virtual size_t size()
const = 0;
50 virtual void copy_to_tmp(
const std::string& cur_path,
51 const std::string& tmp_path) = 0;
56 virtual void set_any(
size_t index,
const Any& value) = 0;
58 virtual Any get(
size_t index)
const = 0;
60 virtual void ingest(uint32_t index, grape::OutArchive& arc) = 0;
72 const std::string& work_dir)
override {
74 if (std::filesystem::exists(basic_path)) {
89 if (!name.empty() && std::filesystem::exists(name)) {
102 if (!name.empty() && std::filesystem::exists(name)) {
114 LOG(INFO) <<
"Open " << name <<
" with normal mmap pages";
119 void touch(
const std::string& filename)
override {
121 tmp.
open(filename,
true);
142 const std::string& tmp_path)
override {
144 if (!std::filesystem::exists(cur_path)) {
150 tmp.
open(tmp_path,
true);
156 void dump(
const std::string& filename)
override {
163 tmp.
open(filename,
true);
194 throw std::runtime_error(
"Index out of range");
211 void ingest(uint32_t index, grape::OutArchive& arc)
override {
235 TypedColumn(
const std::vector<PropertyType>& types) : types_(types) {
236 if (types.size() == 0) {
237 LOG(FATAL) <<
"RecordView column must have sub types.";
244 const std::string& work_dir)
override {
245 LOG(FATAL) <<
"RecordView column does not support open.";
251 LOG(FATAL) <<
"RecordView column does not support open with hugepages.";
254 void touch(
const std::string& filename)
override {
255 LOG(FATAL) <<
"RecordView column does not support touch.";
258 void dump(
const std::string& filename)
override {
259 LOG(FATAL) <<
"RecordView column does not support dump.";
263 const std::string& tmp_path)
override {
264 LOG(FATAL) <<
"RecordView column does not support copy_to_tmp.";
266 void close()
override;
268 size_t size()
const override;
273 void set_any(
size_t index,
const Any& value)
override;
279 Any get(
size_t index)
const override;
281 void ingest(uint32_t index, grape::OutArchive& arc)
override {
282 LOG(FATAL) <<
"RecordView column does not support ingest.";
286 LOG(ERROR) <<
"RecordView column does not have storage strategy.";
290 std::vector<PropertyType>
sub_types()
const {
return types_; }
317 const std::string& work_dir)
override {}
320 void touch(
const std::string& filename)
override {}
321 void dump(
const std::string& filename)
override {}
323 const std::string& tmp_path)
override {}
325 size_t size()
const override {
return 0; }
332 void set_value(
size_t index,
const grape::EmptyType& value) {}
336 grape::EmptyType
get_view(
size_t index)
const {
return grape::EmptyType(); }
338 void ingest(uint32_t index, grape::OutArchive& arc)
override {}
354 const std::string& work_dir)
override {
356 if (std::filesystem::exists(basic_path +
".items")) {
364 if (work_dir ==
"") {
395 LOG(INFO) <<
"Open " << prefix <<
" with normal mmap pages";
400 void touch(
const std::string& filename)
override {
402 tmp.
open(filename,
true);
407 tmp.
set(k, offset, val);
408 offset += val.size();
413 offset += val.size();
432 const std::string& tmp_path)
override {
434 if (!std::filesystem::exists(cur_path +
".data")) {
437 copy_file(cur_path +
".data", tmp_path +
".data");
438 copy_file(cur_path +
".items", tmp_path +
".items");
444 tmp.
open(tmp_path,
true);
450 void dump(
const std::string& filename)
override {
459 tmp.
open(filename,
true);
465 tmp.
set(k, offset, val);
466 offset += val.size();
471 offset += val.size();
488 size_t basic_avg_width =
501 size_t pos = basic_pos_.load();
502 pos = pos + (pos + 4) / 5;
509 void set_value(
size_t idx,
const std::string_view& val) {
510 auto copied_val = val;
511 if (copied_val.size() >= width_) {
512 VLOG(1) <<
"String length" << copied_val.size()
513 <<
" exceeds the maximum length: " << width_ <<
", cut off.";
517 size_t offset = pos_.fetch_add(copied_val.size());
520 size_t offset = basic_pos_.fetch_add(copied_val.size());
523 LOG(FATAL) <<
"Index out of range";
534 size_t offset = pos_.fetch_add(value.size());
540 size_t offset = basic_pos_.fetch_add(value.size());
546 LOG(FATAL) <<
"Index out of range";
550 inline std::string_view
get_view(
size_t idx)
const {
559 void ingest(uint32_t index, grape::OutArchive& arc)
override {
560 std::string_view val;
591 template <
typename INDEX_T>
594 template <
typename INDEX_T>
613 const std::string& tmp_path)
override {
614 meta_map_->copy_to_tmp(cur_path +
".map_meta", tmp_path +
".map_meta");
618 const std::string& work_dir)
override;
621 void dump(
const std::string& filename)
override;
623 void touch(
const std::string& filename)
override {
639 void set_value(
size_t idx,
const std::string_view& val);
645 std::string_view
get_view(
size_t idx)
const;
651 void ingest(uint32_t index, grape::OutArchive& arc)
override {
652 std::string_view val;
670 template <
typename INDEX_T>
673 const std::string& work_dir) {
675 meta_map_->open(name +
".map_meta",
snapshot_dir, work_dir);
676 meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
679 template <
typename INDEX_T>
681 index_col_.open_in_memory(name);
682 meta_map_->open_in_memory(name +
".map_meta");
683 meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
686 template <
typename INDEX_T>
689 index_col_.open_with_hugepages(name, force);
690 meta_map_->open_with_hugepages(name +
".map_meta",
true);
691 meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
694 template <
typename INDEX_T>
696 index_col_.dump(filename);
697 meta_map_->dump(filename +
".map_meta",
"");
700 template <
typename INDEX_T>
702 INDEX_T ind = index_col_.get_view(idx);
703 return meta_map_->get_key(ind).AsStringView();
706 template <
typename INDEX_T>
708 const std::string_view& val) {
710 if (!meta_map_->get_index(val, lid)) {
712 if (!meta_map_->get_index(val, lid)) {
713 lid = meta_map_->insert(val);
717 index_col_.set_value(idx, lid);
724 const std::vector<PropertyType>& sub_types = {});
727 template <
typename EDATA_T>
728 class ConcatColumn :
public ColumnBase {
732 ConcatColumn(
const TypedColumn<EDATA_T>& basic_column,
733 const TypedColumn<EDATA_T>& extra_column)
734 : basic_column_(basic_column),
735 extra_column_(extra_column),
736 basic_size_(basic_column.size()) {}
738 void open(
const std::string& name,
const std::string&
snapshot_dir,
739 const std::string& work_dir) {
740 LOG(FATAL) <<
"not implemented";
743 void open_in_memory(
const std::string& name) {
744 LOG(FATAL) <<
"not implemented";
747 void open_with_hugepages(
const std::string& name,
bool force) {
748 LOG(FATAL) <<
"not implemented";
751 void close() { LOG(FATAL) <<
"not implemented"; }
753 EDATA_T get_view(
size_t index)
const {
754 return index < basic_size_ ? basic_column_.get(index)
755 : extra_column_.get(index - basic_size_);
758 void touch(
const std::string& filename) { LOG(FATAL) <<
"not implemented"; }
760 virtual void dump(
const std::string& filename) {
761 LOG(FATAL) <<
"not implemented";
764 size_t size()
const {
return basic_size_ + extra_column_.size(); }
766 void copy_to_tmp(
const std::string& cur_path,
const std::string& tmp_path) {
767 LOG(FATAL) <<
"not implemented";
769 void resize(
size_t size) { LOG(FATAL) <<
"not implemented"; }
771 PropertyType type()
const {
return AnyConverter<EDATA_T>::type(); }
773 void set_any(
size_t index,
const Any& value) {
774 LOG(FATAL) <<
"not implemented";
777 Any get(
size_t index)
const {
778 if (index < basic_size_) {
779 return basic_column_.get(index);
781 return extra_column_.get(index - basic_size_);
785 void ingest(uint32_t index, grape::OutArchive& arc) {
786 LOG(FATAL) <<
"not implemented";
790 return basic_column_.storage_strategy();
794 const TypedColumn<EDATA_T>& basic_column_;
795 const TypedColumn<EDATA_T>& extra_column_;
804 virtual Any get(
size_t index)
const = 0;
808 template <
typename T>
857 LOG(ERROR) <<
"LabelKeyColumn does not support get() to Any";
878 LOG(ERROR) <<
"GlobalId Column does not support get() to Any";
890 std::shared_ptr<ColumnBase> column);
894 #endif // GRAPHSCOPE_PROPERTY_COLUMN_H_
const mmap_array< std::string_view > & extra_buffer() const
Definition: column.h:573
size_t extra_buffer_size() const
Definition: column.h:577
void touch(const std::string &filename) override
Definition: column.h:119
void resize(size_t size) override
Definition: column.h:175
T get_view(size_t index) const
Definition: column.h:827
size_t size() const
Definition: column.h:832
size_t basic_buffer_size() const
Definition: column.h:571
Any get(size_t index) const override
Definition: column.h:856
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:71
void resize(size_t size, size_t data_size)
Definition: mmap_array.h:483
size_t size() const
Definition: mmap_array.h:498
const LFIndexer< INDEX_T > & get_meta_map() const
Definition: column.h:662
void dump(const std::string &filename) override
Definition: column.h:695
PropertyType type() const override
Definition: column.h:507
std::string_view get_view(size_t idx) const
Definition: column.h:701
virtual void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir)=0
std::vector< PropertyType > types_
Definition: column.h:293
void resize(size_t size) override
Definition: column.h:480
TypedColumn(StorageStrategy strategy)
Definition: column.h:68
void dump(const std::string &filename) override
Definition: column.h:450
void set_value(size_t index, const T &val)
Definition: column.h:188
void touch(const std::string &filename) override
Definition: column.h:254
virtual PropertyType type() const =0
PropertyType type() const override
Definition: column.h:271
void open(const std::string &filename, bool sync_to_file)
Definition: mmap_array.h:463
grape::SpinLock lock_
Definition: column.h:667
const mmap_array< std::string_view > & basic_buffer() const
Definition: column.h:565
std::shared_ptr< Table > table_
Definition: column.h:294
uint16_t width_
Definition: column.h:587
~TypedRefColumn()
Definition: column.h:871
LabelKey label_key_
Definition: column.h:862
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:319
mmap_array< T > basic_buffer_
Definition: column.h:225
void set_any(size_t index, const Any &value) override
Definition: column.h:330
Create RefColumn for ease of usage for hqps.
Definition: column.h:801
StorageStrategy storage_strategy() const override
Definition: column.h:285
void close() override
Definition: column.h:136
const TypedColumn< INDEX_T > & get_index_col() const
Definition: column.h:661
size_t size() const override
Definition: column.h:173
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:651
const mmap_array< T > & extra_buffer
Definition: column.h:841
size_t size() const override
Definition: column.h:478
void set_any(size_t idx, const Any &value) override
Definition: column.h:641
StringMapColumn(StorageStrategy strategy)
Definition: column.h:597
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:243
StorageStrategy storage_strategy() const override
Definition: column.h:340
void close() override
Definition: column.h:627
static PropertyType Varchar(uint16_t max_length)
Definition: types.cc:335
~TypedColumn()
Definition: column.h:351
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:100
void resize(size_t size)
Definition: mmap_array.h:319
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:687
PropertyType type() const override
Definition: column.h:186
size_t size() const override
Definition: column.h:634
uint8_t label_data_type
Definition: types.h:280
void open_in_memory(const std::string &prefix) override
Definition: column.h:374
Definition: adj_list.h:23
~StringMapColumn()
Definition: column.h:604
void dump(const std::string &filename) override
Definition: column.h:258
Any get(size_t index) const override
Definition: column.h:334
static const PropertyType kEmpty
Definition: types.h:133
StorageStrategy
Definition: types.h:58
TypedColumn< INDEX_T > index_col_
Definition: column.h:665
void close() override
Definition: column.h:426
Any get(size_t index) const override
Definition: column.h:834
std::shared_ptr< RefColumnBase > CreateRefColumn(std::shared_ptr< ColumnBase > column)
Definition: column.cc:221
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:211
Any get(size_t index) const override
Definition: column.h:207
void reset()
Definition: mmap_array.h:453
LabelKey get_view(size_t index) const
Definition: column.h:854
StorageStrategy storage_strategy() const override
Definition: column.h:217
mmap_array< std::string_view > extra_buffer_
Definition: column.h:582
void open(const std::string &filename, bool sync_to_file=false)
Definition: mmap_array.h:129
void resize(size_t size) override
Definition: column.h:635
size_t basic_size
Definition: column.h:840
const mmap_array< T > & extra_buffer() const
Definition: column.h:221
void open_in_memory(const std::string &name) override
Definition: column.h:680
StorageStrategy strategy_
Definition: column.h:343
size_t extra_size
Definition: column.h:842
virtual StorageStrategy storage_strategy() const =0
TypedColumn(StorageStrategy strategy)
Definition: column.h:313
static constexpr const uint16_t STRING_DEFAULT_MAX_LENGTH
Definition: types.h:96
Any get(size_t index) const override
Definition: column.h:877
StorageStrategy strategy_
Definition: column.h:586
T get_view(size_t index) const
Definition: column.h:202
TypedRefColumn(const TypedColumn< T > &column)
Definition: column.h:819
std::shared_ptr< ColumnBase > CreateColumn(PropertyType type, StorageStrategy strategy, const std::vector< PropertyType > &sub_types)
Definition: column.cc:141
std::atomic< size_t > pos_
Definition: column.h:584
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:338
void set_value(size_t idx, const std::string_view &val)
Definition: column.h:509
size_t basic_buffer_size() const
Definition: column.h:220
TypedRefColumn(const mmap_array< T > &buffer, StorageStrategy strategy)
Definition: column.h:813
StorageStrategy storage_strategy() const override
Definition: column.h:569
TypedColumn(StorageStrategy strategy, uint16_t width=PropertyType::STRING_DEFAULT_MAX_LENGTH)
Definition: column.h:348
std::atomic< size_t > basic_pos_
Definition: column.h:585
~TypedColumn()
Definition: column.h:241
size_t basic_size_
Definition: column.h:581
~TypedRefColumn()
Definition: column.h:852
void open_in_memory(const std::string &name) override
Definition: column.h:88
void open_with_hugepages(const std::string &prefix, bool force) override
Definition: column.h:384
size_t size() const
Definition: mmap_array.h:415
TypedColumn(const std::vector< PropertyType > &types)
Definition: column.h:235
virtual void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path)=0
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:671
static const PropertyType kRecordView
Definition: types.h:149
virtual void set_any(size_t index, const Any &value)=0
void touch(const std::string &filename) override
Definition: column.h:623
Definition: mmap_array.h:447
StorageStrategy strategy_
Definition: column.h:229
size_t extra_size_
Definition: column.h:583
virtual ~RefColumnBase()
Definition: column.h:803
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:281
void touch(const std::string &filename) override
Definition: column.h:400
~TypedColumn()
Definition: column.h:69
size_t size() const override
Definition: column.h:325
TypedRefColumn(label_t label_key)
Definition: column.h:869
virtual void dump(const std::string &filename)=0
void dump(const std::string &filename) override
Definition: column.h:156
mmap_array< T > extra_buffer_
Definition: column.h:227
~TypedColumn()
Definition: column.h:314
T value_type
Definition: column.h:811
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:316
virtual Any get(size_t index) const =0
virtual ~ColumnBase()
Definition: column.h:33
virtual size_t size() const =0
PropertyType type() const override
Definition: column.h:637
virtual void open_in_memory(const std::string &name)=0
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:559
void copy_file(const std::string &src, const std::string &dst)
Definition: file_names.h:80
GlobalId get_view(size_t index) const
Definition: column.h:873
size_t extra_size_
Definition: column.h:228
void set_any(size_t index, const Any &value) override
Definition: column.h:198
std::string_view truncate_utf8(std::string_view str, size_t length)
Definition: column.cc:25
void touch(const std::string &filename) override
Definition: column.h:320
Definition: mmap_array.h:65
Any get(size_t idx) const override
Definition: column.h:647
StorageStrategy storage_strategy() const override
Definition: column.h:657
std::string snapshot_dir(const std::string &work_dir, uint32_t version)
Definition: file_names.h:192
Definition: loading_config.h:232
StorageStrategy strategy_
Definition: column.h:844
virtual void open_with_hugepages(const std::string &name, bool force)=0
void set_value(size_t index, const grape::EmptyType &value)
Definition: column.h:332
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:262
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:353
void close() override
Definition: column.h:324
std::string_view get_view(size_t idx) const
Definition: column.h:550
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:612
virtual Any get(size_t index) const =0
std::vector< PropertyType > sub_types() const
Definition: column.h:290
const mmap_array< T > & basic_buffer
Definition: column.h:839
virtual void ingest(uint32_t index, grape::OutArchive &arc)=0
void dump(const std::string &filename) override
Definition: column.h:321
Any get(size_t idx) const override
Definition: column.h:555
void set(size_t idx, size_t offset, const std::string_view &val)
Definition: mmap_array.h:488
size_t extra_buffer_size() const
Definition: column.h:222
~TypedRefColumn()
Definition: column.h:825
label_t label_key_
Definition: column.h:883
void resize(size_t size) override
Definition: column.h:326
void open_in_memory(const std::string &name) override
Definition: column.h:318
grape::EmptyType get_view(size_t index) const
Definition: column.h:336
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:431
LFIndexer< INDEX_T > * meta_map_
Definition: column.h:666
TypedRefColumn(LabelKey label_key)
Definition: column.h:850
void set_value(size_t idx, const std::string_view &val)
Definition: column.h:707
PropertyType type() const override
Definition: column.h:328
virtual void resize(size_t size)=0
void reset()
Definition: mmap_array.h:84
void set(size_t idx, const T &val)
Definition: mmap_array.h:408
virtual void touch(const std::string &filename)=0
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:141
const mmap_array< T > & basic_buffer() const
Definition: column.h:219
Definition: id_indexer.h:184
static const PropertyType kStringMap
Definition: types.h:146
void set_any(size_t idx, const Any &value) override
Definition: column.h:527
typename LabelKey::label_data_type label_t
Definition: column.h:868
std::string_view AsStringView() const
Definition: types.h:653
size_t basic_size_
Definition: column.h:226
mmap_array< std::string_view > basic_buffer_
Definition: column.h:580
void set_value_with_check(size_t idx, const std::string_view &value)
Definition: column.h:532
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:250
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:322