Go to the documentation of this file.
16 #ifndef GRAPHSCOPE_PROPERTY_COLUMN_H_
17 #define GRAPHSCOPE_PROPERTY_COLUMN_H_
19 #include <shared_mutex>
21 #include <string_view>
22 #include "grape/utils/concurrent_queue.h"
26 #include "grape/serialization/out_archive.h"
30 std::string_view
truncate_utf8(std::string_view str,
size_t length);
37 const std::string& work_dir) = 0;
43 virtual void close() = 0;
45 virtual void touch(
const std::string& filename) = 0;
47 virtual void dump(
const std::string& filename) = 0;
49 virtual size_t size()
const = 0;
51 virtual void copy_to_tmp(
const std::string& cur_path,
52 const std::string& tmp_path) = 0;
57 virtual void set_any(
size_t index,
const Any& value) = 0;
59 virtual Any get(
size_t index)
const = 0;
61 virtual void ingest(uint32_t index, grape::OutArchive& arc) = 0;
73 const std::string& work_dir)
override {
75 if (std::filesystem::exists(basic_path)) {
90 if (!name.empty() && std::filesystem::exists(name)) {
103 if (!name.empty() && std::filesystem::exists(name)) {
115 LOG(INFO) <<
"Open " << name <<
" with normal mmap pages";
120 void touch(
const std::string& filename)
override {
122 tmp.
open(filename,
true);
143 const std::string& tmp_path)
override {
145 if (!std::filesystem::exists(cur_path)) {
151 tmp.
open(tmp_path,
true);
157 void dump(
const std::string& filename)
override {
164 tmp.
open(filename,
true);
195 throw std::runtime_error(
"Index out of range");
212 void ingest(uint32_t index, grape::OutArchive& arc)
override {
236 TypedColumn(
const std::vector<PropertyType>& types) : types_(types) {
237 if (types.size() == 0) {
238 LOG(FATAL) <<
"RecordView column must have sub types.";
245 const std::string& work_dir)
override {
246 LOG(FATAL) <<
"RecordView column does not support open.";
252 LOG(FATAL) <<
"RecordView column does not support open with hugepages.";
255 void touch(
const std::string& filename)
override {
256 LOG(FATAL) <<
"RecordView column does not support touch.";
259 void dump(
const std::string& filename)
override {
260 LOG(FATAL) <<
"RecordView column does not support dump.";
264 const std::string& tmp_path)
override {
265 LOG(FATAL) <<
"RecordView column does not support copy_to_tmp.";
267 void close()
override;
269 size_t size()
const override;
274 void set_any(
size_t index,
const Any& value)
override;
280 Any get(
size_t index)
const override;
282 void ingest(uint32_t index, grape::OutArchive& arc)
override {
283 LOG(FATAL) <<
"RecordView column does not support ingest.";
287 LOG(ERROR) <<
"RecordView column does not have storage strategy.";
291 std::vector<PropertyType>
sub_types()
const {
return types_; }
318 const std::string& work_dir)
override {}
321 void touch(
const std::string& filename)
override {}
322 void dump(
const std::string& filename)
override {}
324 const std::string& tmp_path)
override {}
326 size_t size()
const override {
return 0; }
333 void set_value(
size_t index,
const grape::EmptyType& value) {}
337 grape::EmptyType
get_view(
size_t index)
const {
return grape::EmptyType(); }
339 void ingest(uint32_t index, grape::OutArchive& arc)
override {}
360 const std::string& work_dir)
override {
362 if (std::filesystem::exists(basic_path +
".items")) {
370 if (work_dir ==
"") {
401 LOG(INFO) <<
"Open " << prefix <<
" with normal mmap pages";
406 void touch(
const std::string& filename)
override {
408 tmp.
open(filename,
true);
413 tmp.
set(k, offset, val);
414 offset += val.size();
419 offset += val.size();
438 const std::string& tmp_path)
override {
440 if (!std::filesystem::exists(cur_path +
".data")) {
443 copy_file(cur_path +
".data", tmp_path +
".data");
444 copy_file(cur_path +
".items", tmp_path +
".items");
450 tmp.
open(tmp_path,
true);
456 void dump(
const std::string& filename)
override {
465 tmp.
open(filename,
true);
471 tmp.
set(k, offset, val);
472 offset += val.size();
477 offset += val.size();
487 std::unique_lock<std::shared_mutex> lock(rw_mutex_);
495 size_t basic_avg_width =
508 size_t pos = basic_pos_.load();
509 pos = pos + (pos + 4) / 5;
516 void set_value(
size_t idx,
const std::string_view& val) {
517 auto copied_val = val;
518 if (copied_val.size() >= width_) {
519 VLOG(1) <<
"String length" << copied_val.size()
520 <<
" exceeds the maximum length: " << width_ <<
", cut off.";
524 size_t offset = pos_.fetch_add(copied_val.size());
527 size_t offset = basic_pos_.fetch_add(copied_val.size());
530 LOG(FATAL) <<
"Index out of range";
541 size_t offset = pos_.fetch_add(value.size());
547 size_t offset = basic_pos_.fetch_add(value.size());
553 LOG(FATAL) <<
"Index out of range";
557 void set_value_safe(
size_t idx,
const std::string_view& value);
559 inline std::string_view
get_view(
size_t idx)
const {
568 void ingest(uint32_t index, grape::OutArchive& arc)
override {
569 std::string_view val;
602 template <
typename INDEX_T>
605 template <
typename INDEX_T>
624 const std::string& tmp_path)
override {
625 meta_map_->copy_to_tmp(cur_path +
".map_meta", tmp_path +
".map_meta");
629 const std::string& work_dir)
override;
632 void dump(
const std::string& filename)
override;
634 void touch(
const std::string& filename)
override {
650 void set_value(
size_t idx,
const std::string_view& val);
656 std::string_view
get_view(
size_t idx)
const;
662 void ingest(uint32_t index, grape::OutArchive& arc)
override {
663 std::string_view val;
681 template <
typename INDEX_T>
684 const std::string& work_dir) {
686 meta_map_->open(name +
".map_meta",
snapshot_dir, work_dir);
687 meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
690 template <
typename INDEX_T>
692 index_col_.open_in_memory(name);
693 meta_map_->open_in_memory(name +
".map_meta");
694 meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
697 template <
typename INDEX_T>
700 index_col_.open_with_hugepages(name, force);
701 meta_map_->open_with_hugepages(name +
".map_meta",
true);
702 meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
705 template <
typename INDEX_T>
707 index_col_.dump(filename);
708 meta_map_->dump(filename +
".map_meta",
"");
711 template <
typename INDEX_T>
713 INDEX_T ind = index_col_.get_view(idx);
714 return meta_map_->get_key(ind).AsStringView();
717 template <
typename INDEX_T>
719 const std::string_view& val) {
721 if (!meta_map_->get_index(val, lid)) {
723 if (!meta_map_->get_index(val, lid)) {
724 lid = meta_map_->insert(val);
728 index_col_.set_value(idx, lid);
735 const std::vector<PropertyType>& sub_types = {});
738 template <
typename EDATA_T>
739 class ConcatColumn :
public ColumnBase {
743 ConcatColumn(
const TypedColumn<EDATA_T>& basic_column,
744 const TypedColumn<EDATA_T>& extra_column)
745 : basic_column_(basic_column),
746 extra_column_(extra_column),
747 basic_size_(basic_column.size()) {}
749 void open(
const std::string& name,
const std::string&
snapshot_dir,
750 const std::string& work_dir) {
751 LOG(FATAL) <<
"not implemented";
754 void open_in_memory(
const std::string& name) {
755 LOG(FATAL) <<
"not implemented";
758 void open_with_hugepages(
const std::string& name,
bool force) {
759 LOG(FATAL) <<
"not implemented";
762 void close() { LOG(FATAL) <<
"not implemented"; }
764 EDATA_T get_view(
size_t index)
const {
765 return index < basic_size_ ? basic_column_.get(index)
766 : extra_column_.get(index - basic_size_);
769 void touch(
const std::string& filename) { LOG(FATAL) <<
"not implemented"; }
771 virtual void dump(
const std::string& filename) {
772 LOG(FATAL) <<
"not implemented";
775 size_t size()
const {
return basic_size_ + extra_column_.size(); }
777 void copy_to_tmp(
const std::string& cur_path,
const std::string& tmp_path) {
778 LOG(FATAL) <<
"not implemented";
780 void resize(
size_t size) { LOG(FATAL) <<
"not implemented"; }
782 PropertyType type()
const {
return AnyConverter<EDATA_T>::type(); }
784 void set_any(
size_t index,
const Any& value) {
785 LOG(FATAL) <<
"not implemented";
788 Any get(
size_t index)
const {
789 if (index < basic_size_) {
790 return basic_column_.get(index);
792 return extra_column_.get(index - basic_size_);
796 void ingest(uint32_t index, grape::OutArchive& arc) {
797 LOG(FATAL) <<
"not implemented";
801 return basic_column_.storage_strategy();
805 const TypedColumn<EDATA_T>& basic_column_;
806 const TypedColumn<EDATA_T>& extra_column_;
815 virtual Any get(
size_t index)
const = 0;
819 template <
typename T>
868 LOG(ERROR) <<
"LabelKeyColumn does not support get() to Any";
889 LOG(ERROR) <<
"GlobalId Column does not support get() to Any";
901 std::shared_ptr<ColumnBase> column);
905 #endif // GRAPHSCOPE_PROPERTY_COLUMN_H_
const mmap_array< std::string_view > & extra_buffer() const
Definition: column.h:582
size_t extra_buffer_size() const
Definition: column.h:586
void touch(const std::string &filename) override
Definition: column.h:120
void resize(size_t size) override
Definition: column.h:176
T get_view(size_t index) const
Definition: column.h:838
size_t size() const
Definition: column.h:843
size_t basic_buffer_size() const
Definition: column.h:580
Any get(size_t index) const override
Definition: column.h:867
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:72
void resize(size_t size, size_t data_size)
Definition: mmap_array.h:483
static uint16_t GetStringDefaultMaxLength()
Definition: types.cc:103
size_t size() const
Definition: mmap_array.h:498
const LFIndexer< INDEX_T > & get_meta_map() const
Definition: column.h:673
void dump(const std::string &filename) override
Definition: column.h:706
PropertyType type() const override
Definition: column.h:514
std::string_view get_view(size_t idx) const
Definition: column.h:712
virtual void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir)=0
std::vector< PropertyType > types_
Definition: column.h:294
void resize(size_t size) override
Definition: column.h:486
TypedColumn(StorageStrategy strategy)
Definition: column.h:69
void dump(const std::string &filename) override
Definition: column.h:456
void set_value(size_t index, const T &val)
Definition: column.h:189
void touch(const std::string &filename) override
Definition: column.h:255
virtual PropertyType type() const =0
PropertyType type() const override
Definition: column.h:272
void open(const std::string &filename, bool sync_to_file)
Definition: mmap_array.h:463
grape::SpinLock lock_
Definition: column.h:678
const mmap_array< std::string_view > & basic_buffer() const
Definition: column.h:574
std::shared_ptr< Table > table_
Definition: column.h:295
uint16_t width_
Definition: column.h:597
~TypedRefColumn()
Definition: column.h:882
LabelKey label_key_
Definition: column.h:873
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:320
mmap_array< T > basic_buffer_
Definition: column.h:226
void set_any(size_t index, const Any &value) override
Definition: column.h:331
Create a RefColumn for ease of use by hqps.
Definition: column.h:812
StorageStrategy storage_strategy() const override
Definition: column.h:286
void close() override
Definition: column.h:137
const TypedColumn< INDEX_T > & get_index_col() const
Definition: column.h:672
size_t size() const override
Definition: column.h:174
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:662
const mmap_array< T > & extra_buffer
Definition: column.h:852
size_t size() const override
Definition: column.h:484
void set_any(size_t idx, const Any &value) override
Definition: column.h:652
StringMapColumn(StorageStrategy strategy)
Definition: column.h:608
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:244
StorageStrategy storage_strategy() const override
Definition: column.h:341
void close() override
Definition: column.h:638
static PropertyType Varchar(uint16_t max_length)
Definition: types.cc:357
~TypedColumn()
Definition: column.h:357
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:101
TypedColumn(StorageStrategy strategy)
Definition: column.h:353
void resize(size_t size)
Definition: mmap_array.h:319
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:698
PropertyType type() const override
Definition: column.h:187
size_t size() const override
Definition: column.h:645
uint8_t label_data_type
Definition: types.h:284
void open_in_memory(const std::string &prefix) override
Definition: column.h:380
TypedColumn(StorageStrategy strategy, uint16_t width)
Definition: column.h:349
Definition: adj_list.h:23
~StringMapColumn()
Definition: column.h:615
void dump(const std::string &filename) override
Definition: column.h:259
Any get(size_t index) const override
Definition: column.h:335
static const PropertyType kEmpty
Definition: types.h:137
StorageStrategy
Definition: types.h:58
TypedColumn< INDEX_T > index_col_
Definition: column.h:676
void close() override
Definition: column.h:432
Any get(size_t index) const override
Definition: column.h:845
std::shared_ptr< RefColumnBase > CreateRefColumn(std::shared_ptr< ColumnBase > column)
Definition: column.cc:260
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:212
Any get(size_t index) const override
Definition: column.h:208
void reset()
Definition: mmap_array.h:453
LabelKey get_view(size_t index) const
Definition: column.h:865
StorageStrategy storage_strategy() const override
Definition: column.h:218
mmap_array< std::string_view > extra_buffer_
Definition: column.h:591
void open(const std::string &filename, bool sync_to_file=false)
Definition: mmap_array.h:129
void resize(size_t size) override
Definition: column.h:646
size_t basic_size
Definition: column.h:851
const mmap_array< T > & extra_buffer() const
Definition: column.h:222
void open_in_memory(const std::string &name) override
Definition: column.h:691
StorageStrategy strategy_
Definition: column.h:344
size_t extra_size
Definition: column.h:853
virtual StorageStrategy storage_strategy() const =0
TypedColumn(StorageStrategy strategy)
Definition: column.h:314
Any get(size_t index) const override
Definition: column.h:888
StorageStrategy strategy_
Definition: column.h:595
T get_view(size_t index) const
Definition: column.h:203
TypedRefColumn(const TypedColumn< T > &column)
Definition: column.h:830
std::shared_ptr< ColumnBase > CreateColumn(PropertyType type, StorageStrategy strategy, const std::vector< PropertyType > &sub_types)
Definition: column.cc:141
PropertyType type_
Definition: column.h:598
std::atomic< size_t > pos_
Definition: column.h:593
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:339
void set_value(size_t idx, const std::string_view &val)
Definition: column.h:516
size_t basic_buffer_size() const
Definition: column.h:221
TypedRefColumn(const mmap_array< T > &buffer, StorageStrategy strategy)
Definition: column.h:824
StorageStrategy storage_strategy() const override
Definition: column.h:578
std::shared_mutex rw_mutex_
Definition: column.h:596
std::atomic< size_t > basic_pos_
Definition: column.h:594
~TypedColumn()
Definition: column.h:242
size_t basic_size_
Definition: column.h:590
~TypedRefColumn()
Definition: column.h:863
void open_in_memory(const std::string &name) override
Definition: column.h:89
void open_with_hugepages(const std::string &prefix, bool force) override
Definition: column.h:390
size_t size() const
Definition: mmap_array.h:415
TypedColumn(const std::vector< PropertyType > &types)
Definition: column.h:236
virtual void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path)=0
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:682
static const PropertyType kRecordView
Definition: types.h:153
virtual void set_any(size_t index, const Any &value)=0
void touch(const std::string &filename) override
Definition: column.h:634
Definition: mmap_array.h:447
StorageStrategy strategy_
Definition: column.h:230
size_t extra_size_
Definition: column.h:592
virtual ~RefColumnBase()
Definition: column.h:814
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:282
void touch(const std::string &filename) override
Definition: column.h:406
~TypedColumn()
Definition: column.h:70
size_t size() const override
Definition: column.h:326
TypedRefColumn(label_t label_key)
Definition: column.h:880
virtual void dump(const std::string &filename)=0
void dump(const std::string &filename) override
Definition: column.h:157
mmap_array< T > extra_buffer_
Definition: column.h:228
~TypedColumn()
Definition: column.h:315
T value_type
Definition: column.h:822
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:317
virtual Any get(size_t index) const =0
virtual ~ColumnBase()
Definition: column.h:34
virtual size_t size() const =0
PropertyType type() const override
Definition: column.h:648
virtual void open_in_memory(const std::string &name)=0
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:568
void copy_file(const std::string &src, const std::string &dst)
Definition: file_names.h:80
GlobalId get_view(size_t index) const
Definition: column.h:884
size_t extra_size_
Definition: column.h:229
void set_any(size_t index, const Any &value) override
Definition: column.h:199
std::string_view truncate_utf8(std::string_view str, size_t length)
Definition: column.cc:25
void touch(const std::string &filename) override
Definition: column.h:321
Definition: mmap_array.h:65
Any get(size_t idx) const override
Definition: column.h:658
StorageStrategy storage_strategy() const override
Definition: column.h:668
std::string snapshot_dir(const std::string &work_dir, uint32_t version)
Definition: file_names.h:192
Definition: loading_config.h:232
StorageStrategy strategy_
Definition: column.h:855
virtual void open_with_hugepages(const std::string &name, bool force)=0
void set_value(size_t index, const grape::EmptyType &value)
Definition: column.h:333
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:263
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:359
void close() override
Definition: column.h:325
std::string_view get_view(size_t idx) const
Definition: column.h:559
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:623
virtual Any get(size_t index) const =0
std::vector< PropertyType > sub_types() const
Definition: column.h:291
const mmap_array< T > & basic_buffer
Definition: column.h:850
virtual void ingest(uint32_t index, grape::OutArchive &arc)=0
void dump(const std::string &filename) override
Definition: column.h:322
Any get(size_t idx) const override
Definition: column.h:564
void set(size_t idx, size_t offset, const std::string_view &val)
Definition: mmap_array.h:488
size_t extra_buffer_size() const
Definition: column.h:223
~TypedRefColumn()
Definition: column.h:836
label_t label_key_
Definition: column.h:894
void resize(size_t size) override
Definition: column.h:327
void open_in_memory(const std::string &name) override
Definition: column.h:319
grape::EmptyType get_view(size_t index) const
Definition: column.h:337
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:437
LFIndexer< INDEX_T > * meta_map_
Definition: column.h:677
TypedRefColumn(LabelKey label_key)
Definition: column.h:861
void set_value(size_t idx, const std::string_view &val)
Definition: column.h:718
PropertyType type() const override
Definition: column.h:329
virtual void resize(size_t size)=0
void reset()
Definition: mmap_array.h:84
void set(size_t idx, const T &val)
Definition: mmap_array.h:408
virtual void touch(const std::string &filename)=0
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:142
const mmap_array< T > & basic_buffer() const
Definition: column.h:220
Definition: id_indexer.h:184
static const PropertyType kStringMap
Definition: types.h:150
void set_any(size_t idx, const Any &value) override
Definition: column.h:534
typename LabelKey::label_data_type label_t
Definition: column.h:879
std::string_view AsStringView() const
Definition: types.h:657
size_t basic_size_
Definition: column.h:227
mmap_array< std::string_view > basic_buffer_
Definition: column.h:589
void set_value_with_check(size_t idx, const std::string_view &value)
Definition: column.h:539
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:251
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:323