16 #ifndef GRAPHSCOPE_PROPERTY_COLUMN_H_
17 #define GRAPHSCOPE_PROPERTY_COLUMN_H_
19 #include <shared_mutex>
21 #include <string_view>
22 #include "grape/utils/concurrent_queue.h"
26 #include "grape/serialization/out_archive.h"
// Declaration only; the definition is not in this header.
// NOTE(review): the name suggests `str` is shortened to at most `length`
// without splitting a UTF-8 sequence, and that the returned view aliases
// `str` — confirm both (and whether `length` counts bytes or code points)
// against the definition.
30 std::string_view
truncate_utf8(std::string_view str,
size_t length);
37 const std::string& work_dir) = 0;
// Pure virtual: every concrete column supplies its own touch(filename).
// NOTE(review): the implementations visible in this listing either open a
// temporary buffer on `filename` with the second argument `true`, log at FATAL
// (RecordView), or do nothing (EmptyType) — confirm the intended contract.
45 virtual void touch(
const std::string& filename) = 0;
// Pure virtual: every concrete column supplies its own dump(filename).
// NOTE(review): presumably persists the column's contents under `filename`;
// the RecordView specialization logs at FATAL and the EmptyType one is a no-op.
47 virtual void dump(
const std::string& filename) = 0;
// Pure virtual size accessor.
// NOTE(review): presumably the number of entries in the column — confirm.
49 virtual size_t size()
const = 0;
52 const std::string& tmp_path) = 0;
// Pure virtual type-erased setter: takes the target position `index` and the
// new content as an Any.
57 virtual void set_any(
size_t index,
const Any& value) = 0;
// Pure virtual type-erased getter: returns the entry at `index` as an Any.
// Typed access is offered separately by the concrete columns' get_view().
59 virtual Any get(
size_t index)
const = 0;
// Pure virtual: takes a position and a grape::OutArchive.
// NOTE(review): presumably deserializes one value from `arc` into slot
// `index` — confirm. Note that `index` is uint32_t here, whereas the other
// accessors of this interface take size_t.
61 virtual void ingest(uint32_t index, grape::OutArchive& arc) = 0;
73 const std::string& work_dir)
override {
75 if (std::filesystem::exists(basic_path)) {
90 if (!name.empty() && std::filesystem::exists(name)) {
103 if (!name.empty() && std::filesystem::exists(name)) {
115 LOG(INFO) <<
"Open " << name <<
" with normal mmap pages";
120 void touch(
const std::string& filename)
override {
122 tmp.
open(filename,
true);
143 const std::string& tmp_path)
override {
145 if (!std::filesystem::exists(cur_path)) {
151 tmp.
open(tmp_path,
true);
157 void dump(
const std::string& filename)
override {
164 tmp.
open(filename,
true);
195 throw std::runtime_error(
"Index out of range");
212 void ingest(uint32_t index, grape::OutArchive& arc)
override {
236 TypedColumn(
const std::vector<PropertyType>& types) : types_(types) {
237 if (types.size() == 0) {
238 LOG(FATAL) <<
"RecordView column must have sub types.";
245 const std::string& work_dir)
override {
246 LOG(FATAL) <<
"RecordView column does not support open.";
252 LOG(FATAL) <<
"RecordView column does not support open with hugepages.";
255 void touch(
const std::string& filename)
override {
256 LOG(FATAL) <<
"RecordView column does not support touch.";
259 void dump(
const std::string& filename)
override {
260 LOG(FATAL) <<
"RecordView column does not support dump.";
264 const std::string& tmp_path)
override {
265 LOG(FATAL) <<
"RecordView column does not support copy_to_tmp.";
// RecordView column: close() is only declared here; its body is not visible
// in this listing.
267 void close()
override;
// RecordView column: size() is only declared here; its body is not visible
// in this listing.
269 size_t size()
const override;
// RecordView column: type-erased setter, declared here only; its body is not
// visible in this listing.
274 void set_any(
size_t index,
const Any& value)
override;
// RecordView column: type-erased getter, declared here only; its body is not
// visible in this listing.
280 Any get(
size_t index)
const override;
282 void ingest(uint32_t index, grape::OutArchive& arc)
override {
283 LOG(FATAL) <<
"RecordView column does not support ingest.";
287 LOG(ERROR) <<
"RecordView column does not have storage strategy.";
// Returns, by value (i.e. as a fresh copy on every call), the sub-type list
// stored in types_, which the constructor initializes from its `types`
// argument.
291 std::vector<PropertyType>
sub_types()
const {
return types_; }
318 const std::string& work_dir)
override {}
// Empty body: touching this column does nothing and `filename` is ignored.
321 void touch(
const std::string& filename)
override {}
// Empty body: dumping this column does nothing and `filename` is ignored.
322 void dump(
const std::string& filename)
override {}
324 const std::string& tmp_path)
override {}
// Always reports a size of 0, regardless of any earlier calls.
326 size_t size()
const override {
return 0; }
// Empty body: both `index` and `value` are ignored, so any index is accepted
// without a range check.
333 void set_value(
size_t index,
const grape::EmptyType& value) {}
// Returns a freshly constructed grape::EmptyType for every `index`; the index
// is not inspected.
337 grape::EmptyType
get_view(
size_t index)
const {
return grape::EmptyType(); }
// Empty body: nothing is read from `arc`, so the archive is left exactly as
// it was passed in.
339 void ingest(uint32_t index, grape::OutArchive& arc)
override {}
360 const std::string& work_dir)
override {
362 if (std::filesystem::exists(basic_path +
".items")) {
370 if (work_dir ==
"") {
401 LOG(INFO) <<
"Open " << prefix <<
" with normal mmap pages";
406 void touch(
const std::string& filename)
override {
408 tmp.
open(filename,
true);
413 tmp.
set(k, offset, val);
414 offset += val.size();
419 offset += val.size();
438 const std::string& tmp_path)
override {
440 if (!std::filesystem::exists(cur_path +
".data")) {
443 copy_file(cur_path +
".data", tmp_path +
".data");
444 copy_file(cur_path +
".items", tmp_path +
".items");
450 tmp.
open(tmp_path,
true);
456 void dump(
const std::string& filename)
override {
465 tmp.
open(filename,
true);
471 tmp.
set(k, offset, val);
472 offset += val.size();
477 offset += val.size();
487 std::unique_lock<std::shared_mutex> lock(rw_mutex_);
495 size_t basic_avg_width =
508 size_t pos = basic_pos_.load();
509 pos = pos + (pos + 4) / 5;
516 void set_value(
size_t idx,
const std::string_view& val) {
517 auto copied_val = val;
518 if (copied_val.size() >= width_) {
519 VLOG(1) <<
"String length" << copied_val.size()
520 <<
" exceeds the maximum length: " << width_ <<
", cut off.";
524 size_t offset = pos_.fetch_add(copied_val.size());
527 size_t offset = basic_pos_.fetch_add(copied_val.size());
530 LOG(FATAL) <<
"Index out of range";
541 size_t offset = pos_.fetch_add(value.size());
547 size_t offset = basic_pos_.fetch_add(value.size());
553 LOG(FATAL) <<
"Index out of range";
// Declaration only; the body is not visible in this listing.
// NOTE(review): presumably the variant of set_value() that synchronizes on
// this class's std::shared_mutex (rw_mutex_) — confirm against the definition.
557 void set_value_safe(
size_t idx,
const std::string_view& value);
559 inline std::string_view
get_view(
size_t idx)
const {
568 void ingest(uint32_t index, grape::OutArchive& arc)
override {
569 std::string_view val;
602 template <
typename INDEX_T>
605 template <
typename INDEX_T>
624 const std::string& tmp_path)
override {
625 meta_map_->copy_to_tmp(cur_path +
".map_meta", tmp_path +
".map_meta");
629 const std::string& work_dir)
override;
// Declared here, defined out of line further down in this header: it dumps
// index_col_ to `filename` and meta_map_ to `filename + ".map_meta"`.
632 void dump(
const std::string& filename)
override;
634 void touch(
const std::string& filename)
override {
// Declared here, defined out of line further down in this header: it looks
// `val` up in meta_map_ (inserting it when absent) and stores the resulting
// index in index_col_ at position `idx`.
650 void set_value(
size_t idx,
const std::string_view& val);
// Declared here, defined out of line further down in this header: it reads
// the INDEX_T stored at `idx` in index_col_ and returns
// meta_map_->get_key(...) for it as a string view.
656 std::string_view
get_view(
size_t idx)
const;
662 void ingest(uint32_t index, grape::OutArchive& arc)
override {
663 std::string_view val;
681 template <
typename INDEX_T>
684 const std::string& work_dir) {
686 meta_map_->open(name +
".map_meta",
snapshot_dir, work_dir);
687 meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
690 template <
typename INDEX_T>
692 index_col_.open_in_memory(name);
693 meta_map_->open_in_memory(name +
".map_meta");
694 meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
697 template <
typename INDEX_T>
700 index_col_.open_with_hugepages(name, force);
701 meta_map_->open_with_hugepages(name +
".map_meta",
true);
702 meta_map_->reserve(std::numeric_limits<INDEX_T>::max());
705 template <
typename INDEX_T>
707 index_col_.dump(filename);
708 meta_map_->dump(filename +
".map_meta",
"");
711 template <
typename INDEX_T>
713 INDEX_T ind = index_col_.get_view(idx);
714 return meta_map_->get_key(ind).AsStringView();
717 template <
typename INDEX_T>
719 const std::string_view& val) {
721 if (!meta_map_->get_index(val, lid)) {
723 if (!meta_map_->get_index(val, lid)) {
724 lid = meta_map_->insert(val);
728 index_col_.set_value(idx, lid);
735 const std::vector<PropertyType>& sub_types = {});
738 template <
typename EDATA_T>
739 class ConcatColumn :
public ColumnBase {
// Builds a view over two existing columns.
// Both columns are kept as const references (the members are reference
// typed), so the caller must keep them alive for as long as this object is
// used. basic_size_ is captured once, from basic_column.size() at construction
// time; a later change in the basic column's size is not reflected here.
743 ConcatColumn(
const TypedColumn<EDATA_T>& basic_column,
744 const TypedColumn<EDATA_T>& extra_column)
745 : basic_column_(basic_column),
746 extra_column_(extra_column),
747 basic_size_(basic_column.size()) {}
749 void open(
const std::string& name,
const std::string&
snapshot_dir,
750 const std::string& work_dir) {
751 LOG(FATAL) <<
"not implemented";
754 void open_in_memory(
const std::string& name) {
755 LOG(FATAL) <<
"not implemented";
758 void open_with_hugepages(
const std::string& name,
bool force) {
759 LOG(FATAL) <<
"not implemented";
// Unsupported on the concatenated view: logs "not implemented" at FATAL
// severity instead of closing anything.
762 void close() { LOG(FATAL) <<
"not implemented"; }
// Returns the typed value at logical position `index`: positions below
// basic_size_ are served by basic_column_, all others by extra_column_ after
// subtracting basic_size_. No upper-bound check is done here.
// Uses get_view(), the typed accessor of TypedColumn; get() yields a
// type-erased Any, which does not match the EDATA_T return type (elsewhere in
// this header an Any is only unwrapped through explicit accessors such as
// AsStringView()).
764 EDATA_T get_view(
size_t index)
const {
765 return index < basic_size_ ? basic_column_.get_view(index)
766 : extra_column_.get_view(index - basic_size_);
// Unsupported on the concatenated view: logs "not implemented" at FATAL
// severity; `filename` is not used.
769 void touch(
const std::string& filename) { LOG(FATAL) <<
"not implemented"; }
771 virtual void dump(
const std::string& filename) {
772 LOG(FATAL) <<
"not implemented";
// Total size of the view: the basic column's size as captured at construction
// (basic_size_) plus the extra column's current size(), queried on each call.
775 size_t size()
const {
return basic_size_ + extra_column_.size(); }
777 void copy_to_tmp(
const std::string& cur_path,
const std::string& tmp_path) {
778 LOG(FATAL) <<
"not implemented";
// Unsupported on the concatenated view: logs "not implemented" at FATAL
// severity; `size` is not used.
780 void resize(
size_t size) { LOG(FATAL) <<
"not implemented"; }
// Reports the property type associated with the element type EDATA_T, as
// given by AnyConverter<EDATA_T>::type(); neither wrapped column is consulted.
782 PropertyType type()
const {
return AnyConverter<EDATA_T>::type(); }
784 void set_any(
size_t index,
const Any& value) {
785 LOG(FATAL) <<
"not implemented";
788 Any get(
size_t index)
const {
789 if (index < basic_size_) {
790 return basic_column_.get(index);
792 return extra_column_.get(index - basic_size_);
796 void ingest(uint32_t index, grape::OutArchive& arc) {
797 LOG(FATAL) <<
"not implemented";
801 return basic_column_.storage_strategy();
805 const TypedColumn<EDATA_T>& basic_column_;
806 const TypedColumn<EDATA_T>& extra_column_;
819 template <
typename T>
868 LOG(ERROR) <<
"LabelKeyColumn does not support get() to Any";
889 LOG(ERROR) <<
"GlobalId Column does not support get() to Any";
901 std::shared_ptr<ColumnBase> column);
virtual void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path)=0
virtual PropertyType type() const =0
virtual void set_any(size_t index, const Any &value)=0
virtual void touch(const std::string &filename)=0
virtual ~ColumnBase()
Definition: column.h:34
virtual size_t size() const =0
virtual StorageStrategy storage_strategy() const =0
virtual void ingest(uint32_t index, grape::OutArchive &arc)=0
virtual Any get(size_t index) const =0
virtual void open_with_hugepages(const std::string &name, bool force)=0
virtual void open_in_memory(const std::string &name)=0
virtual void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir)=0
virtual void resize(size_t size)=0
virtual void dump(const std::string &filename)=0
Definition: id_indexer.h:193
Create RefColumn for ease of usage for hqps.
Definition: column.h:812
virtual ~RefColumnBase()
Definition: column.h:814
virtual Any get(size_t index) const =0
void open_in_memory(const std::string &name) override
Definition: column.h:691
void close() override
Definition: column.h:638
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:662
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:698
const TypedColumn< INDEX_T > & get_index_col() const
Definition: column.h:672
void set_any(size_t idx, const Any &value) override
Definition: column.h:652
std::string_view get_view(size_t idx) const
Definition: column.h:712
void set_value(size_t idx, const std::string_view &val)
Definition: column.h:718
StorageStrategy storage_strategy() const override
Definition: column.h:668
TypedColumn< INDEX_T > index_col_
Definition: column.h:676
void resize(size_t size) override
Definition: column.h:646
const LFIndexer< INDEX_T > & get_meta_map() const
Definition: column.h:673
~StringMapColumn()
Definition: column.h:615
size_t size() const override
Definition: column.h:645
grape::SpinLock lock_
Definition: column.h:678
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:623
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:682
void touch(const std::string &filename) override
Definition: column.h:634
void dump(const std::string &filename) override
Definition: column.h:706
Any get(size_t idx) const override
Definition: column.h:658
StringMapColumn(StorageStrategy strategy)
Definition: column.h:608
PropertyType type() const override
Definition: column.h:648
LFIndexer< INDEX_T > * meta_map_
Definition: column.h:677
TypedColumn(const std::vector< PropertyType > &types)
Definition: column.h:236
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:244
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:282
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:263
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:251
void dump(const std::string &filename) override
Definition: column.h:259
PropertyType type() const override
Definition: column.h:272
std::vector< PropertyType > sub_types() const
Definition: column.h:291
~TypedColumn()
Definition: column.h:242
StorageStrategy storage_strategy() const override
Definition: column.h:286
void touch(const std::string &filename) override
Definition: column.h:255
std::vector< PropertyType > types_
Definition: column.h:294
std::shared_ptr< Table > table_
Definition: column.h:295
size_t size() const override
Definition: column.h:326
Any get(size_t index) const override
Definition: column.h:335
void close() override
Definition: column.h:325
~TypedColumn()
Definition: column.h:315
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:320
void dump(const std::string &filename) override
Definition: column.h:322
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:323
void touch(const std::string &filename) override
Definition: column.h:321
TypedColumn(StorageStrategy strategy)
Definition: column.h:314
grape::EmptyType get_view(size_t index) const
Definition: column.h:337
StorageStrategy strategy_
Definition: column.h:344
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:339
StorageStrategy storage_strategy() const override
Definition: column.h:341
void set_any(size_t index, const Any &value) override
Definition: column.h:331
void open_in_memory(const std::string &name) override
Definition: column.h:319
void set_value(size_t index, const grape::EmptyType &value)
Definition: column.h:333
void resize(size_t size) override
Definition: column.h:327
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:317
PropertyType type() const override
Definition: column.h:329
void close() override
Definition: column.h:432
Any get(size_t idx) const override
Definition: column.h:564
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:568
void open_in_memory(const std::string &prefix) override
Definition: column.h:380
std::string_view get_view(size_t idx) const
Definition: column.h:559
StorageStrategy storage_strategy() const override
Definition: column.h:578
size_t basic_size_
Definition: column.h:590
size_t size() const override
Definition: column.h:484
mmap_array< std::string_view > basic_buffer_
Definition: column.h:589
size_t extra_size_
Definition: column.h:592
void set_value(size_t idx, const std::string_view &val)
Definition: column.h:516
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:437
PropertyType type_
Definition: column.h:598
void dump(const std::string &filename) override
Definition: column.h:456
uint16_t width_
Definition: column.h:597
TypedColumn(StorageStrategy strategy, uint16_t width)
Definition: column.h:349
const mmap_array< std::string_view > & basic_buffer() const
Definition: column.h:574
size_t extra_buffer_size() const
Definition: column.h:586
mmap_array< std::string_view > extra_buffer_
Definition: column.h:591
void touch(const std::string &filename) override
Definition: column.h:406
size_t basic_buffer_size() const
Definition: column.h:580
void resize(size_t size) override
Definition: column.h:486
void set_any(size_t idx, const Any &value) override
Definition: column.h:534
StorageStrategy strategy_
Definition: column.h:595
std::atomic< size_t > pos_
Definition: column.h:593
PropertyType type() const override
Definition: column.h:514
~TypedColumn()
Definition: column.h:357
std::atomic< size_t > basic_pos_
Definition: column.h:594
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:359
std::shared_mutex rw_mutex_
Definition: column.h:596
void set_value_with_check(size_t idx, const std::string_view &value)
Definition: column.h:539
void open_with_hugepages(const std::string &prefix, bool force) override
Definition: column.h:390
const mmap_array< std::string_view > & extra_buffer() const
Definition: column.h:582
TypedColumn(StorageStrategy strategy)
Definition: column.h:353
PropertyType type() const override
Definition: column.h:187
void ingest(uint32_t index, grape::OutArchive &arc) override
Definition: column.h:212
Any get(size_t index) const override
Definition: column.h:208
StorageStrategy storage_strategy() const override
Definition: column.h:218
mmap_array< T > basic_buffer_
Definition: column.h:226
TypedColumn(StorageStrategy strategy)
Definition: column.h:69
void dump(const std::string &filename) override
Definition: column.h:157
void set_any(size_t index, const Any &value) override
Definition: column.h:199
void touch(const std::string &filename) override
Definition: column.h:120
size_t size() const override
Definition: column.h:174
~TypedColumn()
Definition: column.h:70
void open(const std::string &name, const std::string &snapshot_dir, const std::string &work_dir) override
Definition: column.h:72
const mmap_array< T > & extra_buffer() const
Definition: column.h:222
void open_in_memory(const std::string &name) override
Definition: column.h:89
mmap_array< T > extra_buffer_
Definition: column.h:228
size_t extra_buffer_size() const
Definition: column.h:223
void copy_to_tmp(const std::string &cur_path, const std::string &tmp_path) override
Definition: column.h:142
size_t basic_buffer_size() const
Definition: column.h:221
void resize(size_t size) override
Definition: column.h:176
T get_view(size_t index) const
Definition: column.h:203
StorageStrategy strategy_
Definition: column.h:230
void set_value(size_t index, const T &val)
Definition: column.h:189
size_t basic_size_
Definition: column.h:227
const mmap_array< T > & basic_buffer() const
Definition: column.h:220
void close() override
Definition: column.h:137
size_t extra_size_
Definition: column.h:229
void open_with_hugepages(const std::string &name, bool force) override
Definition: column.h:101
Any get(size_t index) const override
Definition: column.h:888
label_t label_key_
Definition: column.h:894
~TypedRefColumn()
Definition: column.h:882
TypedRefColumn(label_t label_key)
Definition: column.h:880
typename LabelKey::label_data_type label_t
Definition: column.h:879
GlobalId get_view(size_t index) const
Definition: column.h:884
Any get(size_t index) const override
Definition: column.h:867
LabelKey label_key_
Definition: column.h:873
LabelKey get_view(size_t index) const
Definition: column.h:865
~TypedRefColumn()
Definition: column.h:863
TypedRefColumn(LabelKey label_key)
Definition: column.h:861
TypedRefColumn(const mmap_array< T > &buffer, StorageStrategy strategy)
Definition: column.h:824
size_t basic_size
Definition: column.h:851
size_t extra_size
Definition: column.h:853
const mmap_array< T > & basic_buffer
Definition: column.h:850
T value_type
Definition: column.h:822
T get_view(size_t index) const
Definition: column.h:838
size_t size() const
Definition: column.h:843
~TypedRefColumn()
Definition: column.h:836
StorageStrategy strategy_
Definition: column.h:855
Any get(size_t index) const override
Definition: column.h:845
const mmap_array< T > & extra_buffer
Definition: column.h:852
TypedRefColumn(const TypedColumn< T > &column)
Definition: column.h:830
Definition: mmap_array.h:447
void reset()
Definition: mmap_array.h:453
void set(size_t idx, size_t offset, const std::string_view &val)
Definition: mmap_array.h:488
size_t size() const
Definition: mmap_array.h:498
void resize(size_t size, size_t data_size)
Definition: mmap_array.h:483
void open(const std::string &filename, bool sync_to_file)
Definition: mmap_array.h:463
Definition: mmap_array.h:65
size_t size() const
Definition: mmap_array.h:415
void set(size_t idx, const T &val)
Definition: mmap_array.h:408
void reset()
Definition: mmap_array.h:84
void resize(size_t size)
Definition: mmap_array.h:319
void open(const std::string &filename, bool sync_to_file=false)
Definition: mmap_array.h:129
Definition: adj_list.h:23
std::shared_ptr< RefColumnBase > CreateRefColumn(std::shared_ptr< ColumnBase > column)
Definition: column.cc:260
std::shared_ptr< ColumnBase > CreateColumn(PropertyType type, StorageStrategy strategy, const std::vector< PropertyType > &sub_types)
Definition: column.cc:141
std::string_view truncate_utf8(std::string_view str, size_t length)
Definition: column.cc:25
void copy_file(const std::string &src, const std::string &dst)
Definition: file_names.h:80
std::string snapshot_dir(const std::string &work_dir, uint32_t version)
Definition: file_names.h:192
StorageStrategy
Definition: types.h:58
Definition: loading_config.h:232
std::string_view AsStringView() const
Definition: types.h:657
uint8_t label_data_type
Definition: types.h:284
static const PropertyType kRecordView
Definition: types.h:153
static const PropertyType kEmpty
Definition: types.h:137
static PropertyType Varchar(uint16_t max_length)
Definition: types.cc:357
static uint16_t GetStringDefaultMaxLength()
Definition: types.cc:103
static const PropertyType kStringMap
Definition: types.h:150