16 #ifndef STORAGE_RT_MUTABLE_GRAPH_LOADING_CONFIG_H_
17 #define STORAGE_RT_MUTABLE_GRAPH_LOADING_CONFIG_H_
19 #include <boost/functional/hash.hpp>
25 #include <unordered_map>
26 #include <unordered_set>
27 #include "arrow/api.h"
28 #include "arrow/csv/options.h"
33 #include "boost/algorithm/string.hpp"
37 namespace reader_options {
65 namespace loader_options {
76 namespace config_parsing {
98 const Schema& schema,
const std::string& yaml_file);
100 const Schema& schema,
const YAML::Node& yaml_node);
106 const std::string& format);
110 const std::string& file_path);
116 const std::string& dst_label,
117 const std::string& edge_label,
size_t src_pri_key_ind,
118 size_t dst_pri_key_ind,
const std::string& file_path);
120 void SetScheme(
const std::string& data_source);
138 std::string
GetMetaData(
const std::string& key)
const;
139 const std::unordered_map<schema_label_type, std::vector<std::string>>&
141 const std::unordered_map<edge_triplet_type, std::vector<std::string>,
142 boost::hash<edge_triplet_type>>&
147 const std::vector<std::tuple<size_t, std::string, std::string>>&
152 const std::vector<std::tuple<size_t, std::string, std::string>>&
157 const std::pair<std::vector<std::pair<std::string, size_t>>,
158 std::vector<std::pair<std::string, size_t>>>&
189 std::unordered_map<schema_label_type, std::vector<std::string>>
192 std::vector<std::tuple<size_t, std::string, std::string>>>
198 std::unordered_map<edge_triplet_type, std::vector<std::string>,
199 boost::hash<edge_triplet_type>>
205 std::vector<std::tuple<size_t, std::string, std::string>>,
206 boost::hash<edge_triplet_type>>
217 std::pair<std::vector<std::pair<std::string, size_t>>,
218 std::vector<std::pair<std::string, size_t>>>,
219 boost::hash<edge_triplet_type>>
223 const std::string& config_file,
const Schema& schema,
Definition: loading_config.h:89
void SetParallelism(int32_t parallelism)
Definition: loading_config.h:162
const std::vector< std::tuple< size_t, std::string, std::string > > & GetVertexColumnMappings(label_t label_id) const
Definition: loading_config.cc:898
bool GetIsDoubleQuoting() const
Definition: loading_config.cc:852
const BulkLoadMethod & GetMethod() const
Definition: loading_config.cc:824
char GetQuotingChar() const
Definition: loading_config.cc:839
std::unordered_map< schema_label_type, std::vector< std::tuple< size_t, std::string, std::string > > > vertex_column_mappings_
Definition: loading_config.h:193
const std::string & GetFormat() const
Definition: loading_config.cc:822
Status AddEdgeSources(const std::string &src_label, const std::string &dst_label, const std::string &edge_label, size_t src_pri_key_ind, size_t dst_pri_key_ind, const std::string &file_path)
Definition: loading_config.cc:785
LoadingConfig(const Schema &schema)
Definition: loading_config.cc:754
std::tuple< schema_label_type, schema_label_type, schema_label_type > edge_triplet_type
Definition: loading_config.h:94
int32_t GetBatchSize() const
Definition: loading_config.cc:861
void SetBuildCsrInMem(bool build_csr_in_mem)
Definition: loading_config.h:165
bool GetIsBatchReader() const
Definition: loading_config.cc:869
std::unordered_map< std::string, std::string > metadata_
Definition: loading_config.h:187
bool GetIsQuoting() const
Definition: loading_config.cc:847
char GetEscapeChar() const
Definition: loading_config.cc:826
bool GetIsEscaping() const
Definition: loading_config.cc:834
Status AddVertexSources(const std::string &label, const std::string &file_path)
Definition: loading_config.cc:778
const std::unordered_map< edge_triplet_type, std::vector< std::string >, boost::hash< edge_triplet_type > > & GetEdgeLoadingMeta() const
Definition: loading_config.cc:893
std::unordered_map< edge_triplet_type, std::vector< std::tuple< size_t, std::string, std::string > >, boost::hash< edge_triplet_type > > edge_column_mappings_
Definition: loading_config.h:207
void SetMethod(const BulkLoadMethod &method)
Definition: loading_config.cc:806
static gs::Result< LoadingConfig > ParseFromYamlNode(const Schema &schema, const YAML::Node &yaml_node)
Definition: loading_config.cc:733
bool build_csr_in_mem_
Definition: loading_config.h:181
void SetDelimiter(const char &delimiter)
Definition: loading_config.cc:803
const std::vector< std::string > & GetNullValues() const
Definition: loading_config.cc:857
std::string GetMetaData(const std::string &key) const
Definition: loading_config.cc:877
std::unordered_map< edge_triplet_type, std::pair< std::vector< std::pair< std::string, size_t > >, std::vector< std::pair< std::string, size_t > > >, boost::hash< edge_triplet_type > > edge_src_dst_col_
Definition: loading_config.h:220
std::string format_
Definition: loading_config.h:179
bool use_mmap_vector_
Definition: loading_config.h:182
bool GetHasHeaderRow() const
Definition: loading_config.cc:817
Schema::label_type schema_label_type
Definition: loading_config.h:91
int32_t parallelism_
Definition: loading_config.h:180
BulkLoadMethod method_
Definition: loading_config.h:178
const std::pair< std::vector< std::pair< std::string, size_t > >, std::vector< std::pair< std::string, size_t > > > & GetEdgeSrcDstCol(label_t src_label_id, label_t dst_label_id, label_t edge_label_id) const
Definition: loading_config.cc:917
static gs::Result< LoadingConfig > ParseFromYamlFile(const Schema &schema, const std::string &yaml_file)
Definition: loading_config.cc:716
int32_t GetParallelism() const
Definition: loading_config.h:171
std::unordered_map< edge_triplet_type, std::vector< std::string >, boost::hash< edge_triplet_type > > edge_loading_meta_
Definition: loading_config.h:200
std::vector< std::string > null_values_
Definition: loading_config.h:184
const std::string & GetScheme() const
Definition: loading_config.cc:811
std::string scheme_
Definition: loading_config.h:177
std::unordered_map< schema_label_type, std::vector< std::string > > vertex_loading_meta_
Definition: loading_config.h:190
bool GetUseMmapVector() const
Definition: loading_config.h:173
void SetScheme(const std::string &data_source)
Definition: loading_config.cc:802
const std::string & GetDelimiter() const
Definition: loading_config.cc:813
bool GetBuildCsrInMem() const
Definition: loading_config.h:172
const std::unordered_map< schema_label_type, std::vector< std::string > > & GetVertexLoadingMeta() const
Definition: loading_config.cc:886
void SetUseMmapVector(bool use_mmap_vector)
Definition: loading_config.h:168
const std::vector< std::tuple< size_t, std::string, std::string > > & GetEdgeColumnMappings(label_t src_label_id, label_t dst_label_id, label_t edge_label_id) const
Definition: loading_config.cc:906
const Schema & schema_
Definition: loading_config.h:176
label_t label_type
Definition: schema.h:77
Status parse_bulk_load_config_file(const std::string &config_file, const Schema &schema, LoadingConfig &load_config)
Definition: loading_config.cc:531
Status parse_bulk_load_config_yaml(const YAML::Node &root, const Schema &schema, LoadingConfig &load_config)
Definition: loading_config.cc:556
static constexpr const char * USE_MMAP_VECTOR
Definition: loading_config.h:68
static constexpr const int32_t DEFAULT_PARALLELISM
Definition: loading_config.h:69
static constexpr const char * BUILD_CSR_IN_MEM
Definition: loading_config.h:67
static constexpr const bool DEFAULT_BUILD_CSR_IN_MEM
Definition: loading_config.h:70
static constexpr const bool DEFAULT_USE_MMAP_VECTOR
Definition: loading_config.h:71
static constexpr const char * PARALLELISM
Definition: loading_config.h:66
static const char * NULL_VALUES
Definition: loading_config.h:56
static const std::unordered_set< std::string > CSV_META_KEY_WORDS
Definition: loading_config.h:58
static const char * QUOTING
Definition: loading_config.h:49
static const char * INCLUDE_COLUMNS
Definition: loading_config.h:45
static const char * ESCAPE_CHAR
Definition: loading_config.h:48
static const char * QUOTE_CHAR
Definition: loading_config.h:50
static const bool DEFAULT_BATCH_READER
Definition: loading_config.h:39
static const char * HEADER_ROW
Definition: loading_config.h:44
static const char * DELIMITER
Definition: loading_config.h:43
static const char * DOUBLE_QUOTE
Definition: loading_config.h:51
static const char * ESCAPING
Definition: loading_config.h:47
static const char * BATCH_SIZE_KEY
Definition: loading_config.h:52
static const char * COLUMN_TYPES
Definition: loading_config.h:46
static const char * BATCH_READER
Definition: loading_config.h:55
static const int32_t DEFAULT_BLOCK_SIZE
Definition: loading_config.h:38
Definition: adj_list.h:23
BulkLoadMethod
Definition: loading_config.h:86
uint8_t label_t
Definition: types.h:32
Definition: loading_config.h:232
ostream & operator<<(ostream &os, const gs::BulkLoadMethod &method)
Definition: loading_config.h:234