Go to the documentation of this file.
16 #ifndef STORAGE_RT_MUTABLE_GRAPH_LOADING_CONFIG_H_
17 #define STORAGE_RT_MUTABLE_GRAPH_LOADING_CONFIG_H_
19 #include <boost/functional/hash.hpp>
25 #include <unordered_map>
26 #include <unordered_set>
27 #include "arrow/api.h"
28 #include "arrow/csv/options.h"
33 #include "boost/algorithm/string.hpp"
37 namespace reader_options {
65 namespace loader_options {
76 namespace config_parsing {
98 const Schema& schema,
const std::string& yaml_file);
100 const Schema& schema,
const YAML::Node& yaml_node);
106 const std::string& format);
110 const std::string& file_path);
116 const std::string& dst_label,
117 const std::string& edge_label,
size_t src_pri_key_ind,
118 size_t dst_pri_key_ind,
const std::string& file_path);
120 void SetScheme(
const std::string& data_source);
138 std::string
GetMetaData(
const std::string& key)
const;
139 const std::unordered_map<schema_label_type, std::vector<std::string>>&
141 const std::unordered_map<edge_triplet_type, std::vector<std::string>,
142 boost::hash<edge_triplet_type>>&
147 const std::vector<std::tuple<size_t, std::string, std::string>>&
152 const std::vector<std::tuple<size_t, std::string, std::string>>&
157 const std::pair<std::vector<std::pair<std::string, size_t>>,
158 std::vector<std::pair<std::string, size_t>>>&
189 std::unordered_map<schema_label_type, std::vector<std::string>>
192 std::vector<std::tuple<size_t, std::string, std::string>>>
198 std::unordered_map<edge_triplet_type, std::vector<std::string>,
199 boost::hash<edge_triplet_type>>
205 std::vector<std::tuple<size_t, std::string, std::string>>,
206 boost::hash<edge_triplet_type>>
217 std::pair<std::vector<std::pair<std::string, size_t>>,
218 std::vector<std::pair<std::string, size_t>>>,
219 boost::hash<edge_triplet_type>>
223 const std::string& config_file,
const Schema& schema,
250 #endif // STORAGE_RT_MUTABLE_GRAPH_LOADING_CONFIG_H_
static const char * DELIMITER
Definition: loading_config.h:43
const std::vector< std::tuple< size_t, std::string, std::string > > & GetEdgeColumnMappings(label_t src_label_id, label_t dst_label_id, label_t edge_label_id) const
Definition: loading_config.cc:906
bool GetIsEscaping() const
Definition: loading_config.cc:834
const BulkLoadMethod & GetMethod() const
Definition: loading_config.cc:824
static const char * QUOTE_CHAR
Definition: loading_config.h:50
const std::string & GetFormat() const
Definition: loading_config.cc:822
static constexpr const char * USE_MMAP_VECTOR
Definition: loading_config.h:68
std::tuple< schema_label_type, schema_label_type, schema_label_type > edge_triplet_type
Definition: loading_config.h:94
const std::unordered_map< schema_label_type, std::vector< std::string > > & GetVertexLoadingMeta() const
Definition: loading_config.cc:886
static constexpr const int32_t DEFAULT_PARALLELISM
Definition: loading_config.h:69
bool GetIsDoubleQuoting() const
Definition: loading_config.cc:852
std::unordered_map< schema_label_type, std::vector< std::string > > vertex_loading_meta_
Definition: loading_config.h:190
Status AddEdgeSources(const std::string &src_label, const std::string &dst_label, const std::string &edge_label, size_t src_pri_key_ind, size_t dst_pri_key_ind, const std::string &file_path)
Definition: loading_config.cc:785
static constexpr const bool DEFAULT_BUILD_CSR_IN_MEM
Definition: loading_config.h:70
void SetDelimiter(const char &delimiter)
Definition: loading_config.cc:803
std::unordered_map< std::string, std::string > metadata_
Definition: loading_config.h:187
bool use_mmap_vector_
Definition: loading_config.h:182
std::string format_
Definition: loading_config.h:179
std::unordered_map< edge_triplet_type, std::pair< std::vector< std::pair< std::string, size_t > >, std::vector< std::pair< std::string, size_t > > >, boost::hash< edge_triplet_type > > edge_src_dst_col_
Definition: loading_config.h:220
static gs::Result< LoadingConfig > ParseFromYamlFile(const Schema &schema, const std::string &yaml_file)
Definition: loading_config.cc:716
static const char * BATCH_READER
Definition: loading_config.h:55
static const char * HEADER_ROW
Definition: loading_config.h:44
static const char * BATCH_SIZE_KEY
Definition: loading_config.h:52
Definition: adj_list.h:23
static const char * INCLUDE_COLUMNS
Definition: loading_config.h:45
bool GetBuildCsrInMem() const
Definition: loading_config.h:172
void SetUseMmapVector(bool use_mmap_vector)
Definition: loading_config.h:168
const std::vector< std::tuple< size_t, std::string, std::string > > & GetVertexColumnMappings(label_t label_id) const
Definition: loading_config.cc:898
bool GetIsBatchReader() const
Definition: loading_config.cc:869
static const char * ESCAPING
Definition: loading_config.h:47
int32_t parallelism_
Definition: loading_config.h:180
static const bool DEFAULT_BATCH_READER
Definition: loading_config.h:39
Schema::label_type schema_label_type
Definition: loading_config.h:91
void SetBuildCsrInMem(bool build_csr_in_mem)
Definition: loading_config.h:165
const std::string & GetDelimiter() const
Definition: loading_config.cc:813
bool GetHasHeaderRow() const
Definition: loading_config.cc:817
static const int32_t DEFAULT_BLOCK_SIZE
Definition: loading_config.h:38
std::string GetMetaData(const std::string &key) const
Definition: loading_config.cc:877
void SetScheme(const std::string &data_source)
Definition: loading_config.cc:802
const std::pair< std::vector< std::pair< std::string, size_t > >, std::vector< std::pair< std::string, size_t > > > & GetEdgeSrcDstCol(label_t src_label_id, label_t dst_label_id, label_t edge_label_id) const
Definition: loading_config.cc:917
static const std::unordered_set< std::string > CSV_META_KEY_WORDS
Definition: loading_config.h:58
int32_t GetParallelism() const
Definition: loading_config.h:171
Status parse_bulk_load_config_yaml(const YAML::Node &root, const Schema &schema, LoadingConfig &load_config)
Definition: loading_config.cc:556
static const char * NULL_VALUES
Definition: loading_config.h:56
const std::vector< std::string > & GetNullValues() const
Definition: loading_config.cc:857
std::unordered_map< schema_label_type, std::vector< std::tuple< size_t, std::string, std::string > > > vertex_column_mappings_
Definition: loading_config.h:193
bool build_csr_in_mem_
Definition: loading_config.h:181
const std::unordered_map< edge_triplet_type, std::vector< std::string >, boost::hash< edge_triplet_type > > & GetEdgeLoadingMeta() const
Definition: loading_config.cc:893
bool GetIsQuoting() const
Definition: loading_config.cc:847
ostream & operator<<(ostream &os, const gs::BulkLoadMethod &method)
Definition: loading_config.h:234
static const char * DOUBLE_QUOTE
Definition: loading_config.h:51
BulkLoadMethod method_
Definition: loading_config.h:178
const std::string & GetScheme() const
Definition: loading_config.cc:811
Status AddVertexSources(const std::string &label, const std::string &file_path)
Definition: loading_config.cc:778
BulkLoadMethod
Definition: loading_config.h:86
label_t label_type
Definition: schema.h:62
LoadingConfig(const Schema &schema)
Definition: loading_config.cc:754
static const char * QUOTING
Definition: loading_config.h:49
Definition: loading_config.h:232
static gs::Result< LoadingConfig > ParseFromYamlNode(const Schema &schema, const YAML::Node &yaml_node)
Definition: loading_config.cc:733
Status parse_bulk_load_config_file(const std::string &config_file, const Schema &schema, LoadingConfig &load_config)
Definition: loading_config.cc:531
std::unordered_map< edge_triplet_type, std::vector< std::string >, boost::hash< edge_triplet_type > > edge_loading_meta_
Definition: loading_config.h:200
static constexpr const char * BUILD_CSR_IN_MEM
Definition: loading_config.h:67
char GetQuotingChar() const
Definition: loading_config.cc:839
void SetMethod(const BulkLoadMethod &method)
Definition: loading_config.cc:806
static constexpr const char * PARALLELISM
Definition: loading_config.h:66
static const char * ESCAPE_CHAR
Definition: loading_config.h:48
static const char * COLUMN_TYPES
Definition: loading_config.h:46
bool GetUseMmapVector() const
Definition: loading_config.h:173
char GetEscapeChar() const
Definition: loading_config.cc:826
Definition: loading_config.h:89
std::vector< std::string > null_values_
Definition: loading_config.h:184
uint8_t label_t
Definition: types.h:32
int32_t GetBatchSize() const
Definition: loading_config.cc:861
std::string scheme_
Definition: loading_config.h:177
static constexpr const bool DEFAULT_USE_MMAP_VECTOR
Definition: loading_config.h:71
void SetParallelism(int32_t parallelism)
Definition: loading_config.h:162
std::unordered_map< edge_triplet_type, std::vector< std::tuple< size_t, std::string, std::string > >, boost::hash< edge_triplet_type > > edge_column_mappings_
Definition: loading_config.h:207
const Schema & schema_
Definition: loading_config.h:176