Flex  0.17.9
mmap_array.h
Go to the documentation of this file.
1 
16 #ifndef GRAPHSCOPE_UTILS_MMAP_ARRAY_H_
17 #define GRAPHSCOPE_UTILS_MMAP_ARRAY_H_
18 
19 #include <assert.h>
20 
21 #include <atomic>
22 #include <filesystem>
23 #include <string>
24 #include <string_view>
25 
27 #include "glog/logging.h"
28 #include "grape/util.h"
29 
// Address hint and flag set passed to mmap() by allocate_hugepages().
// On IA-64 a fixed address is requested together with MAP_FIXED; on every
// other target the hint is 0, leaving the placement to the kernel.
30 #ifdef __ia64__
31 #define ADDR (void*) (0x8000000000000000UL)
32 #define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
33 #else
34 #define ADDR (void*) (0x0UL)
35 #define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
36 #endif
37 
// Page protection for the huge-page mapping: readable and writable.
38 #define PROTECTION (PROT_READ | PROT_WRITE)
39 
// 2 MiB huge-page size, the matching low-bit mask, and a helper that rounds
// a byte count up to the next multiple of that size.
40 #define HUGEPAGE_SIZE (2UL * 1024 * 1024)
41 #define HUGEPAGE_MASK (2UL * 1024 * 1024 - 1UL)
42 #define ROUND_UP(size) (((size) + HUGEPAGE_MASK) & (~HUGEPAGE_MASK))
43 
44 inline void* allocate_hugepages(size_t size) {
45  return mmap(ADDR, ROUND_UP(size), PROTECTION, FLAGS, -1, 0);
46 }
47 
/// Rounds a byte count up to the next multiple of the 2 MiB huge-page size.
///
/// The mask is built as a size_t constant, so the result keeps the full
/// width of the argument even where unsigned long is narrower than size_t.
///
/// @param size Byte count to round.
/// @return The smallest multiple of 2 MiB that is >= size; 0 stays 0.
///         Values within one huge page of SIZE_MAX wrap around.
inline size_t hugepage_round_up(size_t size) {
  constexpr size_t kHugepageMask =
      (static_cast<size_t>(2) * 1024 * 1024) - static_cast<size_t>(1);
  return (size + kHugepageMask) & ~kHugepageMask;
}
49 
// Drop the helper macros so they do not leak into files including this header.
// NOTE(review): PROTECTION is defined alongside these macros but is not
// undefined here - confirm whether that is intentional.
50 #undef ADDR
51 #undef FLAGS
52 #undef HUGEPAGE_SIZE
53 #undef HUGEPAGE_MASK
54 #undef ROUND_UP
55 
56 namespace gs {
57 
58 enum class MemoryStrategy {
62 };
63 
64 template <typename T>
65 class mmap_array {
66  public:
68  : filename_(""),
69  fd_(-1),
70  data_(NULL),
71  size_(0),
72  mmap_size_(0),
73  sync_to_file_(false),
74  hugepage_prefered_(false) {}
75 
76  mmap_array(const mmap_array<T>& rhs) : fd_(-1) {
77  resize(rhs.size_);
78  memcpy(data_, rhs.data_, size_ * sizeof(T));
79  }
80 
81  mmap_array(mmap_array&& rhs) : mmap_array() { swap(rhs); }
82  ~mmap_array() { reset(); }
83 
84  void reset() {
85  if (data_ != NULL && mmap_size_ != 0) {
86  if (munmap(data_, mmap_size_) != 0) {
87  std::stringstream ss;
88  ss << "Failed to mummap file [ " << filename_ << " ] "
89  << strerror(errno);
90  LOG(ERROR) << ss.str();
91  throw std::runtime_error(ss.str());
92  }
93  }
94  data_ = NULL;
95  size_ = 0;
96  mmap_size_ = 0;
97  if (fd_ != -1) {
98  if (close(fd_) != 0) {
99  std::stringstream ss;
100  ss << "Failed to close file [ " << filename_ << " ] "
101  << strerror(errno);
102  LOG(ERROR) << ss.str();
103  throw std::runtime_error(ss.str());
104  }
105  fd_ = -1;
106  }
107  filename_ = "";
108  sync_to_file_ = false;
109  }
110 
111  void unlink() {
112  std::string old_filename = filename_;
113  reset();
114  if (old_filename != "" && std::filesystem::exists(old_filename)) {
115  if (std::filesystem::remove(old_filename) == 0) {
116  std::stringstream ss;
117  ss << "Failed to remove file [ " << old_filename << " ] "
118  << strerror(errno);
119  LOG(ERROR) << ss.str();
120  throw std::runtime_error(ss.str());
121  }
122  }
123  }
124 
125  void set_hugepage_prefered(bool val) {
126  hugepage_prefered_ = (val && !sync_to_file_);
127  }
128 
129  void open(const std::string& filename, bool sync_to_file = false) {
130  reset();
132  sync_to_file_ = sync_to_file;
133  hugepage_prefered_ = false;
134  if (sync_to_file_) {
135  bool creat = !std::filesystem::exists(filename_);
136  fd_ = ::open(filename_.c_str(), O_RDWR | O_CREAT, 0777);
137  if (fd_ == -1) {
138  std::stringstream ss;
139  ss << "Failed to open file [" << filename_ << "], " << strerror(errno);
140  LOG(ERROR) << ss.str();
141  throw std::runtime_error(ss.str());
142  }
143  if (creat) {
144  std::filesystem::perms readWritePermission =
145  std::filesystem::perms::owner_read |
146  std::filesystem::perms::owner_write;
147  std::error_code errorCode;
148  std::filesystem::permissions(filename, readWritePermission,
149  std::filesystem::perm_options::add,
150  errorCode);
151  if (errorCode) {
152  std::stringstream ss;
153  ss << "Failed to set read/write permission for file: " << filename
154  << " " << errorCode.message() << std::endl;
155  LOG(ERROR) << ss.str();
156  throw std::runtime_error(ss.str());
157  }
158  }
159 
160  size_t file_size = std::filesystem::file_size(filename_);
161  size_ = file_size / sizeof(T);
162  mmap_size_ = file_size;
163  if (mmap_size_ == 0) {
164  data_ = NULL;
165  } else {
166  data_ = reinterpret_cast<T*>(
167  mmap(NULL, mmap_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
168  if (data_ == MAP_FAILED) {
169  std::stringstream ss;
170  ss << "Failed to mmap file [" << filename_ << "], "
171  << strerror(errno);
172  LOG(ERROR) << ss.str();
173  throw std::runtime_error(ss.str());
174  }
175  int rt = madvise(data_, mmap_size_, MADV_RANDOM | MADV_WILLNEED);
176  if (rt != 0) {
177  std::stringstream ss;
178  ss << "Failed to madvise file [" << filename_ << "], "
179  << strerror(errno);
180  LOG(ERROR) << ss.str();
181  throw std::runtime_error(ss.str());
182  }
183  }
184  } else {
185  if (!filename_.empty() && std::filesystem::exists(filename_)) {
186  size_t file_size = std::filesystem::file_size(filename_);
187  fd_ = ::open(filename_.c_str(), O_RDWR, 0777);
188  if (fd_ == -1) {
189  std::stringstream ss;
190  ss << "Failed to open file [" << filename_ << "], "
191  << strerror(errno);
192  LOG(ERROR) << ss.str();
193  throw std::runtime_error(ss.str());
194  }
195  size_ = file_size / sizeof(T);
196  mmap_size_ = file_size;
197  if (mmap_size_ == 0) {
198  data_ = NULL;
199  } else {
200  data_ = reinterpret_cast<T*>(mmap(
201  NULL, mmap_size_, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_, 0));
202  if (data_ == MAP_FAILED) {
203  std::stringstream ss;
204  ss << "Failed to mmap file [" << filename_ << "], "
205  << strerror(errno);
206  LOG(ERROR) << ss.str();
207  throw std::runtime_error(ss.str());
208  }
209  }
210  }
211  }
212  }
213 
214  void open_with_hugepages(const std::string& filename, size_t capacity = 0) {
215  reset();
216  hugepage_prefered_ = true;
217  if (!filename.empty() && std::filesystem::exists(filename)) {
218  size_t file_size = std::filesystem::file_size(filename);
219  size_ = file_size / sizeof(T);
220  if (size_ != 0) {
221  capacity = std::max(capacity, size_);
222  mmap_size_ = hugepage_round_up(capacity * sizeof(T));
223  data_ = static_cast<T*>(allocate_hugepages(mmap_size_));
224  if (data_ != MAP_FAILED) {
225  FILE* fin = fopen(filename.c_str(), "rb");
226  if (fin == NULL) {
227  std::stringstream ss;
228  ss << "Failed to open file [ " << filename << " ], "
229  << strerror(errno);
230  LOG(ERROR) << ss.str();
231  throw std::runtime_error(ss.str());
232  }
233  if (fread(data_, sizeof(T), size_, fin) != size_) {
234  std::stringstream ss;
235  ss << "Failed to fread file [ " << filename << " ], "
236  << strerror(errno);
237  LOG(ERROR) << ss.str();
238  throw std::runtime_error(ss.str());
239  }
240  if (fclose(fin) != 0) {
241  std::stringstream ss;
242  ss << "Failed to fclose file [ " << filename << " ], "
243  << strerror(errno);
244  LOG(ERROR) << ss.str();
245  throw std::runtime_error(ss.str());
246  }
247  } else {
248  LOG(ERROR) << "allocating hugepage failed, " << strerror(errno)
249  << ", try with normal pages";
250  data_ = NULL;
251  open(filename, false);
252  }
253  } else {
254  mmap_size_ = 0;
255  }
256  }
257  }
258 
259  void dump(const std::string& filename) {
260  if (sync_to_file_) {
261  std::string old_filename = filename_;
262  reset();
263  std::error_code errorCode;
264  std::filesystem::rename(old_filename, filename, errorCode);
265  if (errorCode) {
266  std::stringstream ss;
267  ss << "Failed to rename file " << old_filename << " to " << filename
268  << " " << errorCode.message() << std::endl;
269  LOG(ERROR) << ss.str();
270  throw std::runtime_error(ss.str());
271  }
272  } else {
273  FILE* fout = fopen(filename.c_str(), "wb");
274  if (fout == NULL) {
275  std::stringstream ss;
276  ss << "Failed to open file [ " << filename << " ], " << strerror(errno);
277  LOG(ERROR) << ss.str();
278  throw std::runtime_error(ss.str());
279  }
280  if (fwrite(data_, sizeof(T), size_, fout) != size_) {
281  std::stringstream ss;
282  ss << "Failed to fwrite file [ " << filename << " ], "
283  << strerror(errno);
284  LOG(ERROR) << ss.str();
285  throw std::runtime_error(ss.str());
286  }
287  if (fflush(fout) != 0) {
288  std::stringstream ss;
289  ss << "Failed to fflush file [ " << filename << " ], "
290  << strerror(errno);
291  LOG(ERROR) << ss.str();
292  throw std::runtime_error(ss.str());
293  }
294  if (fclose(fout) != 0) {
295  std::stringstream ss;
296  ss << "Failed to fclose file [ " << filename << " ], "
297  << strerror(errno);
298  LOG(ERROR) << ss.str();
299  throw std::runtime_error(ss.str());
300  }
301  reset();
302  }
303 
304  std::filesystem::perms readPermission = std::filesystem::perms::owner_read;
305 
306  std::error_code errorCode;
307  std::filesystem::permissions(filename, readPermission,
308  std::filesystem::perm_options::add, errorCode);
309 
310  if (errorCode) {
311  std::stringstream ss;
312  ss << "Failed to set read permission for file: " << filename << " "
313  << errorCode.message() << std::endl;
314  LOG(ERROR) << ss.str();
315  throw std::runtime_error(ss.str());
316  }
317  }
318 
319  void resize(size_t size) {
320  if (size == size_) {
321  return;
322  }
323 
324  if (sync_to_file_) {
325  if (data_ != NULL && mmap_size_ != 0) {
326  if (munmap(data_, mmap_size_) != 0) {
327  std::stringstream ss;
328  ss << "Failed to mummap file [ " << filename_ << " ], "
329  << strerror(errno);
330  LOG(ERROR) << ss.str();
331  throw std::runtime_error(ss.str());
332  }
333  }
334  size_t new_mmap_size = size * sizeof(T);
335  int rt = ftruncate(fd_, new_mmap_size);
336  if (rt == -1) {
337  std::stringstream ss;
338  ss << "Failed to ftruncate " << rt << ", " << strerror(errno);
339  LOG(ERROR) << ss.str();
340  throw std::runtime_error(ss.str());
341  }
342  if (new_mmap_size == 0) {
343  data_ = NULL;
344  } else {
345  data_ = reinterpret_cast<T*>(mmap(
346  NULL, new_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
347  if (data_ == MAP_FAILED) {
348  std::stringstream ss;
349  ss << "Failed to mmap, " << strerror(errno);
350  LOG(ERROR) << ss.str();
351  throw std::runtime_error(ss.str());
352  }
353  }
354  size_ = size;
355  mmap_size_ = new_mmap_size;
356  } else {
357  size_t target_mmap_size = size * sizeof(T);
358  if (target_mmap_size <= mmap_size_) {
359  size_ = size;
360  } else {
361  T* new_data = NULL;
362  size_t new_mmap_size = size * sizeof(T);
363  if (hugepage_prefered_) {
364  new_data = reinterpret_cast<T*>(allocate_hugepages(new_mmap_size));
365  if (new_data == MAP_FAILED) {
366  LOG(ERROR) << "mmap with hugepage failed, " << strerror(errno)
367  << ", try with normal pages";
368  new_data = NULL;
369  } else {
370  new_mmap_size = hugepage_round_up(new_mmap_size);
371  }
372  }
373  if (new_data == NULL) {
374  new_data = reinterpret_cast<T*>(
375  mmap(NULL, new_mmap_size, PROT_READ | PROT_WRITE,
376  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
377  if (new_data == MAP_FAILED) {
378  std::stringstream ss;
379  ss << "mmap failed " << strerror(errno);
380  LOG(ERROR) << ss.str();
381  throw std::runtime_error(ss.str());
382  }
383  }
384 
385  size_t copy_size = std::min(size, size_);
386  if (copy_size > 0 && data_ != NULL) {
387  memcpy(reinterpret_cast<void*>(new_data),
388  reinterpret_cast<void*>(data_), copy_size * sizeof(T));
389  }
390 
391  reset();
392 
393  data_ = new_data;
394  size_ = size;
395  mmap_size_ = new_mmap_size;
396  }
397  }
398  }
399 
400  void touch(const std::string& filename) {
401  dump(filename);
402  open(filename, true);
403  }
404 
405  T* data() { return data_; }
406  const T* data() const { return data_; }
407 
408  void set(size_t idx, const T& val) { data_[idx] = val; }
409 
410  const T& get(size_t idx) const { return data_[idx]; }
411 
412  const T& operator[](size_t idx) const { return data_[idx]; }
413  T& operator[](size_t idx) { return data_[idx]; }
414 
415  size_t size() const { return size_; }
416 
417  void swap(mmap_array<T>& rhs) {
418  std::swap(filename_, rhs.filename_);
419  std::swap(fd_, rhs.fd_);
420  std::swap(data_, rhs.data_);
421  std::swap(size_, rhs.size_);
422  std::swap(mmap_size_, rhs.mmap_size_);
423  std::swap(hugepage_prefered_, rhs.hugepage_prefered_);
424  std::swap(sync_to_file_, rhs.sync_to_file_);
425  }
426 
427  const std::string& filename() const { return filename_; }
428 
429  private:
430  std::string filename_;
431  int fd_;
432  T* data_;
433  size_t size_;
434 
435  size_t mmap_size_;
436 
439 };
440 
// Index entry for one string stored by mmap_array<std::string_view>.
// The string's bytes live in a separate char buffer; this record says where.
441 struct string_item {
// Byte offset of the string inside the char buffer (48-bit field).
442  uint64_t offset : 48;
// Length of the string in bytes (16-bit field, so at most 65535).
443  uint32_t length : 16;
444 };
445 
446 template <>
447 class mmap_array<std::string_view> {
448  public:
450  mmap_array(mmap_array&& rhs) : mmap_array() { swap(rhs); }
452 
453  void reset() {
454  items_.reset();
455  data_.reset();
456  }
457 
458  void set_hugepage_prefered(bool val) {
459  items_.set_hugepage_prefered(val);
460  data_.set_hugepage_prefered(val);
461  }
462 
463  void open(const std::string& filename, bool sync_to_file) {
464  items_.open(filename + ".items", sync_to_file);
465  data_.open(filename + ".data", sync_to_file);
466  }
467 
468  void open_with_hugepages(const std::string& filename) {
469  items_.open_with_hugepages(filename + ".items");
470  data_.open_with_hugepages(filename + ".data");
471  }
472 
473  void touch(const std::string& filename) {
474  items_.touch(filename + ".items");
475  data_.touch(filename + ".data");
476  }
477 
478  void dump(const std::string& filename) {
479  items_.dump(filename + ".items");
480  data_.dump(filename + ".data");
481  }
482 
483  void resize(size_t size, size_t data_size) {
484  items_.resize(size);
485  data_.resize(data_size);
486  }
487 
488  void set(size_t idx, size_t offset, const std::string_view& val) {
489  items_.set(idx, {offset, static_cast<uint32_t>(val.size())});
490  memcpy(data_.data() + offset, val.data(), val.size());
491  }
492 
493  std::string_view get(size_t idx) const {
494  const string_item& item = items_.get(idx);
495  return std::string_view(data_.data() + item.offset, item.length);
496  }
497 
498  size_t size() const { return items_.size(); }
499 
500  size_t data_size() const { return data_.size(); }
501 
502  void swap(mmap_array& rhs) {
503  items_.swap(rhs.items_);
504  data_.swap(rhs.data_);
505  }
506  void unlink() {
507  items_.unlink();
508  data_.unlink();
509  }
510 
511  private:
514 };
515 
516 } // namespace gs
517 
518 #endif // GRAPHSCOPE_UTILS_MMAP_ARRAY_H_
PROTECTION
#define PROTECTION
Definition: mmap_array.h:38
gs::MemoryStrategy
MemoryStrategy
Definition: mmap_array.h:58
gs::mmap_array< std::string_view >::resize
void resize(size_t size, size_t data_size)
Definition: mmap_array.h:483
gs::mmap_array< std::string_view >::size
size_t size() const
Definition: mmap_array.h:498
gs::mmap_array< std::string_view >::items_
mmap_array< string_item > items_
Definition: mmap_array.h:512
gs::string_item::length
uint32_t length
Definition: mmap_array.h:443
gs::mmap_array::mmap_array
mmap_array(mmap_array &&rhs)
Definition: mmap_array.h:81
gs::mmap_array::~mmap_array
~mmap_array()
Definition: mmap_array.h:82
gs::mmap_array< std::string_view >::open
void open(const std::string &filename, bool sync_to_file)
Definition: mmap_array.h:463
gs::mmap_array< std::string_view >::open_with_hugepages
void open_with_hugepages(const std::string &filename)
Definition: mmap_array.h:468
file_names.h
gs::mmap_array< std::string_view >::data_size
size_t data_size() const
Definition: mmap_array.h:500
gs::mmap_array::get
const T & get(size_t idx) const
Definition: mmap_array.h:410
allocate_hugepages
void * allocate_hugepages(size_t size)
Definition: mmap_array.h:44
gs::mmap_array::operator[]
const T & operator[](size_t idx) const
Definition: mmap_array.h:412
gs::mmap_array::resize
void resize(size_t size)
Definition: mmap_array.h:319
gs::mmap_array< std::string_view >::set_hugepage_prefered
void set_hugepage_prefered(bool val)
Definition: mmap_array.h:458
gs::mmap_array::sync_to_file_
bool sync_to_file_
Definition: mmap_array.h:437
gs
Definition: adj_list.h:23
gs::mmap_array< std::string_view >::reset
void reset()
Definition: mmap_array.h:453
gs::mmap_array::data_
T * data_
Definition: mmap_array.h:432
gs::mmap_array::open
void open(const std::string &filename, bool sync_to_file=false)
Definition: mmap_array.h:129
gs::string_item
Definition: mmap_array.h:441
gs::mmap_array::set_hugepage_prefered
void set_hugepage_prefered(bool val)
Definition: mmap_array.h:125
gs::mmap_array::swap
void swap(mmap_array< T > &rhs)
Definition: mmap_array.h:417
gs::mmap_array::data
T * data()
Definition: mmap_array.h:405
gs::MemoryStrategy::kHugepagePrefered
@ kHugepagePrefered
gs::mmap_array::unlink
void unlink()
Definition: mmap_array.h:111
gs::mmap_array::size
size_t size() const
Definition: mmap_array.h:415
gs::mmap_array< std::string_view >::unlink
void unlink()
Definition: mmap_array.h:506
gs::mmap_array< std::string_view >::get
std::string_view get(size_t idx) const
Definition: mmap_array.h:493
gs::MemoryStrategy::kSyncToFile
@ kSyncToFile
gs::mmap_array::size_
size_t size_
Definition: mmap_array.h:433
gs::mmap_array::data
const T * data() const
Definition: mmap_array.h:406
ADDR
#define ADDR
Definition: mmap_array.h:34
FLAGS
#define FLAGS
Definition: mmap_array.h:35
gs::mmap_array::mmap_size_
size_t mmap_size_
Definition: mmap_array.h:435
gs::mmap_array::dump
void dump(const std::string &filename)
Definition: mmap_array.h:259
gs::string_item::offset
uint64_t offset
Definition: mmap_array.h:442
gs::mmap_array< std::string_view >::dump
void dump(const std::string &filename)
Definition: mmap_array.h:478
gs::mmap_array::fd_
int fd_
Definition: mmap_array.h:431
gs::mmap_array< std::string_view >::swap
void swap(mmap_array &rhs)
Definition: mmap_array.h:502
ROUND_UP
#define ROUND_UP(size)
Definition: mmap_array.h:42
gs::mmap_array
Definition: mmap_array.h:65
gs::mmap_array::touch
void touch(const std::string &filename)
Definition: mmap_array.h:400
std
Definition: loading_config.h:232
gs::mmap_array::filename
const std::string & filename() const
Definition: mmap_array.h:427
gs::mmap_array< std::string_view >::mmap_array
mmap_array()
Definition: mmap_array.h:449
gs::mmap_array::filename_
std::string filename_
Definition: mmap_array.h:430
gs::mmap_array::operator[]
T & operator[](size_t idx)
Definition: mmap_array.h:413
gs::MemoryStrategy::kMemoryOnly
@ kMemoryOnly
gs::mmap_array::open_with_hugepages
void open_with_hugepages(const std::string &filename, size_t capacity=0)
Definition: mmap_array.h:214
gs::mmap_array::mmap_array
mmap_array(const mmap_array< T > &rhs)
Definition: mmap_array.h:76
gs::mmap_array< std::string_view >::set
void set(size_t idx, size_t offset, const std::string_view &val)
Definition: mmap_array.h:488
gs::mmap_array< std::string_view >::data_
mmap_array< char > data_
Definition: mmap_array.h:513
gs::mmap_array< std::string_view >::mmap_array
mmap_array(mmap_array &&rhs)
Definition: mmap_array.h:450
gs::mmap_array< std::string_view >::~mmap_array
~mmap_array()
Definition: mmap_array.h:451
gs::mmap_array< std::string_view >::touch
void touch(const std::string &filename)
Definition: mmap_array.h:473
gs::mmap_array::mmap_array
mmap_array()
Definition: mmap_array.h:67
gs::mmap_array::reset
void reset()
Definition: mmap_array.h:84
gs::mmap_array::set
void set(size_t idx, const T &val)
Definition: mmap_array.h:408
gs::mmap_array::hugepage_prefered_
bool hugepage_prefered_
Definition: mmap_array.h:438
hugepage_round_up
size_t hugepage_round_up(size_t size)
Definition: mmap_array.h:48