Caffe
syncedmem.hpp
1 #ifndef CAFFE_SYNCEDMEM_HPP_
2 #define CAFFE_SYNCEDMEM_HPP_
3 
4 #include <cstdlib>
5 
6 #ifdef USE_MKL
7  #include "mkl.h"
8 #endif
9 
10 #include "caffe/common.hpp"
11 
12 namespace caffe {
13 
14 // If CUDA is available and in GPU mode, host memory will be allocated pinned,
15 // using cudaMallocHost. It avoids dynamic pinning for transfers (DMA).
16 // The improvement in performance seems negligible in the single GPU case,
17 // but might be more significant for parallel training. Most importantly,
18 // it improved stability for large models on many GPUs.
19 inline void CaffeMallocHost(void** ptr, size_t size, bool* use_cuda) {
20 #ifndef CPU_ONLY
21  if (Caffe::mode() == Caffe::GPU) {
22  CUDA_CHECK(cudaMallocHost(ptr, size));
23  *use_cuda = true;
24  return;
25  }
26 #endif
27 #ifdef USE_MKL
28  *ptr = mkl_malloc(size ? size:1, 64);
29 #else
30  *ptr = malloc(size);
31 #endif
32  *use_cuda = false;
33  CHECK(*ptr) << "host allocation of size " << size << " failed";
34 }
35 
36 inline void CaffeFreeHost(void* ptr, bool use_cuda) {
37 #ifndef CPU_ONLY
38  if (use_cuda) {
39  CUDA_CHECK(cudaFreeHost(ptr));
40  return;
41  }
42 #endif
43 #ifdef USE_MKL
44  mkl_free(ptr);
45 #else
46  free(ptr);
47 #endif
48 }
49 
50 
57 class SyncedMemory {
58  public:
59  SyncedMemory();
60  explicit SyncedMemory(size_t size);
61  ~SyncedMemory();
62  const void* cpu_data();
63  void set_cpu_data(void* data);
64  const void* gpu_data();
65  void set_gpu_data(void* data);
66  void* mutable_cpu_data();
67  void* mutable_gpu_data();
68  enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
69  SyncedHead head() { return head_; }
70  size_t size() { return size_; }
71 
72 #ifndef CPU_ONLY
73  void async_gpu_push(const cudaStream_t& stream);
74 #endif
75 
76  private:
77  void check_device();
78 
79  void to_cpu();
80  void to_gpu();
81  void* cpu_ptr_;
82  void* gpu_ptr_;
83  size_t size_;
84  SyncedHead head_;
85  bool own_cpu_data_;
86  bool cpu_malloc_use_cuda_;
87  bool own_gpu_data_;
88  int device_;
89 
90  DISABLE_COPY_AND_ASSIGN(SyncedMemory);
91 }; // class SyncedMemory
92 
93 } // namespace caffe
94 
95 #endif // CAFFE_SYNCEDMEM_HPP_
A layer factory that allows one to register layers. During runtime, registered layers can be called b...
Definition: blob.hpp:14
Manages memory allocation and synchronization between the host (CPU) and device (GPU).
Definition: syncedmem.hpp:57