Caffe
device_alternate.hpp
1 #ifndef CAFFE_UTIL_DEVICE_ALTERNATE_H_
2 #define CAFFE_UTIL_DEVICE_ALTERNATE_H_
3 
4 #ifdef CPU_ONLY // CPU-only Caffe.
5 
6 #include <vector>
7 
8 // Stub out GPU calls as unavailable.
9 
// Statement fragment used as the body of every stubbed GPU method below:
// streams a fixed message into LOG(FATAL).
#define NO_GPU \
  LOG(FATAL) << "Cannot use GPU in CPU-only Caffe: check mode."
11 
// Defines classname<Dtype>::Forward_gpu and classname<Dtype>::Backward_gpu
// as out-of-class template member functions whose bodies only execute NO_GPU.
//   classname: name of a class template taking a single type parameter.
#define STUB_GPU(classname) \
  template <typename Dtype> \
  void classname<Dtype>::Forward_gpu( \
      const vector<Blob<Dtype>*>& bottom, \
      const vector<Blob<Dtype>*>& top) { \
    NO_GPU; \
  } \
  template <typename Dtype> \
  void classname<Dtype>::Backward_gpu( \
      const vector<Blob<Dtype>*>& top, \
      const vector<bool>& propagate_down, \
      const vector<Blob<Dtype>*>& bottom) { \
    NO_GPU; \
  }
20 
// Defines classname<Dtype>::<funcname>_gpu with the (bottom, top) signature
// as an out-of-class template member function whose body only executes NO_GPU.
//   classname: name of a class template taking a single type parameter.
//   funcname:  prefix of the method name; "_gpu" is appended by token pasting.
#define STUB_GPU_FORWARD(classname, funcname) \
  template <typename Dtype> \
  void classname<Dtype>::funcname##_gpu( \
      const vector<Blob<Dtype>*>& bottom, \
      const vector<Blob<Dtype>*>& top) { \
    NO_GPU; \
  }
25 
// Defines classname<Dtype>::<funcname>_gpu with the
// (top, propagate_down, bottom) signature as an out-of-class template member
// function whose body only executes NO_GPU.
//   classname: name of a class template taking a single type parameter.
//   funcname:  prefix of the method name; "_gpu" is appended by token pasting.
#define STUB_GPU_BACKWARD(classname, funcname) \
  template <typename Dtype> \
  void classname<Dtype>::funcname##_gpu( \
      const vector<Blob<Dtype>*>& top, \
      const vector<bool>& propagate_down, \
      const vector<Blob<Dtype>*>& bottom) { \
    NO_GPU; \
  }
31 
32 #else // Normal GPU + CPU Caffe.
33 
34 #include <cublas_v2.h>
35 #include <cuda.h>
36 #include <cuda_runtime.h>
37 #include <curand.h>
38 #include <driver_types.h> // cuda driver types
39 #ifdef USE_CUDNN // cuDNN acceleration library.
40 #include "caffe/util/cudnn.hpp"
41 #endif
42 
43 //
44 // CUDA macros
45 //
46 
47 // CUDA: various checks for different function calls.
// Evaluates `condition` (an expression yielding cudaError_t) exactly once and
// asserts through CHECK_EQ that the result equals cudaSuccess, streaming
// cudaGetErrorString() for the result into the check message.
//
// The do { ... } while (0) wrapper scopes the temporary and lets the macro be
// used as a single statement. The temporary carries a macro-specific name so
// that an argument which itself mentions a variable called `error` (for
// example CUDA_CHECK(error)) is evaluated in the caller's scope instead of
// being captured by the macro's own local. The argument is parenthesized so
// that it is always treated as one expression.
#define CUDA_CHECK(condition) \
  do { \
    cudaError_t caffe_cuda_check_error_ = (condition); \
    CHECK_EQ(caffe_cuda_check_error_, cudaSuccess) \
        << " " << cudaGetErrorString(caffe_cuda_check_error_); \
  } while (0)
54 
// Evaluates `condition` (an expression yielding cublasStatus_t) exactly once
// and asserts through CHECK_EQ that the result equals CUBLAS_STATUS_SUCCESS,
// streaming caffe::cublasGetErrorString() for the result into the message.
//
// The temporary carries a macro-specific name so that an argument mentioning
// a caller variable called `status` (for example CUBLAS_CHECK(status)) is not
// captured by the macro's own local. The argument is parenthesized so that it
// is always treated as one expression.
#define CUBLAS_CHECK(condition) \
  do { \
    cublasStatus_t caffe_cublas_check_status_ = (condition); \
    CHECK_EQ(caffe_cublas_check_status_, CUBLAS_STATUS_SUCCESS) \
        << " " << caffe::cublasGetErrorString(caffe_cublas_check_status_); \
  } while (0)
61 
// Evaluates `condition` (an expression yielding curandStatus_t) exactly once
// and asserts through CHECK_EQ that the result equals CURAND_STATUS_SUCCESS,
// streaming caffe::curandGetErrorString() for the result into the message.
//
// The temporary carries a macro-specific name so that an argument mentioning
// a caller variable called `status` (for example CURAND_CHECK(status)) is not
// captured by the macro's own local. The argument is parenthesized so that it
// is always treated as one expression.
#define CURAND_CHECK(condition) \
  do { \
    curandStatus_t caffe_curand_check_status_ = (condition); \
    CHECK_EQ(caffe_curand_check_status_, CURAND_STATUS_SUCCESS) \
        << " " << caffe::curandGetErrorString(caffe_curand_check_status_); \
  } while (0)
68 
// CUDA: grid stride looping.
// Expands to the header of a for loop; the statement following the macro is
// the loop body. `i` is declared as an int, starts at
// blockIdx.x * blockDim.x + threadIdx.x, advances by blockDim.x * gridDim.x
// on every iteration, and the loop runs while i < n.
#define CUDA_KERNEL_LOOP(i, n) \
  for (int i = threadIdx.x + blockDim.x * blockIdx.x; i < (n); \
       i += gridDim.x * blockDim.x)
74 
// CUDA: check for error after kernel execution and exit loudly if there is one.
// Expands to CUDA_CHECK applied to the value of cudaPeekAtLastError().
#define CUDA_POST_KERNEL_CHECK \
  CUDA_CHECK(cudaPeekAtLastError())
77 
78 namespace caffe {
79 
80 // CUDA: library error reporting.
81 const char* cublasGetErrorString(cublasStatus_t error);
82 const char* curandGetErrorString(curandStatus_t error);
83 
84 // CUDA: use 512 threads per block
85 const int CAFFE_CUDA_NUM_THREADS = 512;
86 
87 // CUDA: number of blocks for threads.
88 inline int CAFFE_GET_BLOCKS(const int N) {
89  return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS;
90 }
91 
92 } // namespace caffe
93 
94 #endif // CPU_ONLY
95 
96 #endif // CAFFE_UTIL_DEVICE_ALTERNATE_H_
A layer factory that allows one to register layers. During runtime, registered layers can be called b...
Definition: blob.hpp:14