// Device-alternate helpers: CPU-only stubs versus CUDA error-checking macros.
//
// NOTE(review): this span looks like a flattened source listing. The bare
// integers in front of each fragment appear to be listing line numbers rather
// than code, and the numbering skips values, so some original lines are
// probably missing here. Confirm against the real header before editing.
//
// When CPU_ONLY is defined:
//   NO_GPU                 - streams "Cannot use GPU in CPU-only Caffe: check
//                            mode." into LOG(FATAL).
//   STUB_GPU(classname)    - defines classname<Dtype>::Forward_gpu and
//                            classname<Dtype>::Backward_gpu with bodies that
//                            consist only of NO_GPU.
//   STUB_GPU_FORWARD / STUB_GPU_BACKWARD(classname, funcname)
//                          - define a single funcname##_##gpu member with the
//                            forward (bottom, top) or backward
//                            (top, propagate_down, bottom) parameter list,
//                            again with a NO_GPU body.
//
// Otherwise (the #else branch):
//   Includes cublas_v2.h, cuda_runtime.h and driver_types.h, plus
//   caffe/util/cudnn.hpp when USE_CUDNN is defined.
//   CUDA_CHECK / CUBLAS_CHECK / CURAND_CHECK(condition)
//                          - store the result of `condition` and CHECK_EQ it
//                            against cudaSuccess / CUBLAS_STATUS_SUCCESS /
//                            CURAND_STATUS_SUCCESS, appending the matching
//                            error string to the failure message.
//   CUDA_KERNEL_LOOP(i, n) - `for` header that starts i at
//                            blockIdx.x * blockDim.x + threadIdx.x and advances
//                            it by blockDim.x * gridDim.x. The loop condition
//                            is not visible in this span (presumably i < n;
//                            TODO confirm).
//   CUDA_POST_KERNEL_CHECK - CUDA_CHECK(cudaPeekAtLastError()).
//   cublasGetErrorString / curandGetErrorString
//                          - declared here; the check macros call them as
//                            caffe::cublasGetErrorString and
//                            caffe::curandGetErrorString.
1 #ifndef CAFFE_UTIL_DEVICE_ALTERNATE_H_ 2 #define CAFFE_UTIL_DEVICE_ALTERNATE_H_ 4 #ifdef CPU_ONLY // CPU-only Caffe. 10 #define NO_GPU LOG(FATAL) << "Cannot use GPU in CPU-only Caffe: check mode." 12 #define STUB_GPU(classname) \ 13 template <typename Dtype> \ 14 void classname<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, \ 15 const vector<Blob<Dtype>*>& top) { NO_GPU; } \ 16 template <typename Dtype> \ 17 void classname<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, \ 18 const vector<bool>& propagate_down, \ 19 const vector<Blob<Dtype>*>& bottom) { NO_GPU; } \ 21 #define STUB_GPU_FORWARD(classname, funcname) \ 22 template <typename Dtype> \ 23 void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& bottom, \ 24 const vector<Blob<Dtype>*>& top) { NO_GPU; } \ 26 #define STUB_GPU_BACKWARD(classname, funcname) \ 27 template <typename Dtype> \ 28 void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \ 29 const vector<bool>& propagate_down, \ 30 const vector<Blob<Dtype>*>& bottom) { NO_GPU; } \ 32 #else // Normal GPU + CPU Caffe. 34 #include <cublas_v2.h> 36 #include <cuda_runtime.h> 38 #include <driver_types.h> 39 #ifdef USE_CUDNN // cuDNN acceleration library. 
40 #include "caffe/util/cudnn.hpp" 48 #define CUDA_CHECK(condition) \ 51 cudaError_t error = condition; \ 52 CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \ 55 #define CUBLAS_CHECK(condition) \ 57 cublasStatus_t status = condition; \ 58 CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " " \ 59 << caffe::cublasGetErrorString(status); \ 62 #define CURAND_CHECK(condition) \ 64 curandStatus_t status = condition; \ 65 CHECK_EQ(status, CURAND_STATUS_SUCCESS) << " " \ 66 << caffe::curandGetErrorString(status); \ 70 #define CUDA_KERNEL_LOOP(i, n) \ 71 for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 73 i += blockDim.x * gridDim.x) 76 #define CUDA_POST_KERNEL_CHECK CUDA_CHECK(cudaPeekAtLastError()) 81 const char* cublasGetErrorString(cublasStatus_t error);
82 const char* curandGetErrorString(curandStatus_t error);
// Divisor used by CAFFE_GET_BLOCKS when sizing a launch (presumably the
// number of CUDA threads per block, going by its name).
const int CAFFE_CUDA_NUM_THREADS = 512;

// Returns how many groups of CAFFE_CUDA_NUM_THREADS are needed to cover N
// items, i.e. N / CAFFE_CUDA_NUM_THREADS rounded up for N >= 0.
//
// The rounding term is added in 64-bit arithmetic: evaluating
// `N + CAFFE_CUDA_NUM_THREADS - 1` in `int` overflows (undefined behaviour)
// once N is within CAFFE_CUDA_NUM_THREADS - 1 of INT_MAX. For every N the
// int expression could represent, the result is unchanged, and the quotient
// always fits back into an int.
inline int CAFFE_GET_BLOCKS(const int N) {
  const long long padded =
      static_cast<long long>(N) + CAFFE_CUDA_NUM_THREADS - 1;
  return static_cast<int>(padded / CAFFE_CUDA_NUM_THREADS);
}
96 #endif // CAFFE_UTIL_DEVICE_ALTERNATE_H_ A layer factory that allows one to register layers. During runtime, registered layers can be called b...
Definition: blob.hpp:14