Caffe
cudnn_conv_layer.hpp
1 #ifndef CAFFE_CUDNN_CONV_LAYER_HPP_
2 #define CAFFE_CUDNN_CONV_LAYER_HPP_
3 
4 #include <vector>
5 
6 #include "caffe/blob.hpp"
7 #include "caffe/layer.hpp"
8 #include "caffe/proto/caffe.pb.h"
9 
10 #include "caffe/layers/conv_layer.hpp"
11 
12 namespace caffe {
13 
14 #ifdef USE_CUDNN
15 /**
16  * @brief cuDNN implementation of ConvolutionLayer.
17  * Fallback to ConvolutionLayer for CPU mode.
18  *
19  * cuDNN accelerates convolution through forward kernels for filtering and bias
20  * plus backward kernels for the gradient w.r.t. the filters, biases, and
21  * inputs. Caffe + cuDNN further speeds up the computation through forward
22  * parallelism across groups and backward parallelism across gradients.
23  *
24  * The CUDNN engine does not have memory overhead for matrix buffers. For many
25  * input and filter regimes the CUDNN engine is faster than the CAFFE engine,
26  * but for fully-convolutional models and large inputs the CAFFE engine can be
27  * faster as long as it fits in memory.
28 */
29 template <typename Dtype>
30 class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
31  public:
 // Forwards `param` to the ConvolutionLayer<Dtype> base and records that the
 // cuDNN handles have not been set up yet. No other member of this class is
 // initialized here, so the pointer members below hold indeterminate values
 // until they are assigned elsewhere.
32  explicit CuDNNConvolutionLayer(const LayerParameter& param)
33  : ConvolutionLayer<Dtype>(param), handles_setup_(false) {}
 // Takes the layer's bottom (input) and top (output) blob lists. Declared
 // here, defined out of line.
 // NOTE(review): presumably the place where the cuDNN members below are
 // created and handles_setup_ becomes true -- confirm in the implementation.
34  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
35  const vector<Blob<Dtype>*>& top);
 // Takes the same bottom/top blob lists as LayerSetUp. Declared here, defined
 // out of line.
 // NOTE(review): presumably refreshes descriptors, algorithm choices and
 // workspace sizes when blob shapes change -- confirm in the implementation.
36  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
37  const vector<Blob<Dtype>*>& top);
 // Defined out of line.
 // NOTE(review): since the constructor leaves the pointer members
 // uninitialized, the destructor presumably has to consult handles_setup_
 // before releasing anything -- confirm in the implementation.
38  virtual ~CuDNNConvolutionLayer();
39 
40  protected:
 // Only the GPU forward/backward passes are declared in this class; no CPU
 // counterparts appear here, so CPU behavior comes from the base class.
41  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
42  const vector<Blob<Dtype>*>& top);
 // `propagate_down` carries one bool per bottom blob.
 // NOTE(review): presumably selects which bottoms receive a gradient --
 // confirm against the base Layer contract.
43  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
44  const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
45 
 // False after construction (set in the constructor's initializer list).
46  bool handles_setup_;
 // Pointers to cuDNN handle(s) and CUDA stream(s); not initialized by the
 // constructor.
 // NOTE(review): presumably dynamically allocated arrays with one entry per
 // parallel stream -- confirm element count and ownership.
47  cudnnHandle_t* handle_;
48  cudaStream_t* stream_;
49 
50  // algorithms for forward and backwards convolutions
 // Raw pointers, not initialized by the constructor.
 // NOTE(review): presumably arrays indexed per bottom blob -- confirm.
51  cudnnConvolutionFwdAlgo_t *fwd_algo_;
52  cudnnConvolutionBwdFilterAlgo_t *bwd_filter_algo_;
53  cudnnConvolutionBwdDataAlgo_t *bwd_data_algo_;
54 
 // cuDNN descriptors: vectors of tensor descriptors for bottoms and tops,
 // a vector of convolution descriptors, and a single bias and filter
 // descriptor.
55  vector<cudnnTensorDescriptor_t> bottom_descs_, top_descs_;
56  cudnnTensorDescriptor_t bias_desc_;
57  cudnnFilterDescriptor_t filter_desc_;
58  vector<cudnnConvolutionDescriptor_t> conv_descs_;
 // Integer offsets into bottom, top and bias data.
 // NOTE(review): presumably per-group strides in elements -- confirm units.
59  int bottom_offset_, top_offset_, bias_offset_;
60 
 // Pointers to workspace sizes for the forward, backward-data and
 // backward-filter passes; not initialized by the constructor.
 // NOTE(review): presumably arrays parallel to the *_algo_ members above --
 // confirm.
61  size_t *workspace_fwd_sizes_;
62  size_t *workspace_bwd_data_sizes_;
63  size_t *workspace_bwd_filter_sizes_;
 // NOTE(review): the three members below lack the trailing underscore used
 // by every other data member of this class; left unchanged because code
 // outside this header may refer to them by these names.
64  size_t workspaceSizeInBytes; // size of underlying storage
65  void *workspaceData; // underlying storage
66  void **workspace; // aliases into workspaceData
67 };
68 #endif
69 
70 } // namespace caffe
71 
72 #endif // CAFFE_CUDNN_CONV_LAYER_HPP_
A layer factory that allows one to register layers. During runtime, registered layers can be called b...
Definition: blob.hpp:14