Caffe
lstm_layer.hpp
1 #ifndef CAFFE_LSTM_LAYER_HPP_
2 #define CAFFE_LSTM_LAYER_HPP_
3 
4 #include <string>
5 #include <utility>
6 #include <vector>
7 
8 #include "caffe/blob.hpp"
9 #include "caffe/common.hpp"
10 #include "caffe/layer.hpp"
11 #include "caffe/layers/recurrent_layer.hpp"
12 #include "caffe/net.hpp"
13 #include "caffe/proto/caffe.pb.h"
14 
15 namespace caffe {
16 
// Forward declaration of RecurrentLayer, the abstract base for layers that
// implement recurrent behavior inside of an unrolled network. It is not meant
// to be instantiated directly; LSTMLayer below is one of its implementations.
17 template <typename Dtype> class RecurrentLayer;
18 
/**
 * @brief Processes sequential inputs using a "Long Short-Term Memory" (LSTM)
 *        style recurrent neural network.
 *
 * Derives from RecurrentLayer<Dtype> and declares the LSTM-specific overrides
 * of its hooks. Only the constructor and type() are defined inline; the
 * remaining members are defined outside this header.
 */
47 template <typename Dtype>
48 class LSTMLayer : public RecurrentLayer<Dtype> {
49  public:
 /// @brief Passes the layer configuration straight through to RecurrentLayer.
50  explicit LSTMLayer(const LayerParameter& param)
51  : RecurrentLayer<Dtype>(param) {}
52 
 /// @brief Returns the layer type, the string "LSTM".
53  virtual inline const char* type() const { return "LSTM"; }
54 
55  protected:
 /// @brief Fills net_param with the recurrent network architecture.
56  virtual void FillUnrolledNet(NetParameter* net_param) const;
 /// @brief Fills names with the names of the 0th timestep recurrent input
 ///        Blobs.
57  virtual void RecurrentInputBlobNames(vector<string>* names) const;
 /// @brief Fills names with the names of the Tth timestep recurrent output
 ///        Blobs.
58  virtual void RecurrentOutputBlobNames(vector<string>* names) const;
 /// @brief Fills shapes with the shapes of the recurrent input Blobs.
59  virtual void RecurrentInputShapes(vector<BlobShape>* shapes) const;
 /// @brief Fills names with the names of the output blobs, concatenated
 ///        across all timesteps; one name is expected per top Blob.
60  virtual void OutputBlobNames(vector<string>* names) const;
61 };
62 
/**
 * @brief A helper for LSTMLayer: computes a single timestep of the
 *        non-linearity of the LSTM, producing the updated cell and hidden
 *        states.
 *
 * The layer takes exactly three bottom blobs and produces exactly two top
 * blobs. Bottom index 2 carries the sequence continuation indicators, which
 * never receive gradients.
 * NOTE(review): the order of the remaining bottom/top blobs is not visible
 * in this header -- confirm against the implementation file.
 */
68 template <typename Dtype>
69 class LSTMUnitLayer : public Layer<Dtype> {
70  public:
 /// @brief Passes the layer configuration straight through to Layer.
71  explicit LSTMUnitLayer(const LayerParameter& param)
72  : Layer<Dtype>(param) {}
 /// @brief Per the Layer interface: adjusts the shapes of top blobs and
 ///        internal buffers to accommodate the shapes of the bottom blobs.
73  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
74  const vector<Blob<Dtype>*>& top);
75 
 /// @brief Returns the layer type, the string "LSTMUnit".
76  virtual inline const char* type() const { return "LSTMUnit"; }
 /// @brief Exact number of bottom blobs required by the layer: 3.
77  virtual inline int ExactNumBottomBlobs() const { return 3; }
 /// @brief Exact number of top blobs required by the layer: 2.
78  virtual inline int ExactNumTopBlobs() const { return 2; }
79 
 /// @brief Returns whether force_backward is allowed for the given bottom
 ///        blob index: true for every index except 2.
80  virtual inline bool AllowForceBackward(const int bottom_index) const {
81  // Can't propagate to sequence continuation indicators.
82  return bottom_index != 2;
83  }
84 
85  protected:
 /// @brief CPU implementation of the forward pass for one LSTM timestep.
106  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
107  const vector<Blob<Dtype>*>& top);
 /// @brief Using the GPU device, compute the layer output.
108  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
109  const vector<Blob<Dtype>*>& top);
110 
 /// @brief Using the CPU device, compute the gradients for any parameters
 ///        and for the bottom blobs.
 /// NOTE(review): presumably gated per bottom blob by propagate_down --
 /// confirm against the Layer interface documentation.
142  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
143  const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
 /// @brief Using the GPU device, compute the gradients for any parameters
 ///        and for the bottom blobs.
144  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
145  const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
146 
 /// @brief The hidden and output dimension.
148  int hidden_dim_;
 /// NOTE(review): presumably scratch storage for intermediate activations;
 /// its use is not visible in this header -- confirm in the implementation.
149  Blob<Dtype> X_acts_;
150 };
151 
152 } // namespace caffe
153 
154 #endif // CAFFE_LSTM_LAYER_HPP_
virtual void Backward_gpu(const vector< Blob< Dtype > *> &top, const vector< bool > &propagate_down, const vector< Blob< Dtype > *> &bottom)
Using the GPU device, compute the gradients for any parameters and for the bottom blobs if propagate_...
Definition: layer.hpp:334
Processes sequential inputs using a "Long Short-Term Memory" (LSTM) [1] style recurrent neural networ...
Definition: lstm_layer.hpp:48
virtual void RecurrentInputShapes(vector< BlobShape > *shapes) const
Fills shapes with the shapes of the recurrent input Blobs. Subclasses should define this – see RNNL...
Definition: lstm_layer.cpp:28
An interface for the units of computation which can be composed into a Net.
Definition: layer.hpp:33
virtual void RecurrentOutputBlobNames(vector< string > *names) const
Fills names with the names of the Tth timestep recurrent output Blobs. Subclasses should define this...
Definition: lstm_layer.cpp:21
A layer factory that allows one to register layers. During runtime, registered layers can be called b...
Definition: blob.hpp:14
An abstract class for implementing recurrent behavior inside of an unrolled network. This Layer type cannot be instantiated – instead, you should use one of its implementations which defines the recurrent architecture, such as RNNLayer or LSTMLayer.
Definition: lstm_layer.hpp:17
virtual void Forward_cpu(const vector< Blob< Dtype > *> &bottom, const vector< Blob< Dtype > *> &top)
Definition: recurrent_layer.cpp:245
virtual void Forward_gpu(const vector< Blob< Dtype > *> &bottom, const vector< Blob< Dtype > *> &top)
Using the GPU device, compute the layer output. Fall back to Forward_cpu() if unavailable.
virtual void RecurrentInputBlobNames(vector< string > *names) const
Fills names with the names of the 0th timestep recurrent input Blobs. Subclasses should define this ...
Definition: lstm_layer.cpp:14
virtual void FillUnrolledNet(NetParameter *net_param) const
Fills net_param with the recurrent network architecture. Subclasses should define this – see RNNLaye...
Definition: lstm_layer.cpp:47
virtual const char * type() const
Returns the layer type.
Definition: lstm_layer.hpp:76
virtual int ExactNumBottomBlobs() const
Returns the exact number of bottom blobs required by the layer, or -1 if no exact number is required...
Definition: lstm_layer.hpp:77
virtual bool AllowForceBackward(const int bottom_index) const
Return whether to allow force_backward for a given bottom blob index.
Definition: lstm_layer.hpp:80
int hidden_dim_
The hidden and output dimension.
Definition: lstm_layer.hpp:148
virtual void OutputBlobNames(vector< string > *names) const
Fills names with the names of the output blobs, concatenated across all timesteps. Should return a name for each top Blob. Subclasses should define this – see RNNLayer and LSTMLayer for examples.
Definition: lstm_layer.cpp:41
virtual const char * type() const
Returns the layer type.
Definition: lstm_layer.hpp:53
virtual void Reshape(const vector< Blob< Dtype > *> &bottom, const vector< Blob< Dtype > *> &top)
Adjust the shapes of top blobs and internal buffers to accommodate the shapes of the bottom blobs...
Definition: recurrent_layer.cpp:183
virtual void Backward_cpu(const vector< Blob< Dtype > *> &top, const vector< bool > &propagate_down, const vector< Blob< Dtype > *> &bottom)
Using the CPU device, compute the gradients for any parameters and for the bottom blobs if propagate_...
Definition: recurrent_layer.cpp:277
A helper for LSTMLayer: computes a single timestep of the non-linearity of the LSTM, producing the updated cell and hidden states.
Definition: lstm_layer.hpp:69
virtual int ExactNumTopBlobs() const
Returns the exact number of top blobs required by the layer, or -1 if no exact number is required...
Definition: lstm_layer.hpp:78
A wrapper around SyncedMemory holders serving as the basic computational unit through which Layers...
Definition: blob.hpp:24