Caffe
parallel.hpp
1 #ifndef CAFFE_PARALLEL_HPP_
2 #define CAFFE_PARALLEL_HPP_
3 
4 #ifdef USE_NCCL
5 
6 #include <boost/thread.hpp>
7 
8 #include <string>
9 #include <vector>
10 
11 #include "caffe/blob.hpp"
12 #include "caffe/common.hpp"
13 #include "caffe/internal_thread.hpp"
14 #include "caffe/layer.hpp"
15 #include "caffe/proto/caffe.pb.h"
16 #include "caffe/solver.hpp"
17 #include "caffe/syncedmem.hpp"
18 #include "caffe/util/blocking_queue.hpp"
19 #include "caffe/util/nccl.hpp"
20 
21 namespace caffe {
22 
23 // Represents a net's parameters. Once a net is created, its parameter buffers can
24 // be replaced by ones from Params, to allow parallelization. Params ensures
25 // parameters are allocated in one consecutive array.
26 template<typename Dtype>
27 class Params {
28  public:
    // Constructs from the root solver. Only declared here; the definition
    // (and however size_/data_/diff_ get initialized) is not in this header.
29  explicit Params(shared_ptr<Solver<Dtype> > root_solver);
    // Virtual destructor with an empty body: this class releases nothing
    // itself, but deleting through a Params pointer still runs the derived
    // class destructor.
30  virtual ~Params() {
31  }
32 
    // Returns size_.
    // NOTE(review): presumably a count of Dtype elements rather than bytes --
    // confirm against the constructor definition.
33  inline size_t size() const {
34  return size_;
35  }
    // Returns the raw data_ pointer. The method is const but the pointee is
    // not, so callers can write through the returned pointer.
36  inline Dtype* data() const {
37  return data_;
38  }
    // Returns the raw diff_ pointer; same const-ness caveat as data().
39  inline Dtype* diff() const {
40  return diff_;
41  }
42 
43  protected:
    // size_ is const, so it can only be set in the constructor's initializer
    // list. data_ and diff_ are plain pointers that derived classes may
    // reassign.
44  const size_t size_; // Size of buffers
45  Dtype* data_; // Network parameters
46  Dtype* diff_; // Gradient
47 
    // Macro defined outside this file. By its name it presumably disables the
    // copy constructor and copy assignment -- confirm in caffe/common.hpp.
48 DISABLE_COPY_AND_ASSIGN(Params);
49 };
50 
51 // Params stored in GPU memory.
52 template<typename Dtype>
53 class GPUParams : public Params<Dtype> {
54  public:
    // Constructs from the root solver and an integer device id. Only declared
    // here; what is allocated, and on which device, is decided in the
    // out-of-line definition.
55  GPUParams(shared_ptr<Solver<Dtype> > root_solver, int device);
    // Declared here, defined elsewhere.
    // NOTE(review): presumably releases the buffers behind data_/diff_ --
    // confirm in the implementation file.
56  virtual ~GPUParams();
57 
    // Const member taking a raw, non-owning Solver pointer.
    // NOTE(review): presumably repoints the solver's net parameters at this
    // object's data_/diff_ buffers -- confirm in the implementation file.
58  void Configure(Solver<Dtype>* solver) const;
59 
60  protected:
    // Params<Dtype> is a dependent base class, so its members are not found by
    // unqualified name lookup inside this template. These using-declarations
    // make size_, data_ and diff_ usable without a this-> or Params<Dtype>::
    // prefix.
61  using Params<Dtype>::size_;
62  using Params<Dtype>::data_;
63  using Params<Dtype>::diff_;
64 };
65 
66 template<typename Dtype>
    // Combines the GPUParams buffers with two callback interfaces
    // (Solver<Dtype>::Callback and Net<Dtype>::Callback). All non-trivial
    // members are only declared here; their definitions are outside this
    // header.
67 class NCCL : public GPUParams<Dtype>,
68  public Solver<Dtype>::Callback,
69  public Net<Dtype>::Callback {
70  public:
    // Constructs from a solver alone.
    // NOTE(review): presumably the single-process variant, to be connected
    // later via InitSingleProcess() -- confirm in the implementation file.
74  explicit NCCL(shared_ptr<Solver<Dtype> > solver);
    // Constructs from a solver plus a uid string.
    // NOTE(review): the uid presumably comes from new_uid() and is shared
    // between cooperating instances -- confirm in the implementation file.
79  NCCL(shared_ptr<Solver<Dtype> > solver, const string& uid);
    // Declared here, defined elsewhere.
80  ~NCCL();
81 
    // Getter/setter pair working with a raw boost::barrier pointer.
    // NOTE(review): presumably backed by barrier_; this header does not show
    // who owns the barrier -- confirm before deleting or sharing it.
82  boost::barrier* barrier();
83  void set_barrier(boost::barrier* value);
84 
    // Static helper operating on a caller-provided vector of NCCL instances,
    // passed by pointer.
    // NOTE(review): presumably initializes comm_ on each instance for
    // same-process use -- confirm in the implementation file.
89  static void InitSingleProcess(vector<NCCL<Dtype>*>* nccls);
90 
    // Static helper returning a string by value.
    // NOTE(review): presumably produces the uid accepted by the two-argument
    // constructor -- confirm in the implementation file.
91  static string new_uid();
92 
    // NOTE(review): presumably broadcasts the parameter buffer from one rank
    // to the others -- confirm in the implementation file.
96  void Broadcast();
97 
    // Takes a list of GPU ids and a C string `restore`.
    // NOTE(review): `restore` presumably names a solver state to resume from
    // and may be null -- confirm against callers.
101  void Run(const vector<int>& gpus, const char* restore);
102 
103  protected:
    // Declared here, defined elsewhere.
    // NOTE(review): presumably setup shared by both constructors -- confirm.
104  void Init();
    // Empty inline body: nothing is done at this hook.
    // NOTE(review): on_start() and on_gradients_ready() presumably implement
    // Solver<Dtype>::Callback -- confirm against caffe/solver.hpp.
105  void on_start() {}
106  void run(int layer); // Net callback
107  void on_gradients_ready();
108 
    // NCCL communicator handle and CUDA stream held by this instance. Where
    // they are created and destroyed is not visible in this header.
109  ncclComm_t comm_;
110  cudaStream_t stream_;
111 
    // Shared ownership of the solver passed to the constructors.
112  shared_ptr<Solver<Dtype> > solver_;
113  // Should not be necessary, https://github.com/NVIDIA/nccl/issues/37
    // Raw pointer to a barrier (see barrier()/set_barrier()); kept as a
    // workaround for the issue linked above.
114  boost::barrier* barrier_;
    // Params<Dtype> is a dependent base, so these using-declarations are
    // needed to refer to the inherited members by unqualified name.
115  using Params<Dtype>::size_;
116  using Params<Dtype>::data_;
117  using Params<Dtype>::diff_;
118 };
119 
120 } // namespace caffe
121 
122 #endif // USE_NCCL
123 #endif // header
A layer factory that allows one to register layers. During runtime, registered layers can be called b...
Definition: blob.hpp:14