Caffe
math_functions.hpp
1 #ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_
2 #define CAFFE_UTIL_MATH_FUNCTIONS_H_
3 
4 #include <stdint.h>
5 #include <cmath> // for std::fabs and std::signbit
6 
7 #include "glog/logging.h"
8 
9 #include "caffe/common.hpp"
10 #include "caffe/util/device_alternate.hpp"
11 #include "caffe/util/mkl_alternate.hpp"
12 
13 namespace caffe {
14 
15 // Caffe gemm provides a simpler interface to the gemm functions, with the
16 // limitation that the data has to be contiguous in memory.
// Declaration only: takes the transpose flags for A and B, the sizes M, N and
// K, the scalars alpha and beta, the inputs A and B, and the output C.
// NOTE(review): presumably C = alpha * op(A) * op(B) + beta * C as in BLAS
// gemm; the definition is not in this header, so confirm it there.
17 template <typename Dtype>
18 void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA,
19  const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
20  const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta,
21  Dtype* C);
22 
// Matrix-vector counterpart of caffe_cpu_gemm (declaration only).
// NOTE(review): presumably y = alpha * op(A) * x + beta * y as in BLAS gemv —
// confirm against the definition.
23 template <typename Dtype>
24 void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
25  const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
26  Dtype* y);
27 
// NOTE(review): presumably Y = alpha * X + Y over N elements (BLAS axpy) —
// confirm against the definition.
28 template <typename Dtype>
29 void caffe_axpy(const int N, const Dtype alpha, const Dtype* X,
30  Dtype* Y);
31 
// NOTE(review): presumably Y = alpha * X + beta * Y over N elements — confirm
// against the definition.
32 template <typename Dtype>
33 void caffe_cpu_axpby(const int N, const Dtype alpha, const Dtype* X,
34  const Dtype beta, Dtype* Y);
35 
// Takes a read-only source X and a writable destination Y.
// NOTE(review): presumably copies N elements (not bytes) from X to Y — confirm.
36 template <typename Dtype>
37 void caffe_copy(const int N, const Dtype *X, Dtype *Y);
38 
// NOTE(review): presumably assigns alpha to each of the N elements of X —
// confirm against the definition.
39 template <typename Dtype>
40 void caffe_set(const int N, const Dtype alpha, Dtype *X);
41 
// Fills the first N bytes of the buffer at X with the byte value alpha
// (memset converts alpha to unsigned char before storing it).
//
// N      number of bytes to fill.
// alpha  fill value; only its low-order byte is used.
// X      destination buffer, which must be writable for N bytes.
//
// A zero-length fill returns immediately, so a null or otherwise unusable X
// paired with N == 0 is never handed to memset; the C standard requires a
// valid pointer even when the length is zero.
inline void caffe_memset(const size_t N, const int alpha, void* X) {
  if (N == 0) {
    return;
  }
  memset(X, alpha, N);  // NOLINT(caffe/alt_fn)
}
45 
// The prototypes below are declarations only; none is defined in this header.
// NOTE(review): the per-function notes that follow are read off the names and
// signatures — confirm each against its definition.

// NOTE(review): presumably adds alpha to each of the N elements of X in place.
46 template <typename Dtype>
47 void caffe_add_scalar(const int N, const Dtype alpha, Dtype *X);
48 
// NOTE(review): presumably scales the N elements of X by alpha in place
// (BLAS scal).
49 template <typename Dtype>
50 void caffe_scal(const int N, const Dtype alpha, Dtype *X);
51 
// Unary helpers: read-only input a, writable output y.
// NOTE(review): presumably y[i] = a[i]^2 and y[i] = sqrt(a[i]) respectively.
52 template <typename Dtype>
53 void caffe_sqr(const int N, const Dtype* a, Dtype* y);
54 
55 template <typename Dtype>
56 void caffe_sqrt(const int N, const Dtype* a, Dtype* y);
57 
// Binary helpers: read-only inputs a and b, writable output y.
// NOTE(review): presumably element-wise a + b, a - b, a * b and a / b.
58 template <typename Dtype>
59 void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
60 
61 template <typename Dtype>
62 void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
63 
64 template <typename Dtype>
65 void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
66 
67 template <typename Dtype>
68 void caffe_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);
69 
// Here b is a single scalar rather than a vector.
// NOTE(review): presumably y[i] = a[i] raised to the power b.
70 template <typename Dtype>
71 void caffe_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
72 
// Takes no arguments and returns an unsigned int.
// NOTE(review): presumably one draw from Caffe's random number generator.
73 unsigned int caffe_rng_rand();
74 
// NOTE(review): presumably the next representable Dtype value after b; the
// direction is not visible from the signature — confirm.
75 template <typename Dtype>
76 Dtype caffe_nextafter(const Dtype b);
77 
// Random fills: n outputs are written through r.
// NOTE(review): presumably uniform samples bounded by a and b; whether the
// bounds are inclusive is not visible here.
78 template <typename Dtype>
79 void caffe_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r);
80 
// NOTE(review): presumably Gaussian samples with mean mu and standard
// deviation sigma.
81 template <typename Dtype>
82 void caffe_rng_gaussian(const int n, const Dtype mu, const Dtype sigma,
83  Dtype* r);
84 
// The two caffe_rng_bernoulli overloads differ only in the element type of the
// output buffer (int vs. unsigned int).
// NOTE(review): presumably p is the probability of drawing a 1.
85 template <typename Dtype>
86 void caffe_rng_bernoulli(const int n, const Dtype p, int* r);
87 
88 template <typename Dtype>
89 void caffe_rng_bernoulli(const int n, const Dtype p, unsigned int* r);
90 
// Unary helpers: read-only input a, writable output y.
// NOTE(review): presumably element-wise exp, natural log and absolute value.
91 template <typename Dtype>
92 void caffe_exp(const int n, const Dtype* a, Dtype* y);
93 
94 template <typename Dtype>
95 void caffe_log(const int n, const Dtype* a, Dtype* y);
96 
97 template <typename Dtype>
98 void caffe_abs(const int n, const Dtype* a, Dtype* y);
99 
// Returns a Dtype computed from the two read-only vectors x and y.
// NOTE(review): presumably their dot product over n elements.
100 template <typename Dtype>
101 Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y);
102 
// Same inputs as caffe_cpu_dot plus incx and incy.
// NOTE(review): presumably incx/incy are the element strides of x and y.
103 template <typename Dtype>
104 Dtype caffe_cpu_strided_dot(const int n, const Dtype* x, const int incx,
105  const Dtype* y, const int incy);
106 
107 // Returns the sum of the absolute values of the elements of vector x
108 template <typename Dtype>
109 Dtype caffe_cpu_asum(const int n, const Dtype* x);
110 
// Branch-free, type-safe signum, following
// http://stackoverflow.com/questions/1903954/is-there-a-standard-sign-function-signum-sgn-in-c-c
//
// Yields +1 when val compares greater than Dtype(0), -1 when it compares
// less, and 0 when neither comparison holds (zero, or NaN for floating-point
// types). Only operator< is required of Dtype.
template <typename Dtype>
inline int8_t caffe_sign(Dtype val) {
  const bool above_zero = Dtype(0) < val;
  const bool below_zero = val < Dtype(0);
  // Each flag promotes to 0 or 1, so the difference is -1, 0 or +1.
  return above_zero - below_zero;
}
117 
118 // The following two macros are modifications of DEFINE_VSL_UNARY_FUNC
119 // in include/caffe/util/mkl_alternate.hpp authored by @Rowland Depp.
120 // Please refer to commit 7e8ef25c7 of the boost-eigen branch.
121 // Cherry-picking that commit caused a conflict that was hard to resolve,
122 // and copying that whole file is inconvenient for code reviewing,
123 // so the macros have to be pasted here temporarily.
// Defines caffe_cpu_<name>(n, x, y), a function template that executes
// `operation` once for every index i in [0, n).
// `operation` is pasted verbatim into the loop body, so it must be written in
// terms of the generated function's own names: n (count), x (read-only
// input), y (writable output) and i (loop index).
// The generated function CHECK-fails unless n > 0 and both pointers are
// non-null, so an empty range (n == 0) is rejected rather than ignored.
124 #define DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) \
125  template<typename Dtype> \
126  void caffe_cpu_##name(const int n, const Dtype* x, Dtype* y) { \
127  CHECK_GT(n, 0); CHECK(x); CHECK(y); \
128  for (int i = 0; i < n; ++i) { \
129  operation; \
130  } \
131  }
132 
133 // Defines caffe_cpu_sign: output is 1 for the positives, 0 for zero, and -1 for the negatives
134 DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign<Dtype>(x[i]))
135 
136 // Defines caffe_cpu_sgnbit: y[i] is 1 if x[i] has its sign bit set, else 0
137 // (the std::signbit result is cast to bool before being stored).
// The name sgnbit is meant to avoid conflicts with std::signbit in the macro.
138 // The extra parens are needed because CUDA < 6.5 defines signbit as a macro,
139 // and we don't want that to expand here when CUDA headers are also included.
140 DEFINE_CAFFE_CPU_UNARY_FUNC(sgnbit, \
141  y[i] = static_cast<bool>((std::signbit)(x[i])))
142 
// Defines caffe_cpu_fabs: y[i] = std::fabs(x[i]) for every element.
143 DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i]))
144 
// Declaration only; the definition is not in this header.
// Takes a read-only input x, a scalar alpha and a writable output y.
// NOTE(review): presumably writes alpha * x[i] into y[i] for n elements —
// confirm against the definition.
145 template <typename Dtype>
146 void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y);
147 
148 #ifndef CPU_ONLY // GPU
149 
150 // Caffe gpu gemm provides an interface that is almost the same as the cpu
151 // gemm function - following the c convention and calling the fortran-order
152 // gpu code under the hood.
// The parameter list is identical to caffe_cpu_gemm's. Declaration only.
153 template <typename Dtype>
154 void caffe_gpu_gemm(const CBLAS_TRANSPOSE TransA,
155  const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
156  const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta,
157  Dtype* C);
158 
// Same parameter list as caffe_cpu_gemv. Declaration only.
// NOTE(review): presumably the pointers must refer to device memory — confirm.
159 template <typename Dtype>
160 void caffe_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
161  const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
162  Dtype* y);
163 
// Same parameter list as caffe_axpy. Declaration only.
164 template <typename Dtype>
165 void caffe_gpu_axpy(const int N, const Dtype alpha, const Dtype* X,
166  Dtype* Y);
167 
// Same parameter list as caffe_cpu_axpby. Declaration only.
168 template <typename Dtype>
169 void caffe_gpu_axpby(const int N, const Dtype alpha, const Dtype* X,
170  const Dtype beta, Dtype* Y);
171 
// Untyped copy taking a read-only source X and a writable destination Y.
// NOTE(review): presumably copies N bytes from X to Y; the copy kind
// (host/device direction) is not visible here — confirm in the definition.
172 void caffe_gpu_memcpy(const size_t N, const void *X, void *Y);
173 
// Same parameter list as caffe_set. Declaration only.
174 template <typename Dtype>
175 void caffe_gpu_set(const int N, const Dtype alpha, Dtype *X);
176 
177 inline void caffe_gpu_memset(const size_t N, const int alpha, void* X) {
178 #ifndef CPU_ONLY
179  CUDA_CHECK(cudaMemset(X, alpha, N)); // NOLINT(caffe/alt_fn)
180 #else
181  NO_GPU;
182 #endif
183 }
184 
// The prototypes below are declarations only; none is defined in this header.
// Most mirror the parameter lists of the CPU helpers with matching names.
// NOTE(review): presumably every pointer argument must refer to device
// memory — confirm against the definitions.
185 template <typename Dtype>
186 void caffe_gpu_add_scalar(const int N, const Dtype alpha, Dtype *X);
187 
188 template <typename Dtype>
189 void caffe_gpu_scal(const int N, const Dtype alpha, Dtype *X);
190 
// Overload of caffe_gpu_scal that additionally takes a cudaStream_t.
// NOTE(review): this inner CPU_ONLY guard is nested inside the header's
// enclosing `#ifndef CPU_ONLY // GPU` region, so it appears to be redundant.
191 #ifndef CPU_ONLY
192 template <typename Dtype>
193 void caffe_gpu_scal(const int N, const Dtype alpha, Dtype* X, cudaStream_t str);
194 #endif
195 
// Binary helpers: read-only inputs a and b, writable output y.
196 template <typename Dtype>
197 void caffe_gpu_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
198 
199 template <typename Dtype>
200 void caffe_gpu_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
201 
202 template <typename Dtype>
203 void caffe_gpu_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
204 
205 template <typename Dtype>
206 void caffe_gpu_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);
207 
// Unary helpers: read-only input a, writable output y.
208 template <typename Dtype>
209 void caffe_gpu_abs(const int n, const Dtype* a, Dtype* y);
210 
211 template <typename Dtype>
212 void caffe_gpu_exp(const int n, const Dtype* a, Dtype* y);
213 
214 template <typename Dtype>
215 void caffe_gpu_log(const int n, const Dtype* a, Dtype* y);
216 
// Here b is a single scalar rather than a vector.
217 template <typename Dtype>
218 void caffe_gpu_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
219 
220 template <typename Dtype>
221 void caffe_gpu_sqrt(const int n, const Dtype* a, Dtype* y);
222 
223 // caffe_gpu_rng_uniform with two arguments generates integers in the range
224 // [0, UINT_MAX].
225 void caffe_gpu_rng_uniform(const int n, unsigned int* r);
226 
227 // caffe_gpu_rng_uniform with four arguments generates floats in the range
228 // (a, b] (strictly greater than a, less than or equal to b) due to the
229 // specification of curandGenerateUniform. With a = 0, b = 1, just calls
230 // curandGenerateUniform; with other limits will shift and scale the outputs
231 // appropriately after calling curandGenerateUniform.
232 template <typename Dtype>
233 void caffe_gpu_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r);
234 
// NOTE(review): presumably Gaussian samples with mean mu and standard
// deviation sigma, written through r.
235 template <typename Dtype>
236 void caffe_gpu_rng_gaussian(const int n, const Dtype mu, const Dtype sigma,
237  Dtype* r);
238 
// Only the int* output overload is declared for the GPU; the CPU side also
// declares an unsigned int* overload.
239 template <typename Dtype>
240 void caffe_gpu_rng_bernoulli(const int n, const Dtype p, int* r);
241 
// Unlike caffe_cpu_dot and caffe_cpu_asum, which return a Dtype, the GPU
// variants below return void and take an extra output pointer (out / y).
// NOTE(review): whether that pointer is host or device memory is not visible
// here — confirm in the definition.
242 template <typename Dtype>
243 void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out);
244 
245 template <typename Dtype>
246 void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y);
247 
// Primary templates for the GPU unary functions; their signatures match the
// float/double specializations that DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC
// generates for caffe_gpu_<name>.
248 template<typename Dtype>
249 void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y);
250 
251 template<typename Dtype>
252 void caffe_gpu_sgnbit(const int n, const Dtype* x, Dtype* y);
253 
254 template <typename Dtype>
255 void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y);
256 
// Same parameter list as caffe_cpu_scale. Declaration only.
257 template <typename Dtype>
258 void caffe_gpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y);
259 
// Defines the GPU implementation of a unary element-wise function.
// Expands to:
//   1. a templated __global__ kernel `<name>_kernel(n, x, y)` that executes
//      `operation` inside CUDA_KERNEL_LOOP(index, n), and
//   2. explicit specializations of caffe_gpu_<name> for float and double that
//      launch that kernel with CAFFE_GET_BLOCKS(n) blocks of
//      CAFFE_CUDA_NUM_THREADS threads.
// `operation` is pasted verbatim into the kernel loop, so it must be written
// in terms of the kernel's own names: index, n, x and y.
// NOTE(review): the generated launchers contain no post-launch error check;
// confirm whether that is intentional.
// NOTE(review): the specializations are emitted as non-inline definitions, so
// this macro is presumably meant to be invoked from a single .cu file per
// name — confirm.
260 #define DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation) \
261 template<typename Dtype> \
262 __global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) { \
263  CUDA_KERNEL_LOOP(index, n) { \
264  operation; \
265  } \
266 } \
267 template <> \
268 void caffe_gpu_##name<float>(const int n, const float* x, float* y) { \
269  /* NOLINT_NEXT_LINE(whitespace/operators) */ \
270  name##_kernel<float><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \
271  n, x, y); \
272 } \
273 template <> \
274 void caffe_gpu_##name<double>(const int n, const double* x, double* y) { \
275  /* NOLINT_NEXT_LINE(whitespace/operators) */ \
276  name##_kernel<double><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \
277  n, x, y); \
278 }
279 
280 #endif // !CPU_ONLY
281 
282 } // namespace caffe
283 
284 #endif // CAFFE_UTIL_MATH_FUNCTIONS_H_
A layer factory that allows one to register layers. During runtime, registered layers can be called b...
Definition: blob.hpp:14