// Include guard, the glog/caffe project includes, and the first CPU math
// declaration (caffe_cpu_gemm).
// NOTE(review): the bare numbers fused into these lines (1, 2, 7, 9, ...) look
// like line numbers left over from a rendered source listing rather than
// code — confirm and strip them before compiling.
// caffe_cpu_gemm is a function template on Dtype taking two CBLAS_TRANSPOSE
// flags, int sizes M, N, K, scalars alpha and beta, and read-only pointers A
// and B.
// NOTE(review): the parameter list stops after `beta` with no closing `)`;
// the fused numbers jump from 20 to 23, so the tail of this declaration
// appears to be missing from this chunk.
1 #ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_ 2 #define CAFFE_UTIL_MATH_FUNCTIONS_H_ 7 #include "glog/logging.h" 9 #include "caffe/common.hpp" 10 #include "caffe/util/device_alternate.hpp" 11 #include "caffe/util/mkl_alternate.hpp" 17 template <
typename Dtype>
18 void caffe_cpu_gemm(
const CBLAS_TRANSPOSE TransA,
19 const CBLAS_TRANSPOSE TransB,
const int M,
const int N,
const int K,
20 const Dtype alpha,
const Dtype* A,
const Dtype* B,
const Dtype beta,
// caffe_cpu_gemv: function template on Dtype taking one CBLAS_TRANSPOSE flag,
// int sizes M and N, scalars alpha and beta, and read-only pointers A and x.
// NOTE(review): the parameter list stops after `beta` with no closing `)`;
// the fused listing numbers jump from 25 to 28, so the remaining parameter(s)
// appear to be missing from this chunk.
23 template <
typename Dtype>
24 void caffe_cpu_gemv(
const CBLAS_TRANSPOSE TransA,
const int M,
const int N,
25 const Dtype alpha,
const Dtype* A,
const Dtype* x,
const Dtype beta,
// caffe_axpy: function template on Dtype taking an int N, a scalar alpha and a
// read-only pointer X.
// NOTE(review): the parameter list stops after `X` with no closing `)`; the
// fused listing numbers jump from 29 to 32, so the rest of the declaration
// appears to be missing from this chunk.
28 template <
typename Dtype>
29 void caffe_axpy(
const int N,
const Dtype alpha,
const Dtype* X,
// Declares caffe_cpu_axpby<Dtype>; no definition is visible in this chunk.
// Parameters: int N, scalars alpha and beta, read-only pointer X, writable
// pointer Y. Returns nothing.
// NOTE(review): by BLAS naming this presumably computes
// Y = alpha * X + beta * Y over N elements — confirm against the definition.
template <typename Dtype>
void caffe_cpu_axpby(const int N, const Dtype alpha, const Dtype* X,
                     const Dtype beta, Dtype* Y);
// Declares caffe_copy<Dtype>; no definition is visible in this chunk.
// Parameters: int N, read-only pointer X, writable pointer Y. Returns nothing.
// NOTE(review): presumably copies N elements from X to Y — confirm against
// the definition.
template <typename Dtype>
void caffe_copy(const int N, const Dtype* X, Dtype* Y);
// Declares caffe_set<Dtype>; no definition is visible in this chunk.
// Parameters: int N, scalar alpha, writable pointer X. Returns nothing.
// NOTE(review): presumably assigns alpha to each of the N elements of X —
// confirm against the definition.
template <typename Dtype>
void caffe_set(const int N, const Dtype alpha, Dtype* X);
// caffe_memset: inline, non-template function taking a size_t N, an int alpha
// and an untyped writable pointer X.
// NOTE(review): the body opens with `{` but no statements or closing brace
// follow; the fused listing numbers jump from 42 to 46, so the body appears
// to be missing from this chunk.
42 inline void caffe_memset(
const size_t N,
const int alpha,
void* X) {
// Declares caffe_add_scalar<Dtype>; no definition is visible in this chunk.
// Parameters: int N, scalar alpha, writable pointer X. Returns nothing.
// NOTE(review): presumably adds alpha to each of the N elements of X in
// place — confirm against the definition.
template <typename Dtype>
void caffe_add_scalar(const int N, const Dtype alpha, Dtype* X);
// Declares caffe_scal<Dtype>; no definition is visible in this chunk.
// Parameters: int N, scalar alpha, writable pointer X. Returns nothing.
// NOTE(review): by BLAS naming this presumably scales X by alpha in place —
// confirm against the definition.
template <typename Dtype>
void caffe_scal(const int N, const Dtype alpha, Dtype* X);
// Declares caffe_sqr<Dtype>; no definition is visible in this chunk.
// Parameters: int N, read-only pointer a, writable pointer y. Returns nothing.
// NOTE(review): presumably writes the element-wise square of a into y —
// confirm against the definition.
template <typename Dtype>
void caffe_sqr(const int N, const Dtype* a, Dtype* y);
// Declares caffe_sqrt<Dtype>; no definition is visible in this chunk.
// Parameters: int N, read-only pointer a, writable pointer y. Returns nothing.
// NOTE(review): presumably writes the element-wise square root of a into y —
// confirm against the definition.
template <typename Dtype>
void caffe_sqrt(const int N, const Dtype* a, Dtype* y);
// Declares caffe_add<Dtype>; no definition is visible in this chunk.
// Parameters: int N, read-only pointers a and b, writable pointer y.
// NOTE(review): presumably y[i] = a[i] + b[i] for N elements — confirm
// against the definition.
template <typename Dtype>
void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
// Declares caffe_sub<Dtype>; no definition is visible in this chunk.
// Parameters: int N, read-only pointers a and b, writable pointer y.
// NOTE(review): presumably y[i] = a[i] - b[i] for N elements — confirm
// against the definition.
template <typename Dtype>
void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
// Declares caffe_mul<Dtype>; no definition is visible in this chunk.
// Parameters: int N, read-only pointers a and b, writable pointer y.
// NOTE(review): presumably y[i] = a[i] * b[i] for N elements — confirm
// against the definition.
template <typename Dtype>
void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
// Declares caffe_div<Dtype>; no definition is visible in this chunk.
// Parameters: int N, read-only pointers a and b, writable pointer y.
// NOTE(review): presumably y[i] = a[i] / b[i] for N elements — confirm
// against the definition (including how zero divisors are treated).
template <typename Dtype>
void caffe_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);
// Declares caffe_powx<Dtype>; no definition is visible in this chunk.
// Parameters: int n, read-only pointer a, scalar b, writable pointer y.
// NOTE(review): presumably y[i] = a[i] raised to the power b — confirm
// against the definition.
template <typename Dtype>
void caffe_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
// Declares caffe_rng_rand: takes no arguments and returns an unsigned int.
// No definition is visible in this chunk.
// NOTE(review): presumably draws one value from the framework RNG — confirm
// the source and seeding against the definition.
unsigned int caffe_rng_rand();
// Declares caffe_nextafter<Dtype>: takes a scalar b and returns a Dtype.
// No definition is visible in this chunk.
// NOTE(review): presumably returns the next representable value after b —
// the direction is not visible here; confirm against the definition.
template <typename Dtype>
Dtype caffe_nextafter(const Dtype b);
// Declares caffe_rng_uniform<Dtype>; no definition is visible in this chunk.
// Parameters: int n, scalars a and b, writable pointer r. Returns nothing.
// NOTE(review): presumably fills r with n uniform samples bounded by a and
// b — whether the bounds are inclusive is not visible here; confirm.
template <typename Dtype>
void caffe_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r);
// caffe_rng_gaussian: function template on Dtype taking an int n and scalars
// mu and sigma.
// NOTE(review): the parameter list stops after `sigma` with no closing `)`;
// the fused listing numbers jump from 82 to 85, so the output parameter and
// terminator appear to be missing from this chunk.
81 template <
typename Dtype>
82 void caffe_rng_gaussian(
const int n,
const Dtype mu,
const Dtype sigma,
// Declares the caffe_rng_bernoulli<Dtype> overload that writes into an int
// buffer; no definition is visible in this chunk.
// Parameters: int n, scalar p, writable int pointer r. Returns nothing.
// NOTE(review): presumably fills r with n Bernoulli(p) draws — confirm
// against the definition.
template <typename Dtype>
void caffe_rng_bernoulli(const int n, const Dtype p, int* r);
// Declares the caffe_rng_bernoulli<Dtype> overload that writes into an
// unsigned int buffer; no definition is visible in this chunk.
// Parameters: int n, scalar p, writable unsigned int pointer r.
// NOTE(review): presumably fills r with n Bernoulli(p) draws — confirm
// against the definition.
template <typename Dtype>
void caffe_rng_bernoulli(const int n, const Dtype p, unsigned int* r);
// Declares caffe_exp<Dtype>; no definition is visible in this chunk.
// Parameters: int n, read-only pointer a, writable pointer y. Returns nothing.
// NOTE(review): presumably y[i] = exp(a[i]) for n elements — confirm against
// the definition.
template <typename Dtype>
void caffe_exp(const int n, const Dtype* a, Dtype* y);
// Declares caffe_log<Dtype>; no definition is visible in this chunk.
// Parameters: int n, read-only pointer a, writable pointer y. Returns nothing.
// NOTE(review): presumably y[i] = log(a[i]) for n elements — confirm against
// the definition.
template <typename Dtype>
void caffe_log(const int n, const Dtype* a, Dtype* y);
// Declares caffe_abs<Dtype>; no definition is visible in this chunk.
// Parameters: int n, read-only pointer a, writable pointer y. Returns nothing.
// NOTE(review): presumably y[i] = |a[i]| for n elements — confirm against
// the definition.
template <typename Dtype>
void caffe_abs(const int n, const Dtype* a, Dtype* y);
// Declares caffe_cpu_dot<Dtype>: returns a Dtype by value.
// No definition is visible in this chunk.
// Parameters: int n, read-only pointers x and y.
// NOTE(review): presumably the dot product of the n-element vectors x and
// y — confirm against the definition.
template <typename Dtype>
Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y);
// Declares caffe_cpu_strided_dot<Dtype>: returns a Dtype by value.
// No definition is visible in this chunk.
// Parameters: int n, read-only pointer x with int incx, read-only pointer y
// with int incy.
// NOTE(review): presumably a dot product that steps x by incx and y by incy
// (BLAS-style increments) — confirm against the definition.
template <typename Dtype>
Dtype caffe_cpu_strided_dot(const int n, const Dtype* x, const int incx,
                            const Dtype* y, const int incy);
// Declares caffe_cpu_asum<Dtype>: returns a Dtype by value.
// No definition is visible in this chunk.
// Parameters: int n, read-only pointer x.
// NOTE(review): by BLAS naming this presumably returns the sum of absolute
// values of the n elements of x — confirm against the definition.
template <typename Dtype>
Dtype caffe_cpu_asum(const int n, const Dtype* x);
// Returns the sign of val as an int8_t: 1 when val is greater than zero,
// -1 when val is less than zero, and 0 otherwise.
// Each comparison yields 0 or 1, so their difference needs no branch.
// A value that compares neither below nor above zero (for example a
// floating-point NaN) yields 0.
template <typename Dtype>
inline int8_t caffe_sign(Dtype val) {
  return (Dtype(0) < val) - (val < Dtype(0));
}
// DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) starts a function template
// caffe_cpu_<name>(const int n, const Dtype* x, Dtype* y). The visible part
// checks n > 0 and that x and y are non-null, then opens a loop over
// i in [0, n).
// NOTE(review): the fused listing numbers jump from 128 to 134, so the loop
// body and the closing braces of the macro appear to be missing here.
// The macro is then invoked three times:
//   sign   -> y[i] = caffe_sign<Dtype>(x[i])
//   sgnbit -> y[i] = static_cast<bool>((std::signbit)(x[i]))
//   fabs   -> y[i] = std::fabs(x[i])
// NOTE(review): the numbers fused into these lines look like leftover
// listing line numbers, not code — confirm and strip before compiling.
124 #define DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) \ 125 template<typename Dtype> \ 126 void caffe_cpu_##name(const int n, const Dtype* x, Dtype* y) { \ 127 CHECK_GT(n, 0); CHECK(x); CHECK(y); \ 128 for (int i = 0; i < n; ++i) { \ 134 DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign<Dtype>(x[i]))
140 DEFINE_CAFFE_CPU_UNARY_FUNC(sgnbit, \
141 y[i] = static_cast<bool>((std::signbit)(x[i])))
143 DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i]))
// Declares caffe_cpu_scale<Dtype>; no definition is visible in this chunk.
// Parameters: int n, scalar alpha, read-only pointer x, writable pointer y.
// NOTE(review): presumably y[i] = alpha * x[i] for n elements — confirm
// against the definition.
template <typename Dtype>
void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype* x, Dtype* y);
// Opens the `#ifndef CPU_ONLY` section and starts the caffe_gpu_gemm
// declaration: a function template on Dtype taking two CBLAS_TRANSPOSE flags,
// int sizes M, N, K, scalars alpha and beta, and read-only pointers A and B.
// NOTE(review): the parameter list stops after `beta` with no closing `)`;
// the fused listing numbers jump from 156 to 159, so the tail of this
// declaration appears to be missing from this chunk.
// NOTE(review): on the first line the `// GPU` comment is fused with the
// following `template <` — this looks like listing residue; confirm.
148 #ifndef CPU_ONLY // GPU 153 template <
typename Dtype>
154 void caffe_gpu_gemm(
const CBLAS_TRANSPOSE TransA,
155 const CBLAS_TRANSPOSE TransB,
const int M,
const int N,
const int K,
156 const Dtype alpha,
const Dtype* A,
const Dtype* B,
const Dtype beta,
// caffe_gpu_gemv: function template on Dtype taking one CBLAS_TRANSPOSE flag,
// int sizes M and N, scalars alpha and beta, and read-only pointers A and x.
// NOTE(review): the parameter list stops after `beta` with no closing `)`;
// the fused listing numbers jump from 161 to 164, so the remaining
// parameter(s) appear to be missing from this chunk.
159 template <
typename Dtype>
160 void caffe_gpu_gemv(
const CBLAS_TRANSPOSE TransA,
const int M,
const int N,
161 const Dtype alpha,
const Dtype* A,
const Dtype* x,
const Dtype beta,
// caffe_gpu_axpy: function template on Dtype taking an int N, a scalar alpha
// and a read-only pointer X.
// NOTE(review): the parameter list stops after `X` with no closing `)`; the
// fused listing numbers jump from 165 to 168, so the rest of the declaration
// appears to be missing from this chunk.
164 template <
typename Dtype>
165 void caffe_gpu_axpy(
const int N,
const Dtype alpha,
const Dtype* X,
// Declares caffe_gpu_axpby<Dtype>; no definition is visible in this chunk.
// Parameters: int N, scalars alpha and beta, read-only pointer X, writable
// pointer Y. Returns nothing.
// NOTE(review): presumably the GPU counterpart of axpby
// (Y = alpha * X + beta * Y) with X and Y being device pointers — confirm.
template <typename Dtype>
void caffe_gpu_axpby(const int N, const Dtype alpha, const Dtype* X,
                     const Dtype beta, Dtype* Y);
// Declares caffe_gpu_memcpy (non-template); no definition is visible in this
// chunk.
// Parameters: size_t N, read-only untyped pointer X, writable untyped
// pointer Y. Returns nothing.
// NOTE(review): presumably copies N bytes from X to Y — the copy direction
// and memory spaces are not visible here; confirm against the definition.
void caffe_gpu_memcpy(const size_t N, const void* X, void* Y);
// Declares caffe_gpu_set<Dtype>; no definition is visible in this chunk.
// Parameters: int N, scalar alpha, writable pointer X. Returns nothing.
// NOTE(review): presumably assigns alpha to N elements of a device buffer
// X — confirm against the definition.
template <typename Dtype>
void caffe_gpu_set(const int N, const Dtype alpha, Dtype* X);
// caffe_gpu_memset: inline, non-template function taking a size_t N, an int
// alpha and an untyped writable pointer X. The one visible statement passes
// (X, alpha, N) to cudaMemset and wraps the call in CUDA_CHECK.
// cudaMemset fills byte-wise, so alpha acts as a byte value, not as a typed
// element value.
// NOTE(review): the fused listing numbers skip 178 and jump from 179 to 185,
// and no closing brace is visible — part of the body appears to be missing
// from this chunk.
177 inline void caffe_gpu_memset(
const size_t N,
const int alpha,
void* X) {
179 CUDA_CHECK(cudaMemset(X, alpha, N));
// Declares caffe_gpu_add_scalar<Dtype>; no definition is visible in this
// chunk.
// Parameters: int N, scalar alpha, writable pointer X. Returns nothing.
// NOTE(review): presumably adds alpha to N elements of a device buffer X in
// place — confirm against the definition.
template <typename Dtype>
void caffe_gpu_add_scalar(const int N, const Dtype alpha, Dtype* X);
// Declares the three-argument caffe_gpu_scal<Dtype> overload; no definition
// is visible in this chunk.
// Parameters: int N, scalar alpha, writable pointer X. Returns nothing.
// NOTE(review): presumably scales a device buffer X by alpha in place —
// confirm against the definition.
template <typename Dtype>
void caffe_gpu_scal(const int N, const Dtype alpha, Dtype* X);
192 template <
typename Dtype>
193 void caffe_gpu_scal(
const int N,
const Dtype alpha, Dtype* X, cudaStream_t str);
// Declares caffe_gpu_add<Dtype>; no definition is visible in this chunk.
// Parameters: int N, read-only pointers a and b, writable pointer y.
// NOTE(review): presumably y[i] = a[i] + b[i] on device buffers — confirm
// against the definition.
template <typename Dtype>
void caffe_gpu_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
// Declares caffe_gpu_sub<Dtype>; no definition is visible in this chunk.
// Parameters: int N, read-only pointers a and b, writable pointer y.
// NOTE(review): presumably y[i] = a[i] - b[i] on device buffers — confirm
// against the definition.
template <typename Dtype>
void caffe_gpu_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
// Declares caffe_gpu_mul<Dtype>; no definition is visible in this chunk.
// Parameters: int N, read-only pointers a and b, writable pointer y.
// NOTE(review): presumably y[i] = a[i] * b[i] on device buffers — confirm
// against the definition.
template <typename Dtype>
void caffe_gpu_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
// Declares caffe_gpu_div<Dtype>; no definition is visible in this chunk.
// Parameters: int N, read-only pointers a and b, writable pointer y.
// NOTE(review): presumably y[i] = a[i] / b[i] on device buffers — confirm
// against the definition.
template <typename Dtype>
void caffe_gpu_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);
// Declares caffe_gpu_abs<Dtype>; no definition is visible in this chunk.
// Parameters: int n, read-only pointer a, writable pointer y. Returns nothing.
// NOTE(review): presumably y[i] = |a[i]| on device buffers — confirm against
// the definition.
template <typename Dtype>
void caffe_gpu_abs(const int n, const Dtype* a, Dtype* y);
// Declares caffe_gpu_exp<Dtype>; no definition is visible in this chunk.
// Parameters: int n, read-only pointer a, writable pointer y. Returns nothing.
// NOTE(review): presumably y[i] = exp(a[i]) on device buffers — confirm
// against the definition.
template <typename Dtype>
void caffe_gpu_exp(const int n, const Dtype* a, Dtype* y);
// Declares caffe_gpu_log<Dtype>; no definition is visible in this chunk.
// Parameters: int n, read-only pointer a, writable pointer y. Returns nothing.
// NOTE(review): presumably y[i] = log(a[i]) on device buffers — confirm
// against the definition.
template <typename Dtype>
void caffe_gpu_log(const int n, const Dtype* a, Dtype* y);
// Declares caffe_gpu_powx<Dtype>; no definition is visible in this chunk.
// Parameters: int n, read-only pointer a, scalar b, writable pointer y.
// NOTE(review): presumably y[i] = a[i] raised to the power b on device
// buffers — confirm against the definition.
template <typename Dtype>
void caffe_gpu_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
// Declares caffe_gpu_sqrt<Dtype>; no definition is visible in this chunk.
// Parameters: int n, read-only pointer a, writable pointer y. Returns nothing.
// NOTE(review): presumably y[i] = sqrt(a[i]) on device buffers — confirm
// against the definition.
template <typename Dtype>
void caffe_gpu_sqrt(const int n, const Dtype* a, Dtype* y);
// Declares the non-template caffe_gpu_rng_uniform overload; no definition is
// visible in this chunk.
// Parameters: int n, writable unsigned int pointer r. Returns nothing.
// NOTE(review): presumably fills r with n random unsigned integers — the
// value range and the generator are not visible here; confirm.
void caffe_gpu_rng_uniform(const int n, unsigned int* r);
// Declares the templated caffe_gpu_rng_uniform<Dtype> overload; no
// definition is visible in this chunk.
// Parameters: int n, scalars a and b, writable pointer r. Returns nothing.
// NOTE(review): presumably fills r with n uniform samples bounded by a and
// b — which ends of the interval are inclusive is not visible here; confirm.
template <typename Dtype>
void caffe_gpu_rng_uniform(const int n, const Dtype a, const Dtype b,
                           Dtype* r);
// caffe_gpu_rng_gaussian: function template on Dtype taking an int n and
// scalars mu and sigma.
// NOTE(review): the parameter list stops after `sigma` with no closing `)`;
// the fused listing numbers jump from 236 to 239, so the output parameter
// and terminator appear to be missing from this chunk.
235 template <
typename Dtype>
236 void caffe_gpu_rng_gaussian(
const int n,
const Dtype mu,
const Dtype sigma,
// Declares caffe_gpu_rng_bernoulli<Dtype>; no definition is visible in this
// chunk.
// Parameters: int n, scalar p, writable int pointer r. Returns nothing.
// NOTE(review): presumably fills r with n Bernoulli(p) draws — confirm
// against the definition.
template <typename Dtype>
void caffe_gpu_rng_bernoulli(const int n, const Dtype p, int* r);
// Declares caffe_gpu_dot<Dtype>; no definition is visible in this chunk.
// Unlike caffe_cpu_dot it returns void and takes a writable pointer `out`.
// Parameters: int n, read-only pointers x and y, writable pointer out.
// NOTE(review): presumably stores the dot product of x and y in *out —
// whether out is a host or device pointer is not visible here; confirm.
template <typename Dtype>
void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out);
// Declares caffe_gpu_asum<Dtype>; no definition is visible in this chunk.
// Unlike caffe_cpu_asum it returns void and takes a writable pointer y.
// Parameters: int n, read-only pointer x, writable pointer y.
// NOTE(review): presumably stores the sum of absolute values of x in *y —
// whether y is a host or device pointer is not visible here; confirm.
template <typename Dtype>
void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y);
// Declares caffe_gpu_sign<Dtype>; no definition is visible in this chunk.
// Parameters: int n, read-only pointer x, writable pointer y. Returns nothing.
// NOTE(review): presumably the GPU counterpart of caffe_cpu_sign, writing
// the sign of each x[i] into y[i] — confirm against the definition.
template <typename Dtype>
void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y);
// Declares caffe_gpu_sgnbit<Dtype>; no definition is visible in this chunk.
// Parameters: int n, read-only pointer x, writable pointer y. Returns nothing.
// NOTE(review): presumably the GPU counterpart of caffe_cpu_sgnbit, writing
// the sign bit of each x[i] into y[i] — confirm against the definition.
template <typename Dtype>
void caffe_gpu_sgnbit(const int n, const Dtype* x, Dtype* y);
// Declares caffe_gpu_fabs<Dtype>; no definition is visible in this chunk.
// Parameters: int n, read-only pointer x, writable pointer y. Returns nothing.
// NOTE(review): presumably the GPU counterpart of caffe_cpu_fabs, writing
// the absolute value of each x[i] into y[i] — confirm against the definition.
template <typename Dtype>
void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y);
// Declares caffe_gpu_scale<Dtype>; no definition is visible in this chunk.
// Parameters: int n, scalar alpha, read-only pointer x, writable pointer y.
// NOTE(review): presumably y[i] = alpha * x[i] on device buffers — confirm
// against the definition.
template <typename Dtype>
void caffe_gpu_scale(const int n, const Dtype alpha, const Dtype* x, Dtype* y);
// DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation): the visible pieces
// declare a __global__ kernel template <name>_kernel(n, x, y) whose body opens
// with CUDA_KERNEL_LOOP(index, n), followed by float and double definitions of
// caffe_gpu_<name> that launch the kernel with
// <<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>.
// NOTE(review): the fused listing numbers skip 264-267, 269, 271-273, 275 and
// 277-283, so the loop body, the launch arguments and the closing braces
// appear to be missing here; the `#endif` at listing line 284 is the next
// visible directive.
// NOTE(review): the text after the `#endif` comment ("A layer factory ...")
// looks like documentation-tooltip residue rather than part of the header —
// confirm and remove.
260 #define DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation) \ 261 template<typename Dtype> \ 262 __global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) { \ 263 CUDA_KERNEL_LOOP(index, n) { \ 268 void caffe_gpu_##name<float>(const int n, const float* x, float* y) { \ 270 name##_kernel<float><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \ 274 void caffe_gpu_##name<double>(const int n, const double* x, double* y) { \ 276 name##_kernel<double><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \ 284 #endif // CAFFE_UTIL_MATH_FUNCTIONS_H_ A layer factory that allows one to register layers. During runtime, registered layers can be called b...
Definition: blob.hpp:14