1 #ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_     2 #define CAFFE_UTIL_MATH_FUNCTIONS_H_     7 #include "glog/logging.h"     9 #include "caffe/common.hpp"    10 #include "caffe/util/device_alternate.hpp"    11 #include "caffe/util/mkl_alternate.hpp"    17 template <
typename Dtype>
    18 void caffe_cpu_gemm(
const CBLAS_TRANSPOSE TransA,
    19     const CBLAS_TRANSPOSE TransB, 
const int M, 
const int N, 
const int K,
    20     const Dtype alpha, 
const Dtype* A, 
const Dtype* B, 
const Dtype beta,
    23 template <
typename Dtype>
    24 void caffe_cpu_gemv(
const CBLAS_TRANSPOSE TransA, 
const int M, 
const int N,
    25     const Dtype alpha, 
const Dtype* A, 
const Dtype* x, 
const Dtype beta,
    28 template <
typename Dtype>
    29 void caffe_axpy(
const int N, 
const Dtype alpha, 
const Dtype* X,
    // Declaration only; the definition is not part of this header.
    // NOTE(review): by BLAS naming this presumably computes
    // Y = alpha * X + beta * Y over N elements -- confirm in the implementation.
    template <typename Dtype>
    void caffe_cpu_axpby(const int N, const Dtype alpha, const Dtype* X,
                         const Dtype beta, Dtype* Y);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably copies N elements from X to Y -- confirm in the
    // implementation (including which memory spaces are accepted).
    template <typename Dtype>
    void caffe_copy(const int N, const Dtype* X, Dtype* Y);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably assigns alpha to each of the N elements of X --
    // confirm in the implementation.
    template <typename Dtype>
    void caffe_set(const int N, const Dtype alpha, Dtype* X);
    42 inline void caffe_memset(
const size_t N, 
const int alpha, 
void* X) {
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably adds alpha to each of the N elements of X in
    // place -- confirm in the implementation.
    template <typename Dtype>
    void caffe_add_scalar(const int N, const Dtype alpha, Dtype* X);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): by BLAS naming this presumably scales X in place,
    // X = alpha * X over N elements -- confirm in the implementation.
    template <typename Dtype>
    void caffe_scal(const int N, const Dtype alpha, Dtype* X);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably y[i] = a[i] * a[i] for i in [0, N) -- confirm in
    // the implementation.
    template <typename Dtype>
    void caffe_sqr(const int N, const Dtype* a, Dtype* y);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably y[i] = sqrt(a[i]) for i in [0, N) -- confirm in
    // the implementation.
    template <typename Dtype>
    void caffe_sqrt(const int N, const Dtype* a, Dtype* y);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably y[i] = a[i] + b[i] for i in [0, N) -- confirm in
    // the implementation.
    template <typename Dtype>
    void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably y[i] = a[i] - b[i] for i in [0, N) -- confirm in
    // the implementation.
    template <typename Dtype>
    void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably y[i] = a[i] * b[i] for i in [0, N) -- confirm in
    // the implementation.
    template <typename Dtype>
    void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably y[i] = a[i] / b[i] for i in [0, N) -- confirm in
    // the implementation (including behaviour for zero divisors).
    template <typename Dtype>
    void caffe_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably y[i] = pow(a[i], b) for i in [0, n), with b a
    // scalar exponent -- confirm in the implementation.
    template <typename Dtype>
    void caffe_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably returns one random unsigned integer from the
    // framework's RNG -- source and range are not visible here; confirm.
    unsigned int caffe_rng_rand();
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably returns the next representable Dtype value after
    // b -- the direction is not visible here; confirm in the implementation.
    template <typename Dtype>
    Dtype caffe_nextafter(const Dtype b);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably fills r[0..n) with uniform samples bounded by a
    // and b -- whether the bounds are inclusive is not visible here; confirm.
    template <typename Dtype>
    void caffe_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r);
    81 template <
typename Dtype>
    82 void caffe_rng_gaussian(
const int n, 
const Dtype mu, 
const Dtype sigma,
    // Declaration only (int output overload); the definition is not part of
    // this header.
    // NOTE(review): presumably fills r[0..n) with Bernoulli draws of
    // probability p -- confirm in the implementation.
    template <typename Dtype>
    void caffe_rng_bernoulli(const int n, const Dtype p, int* r);
    // Declaration only (unsigned int output overload); the definition is not
    // part of this header.
    // NOTE(review): presumably fills r[0..n) with Bernoulli draws of
    // probability p -- confirm in the implementation.
    template <typename Dtype>
    void caffe_rng_bernoulli(const int n, const Dtype p, unsigned int* r);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably y[i] = exp(a[i]) for i in [0, n) -- confirm in
    // the implementation.
    template <typename Dtype>
    void caffe_exp(const int n, const Dtype* a, Dtype* y);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably y[i] = log(a[i]) for i in [0, n) -- confirm in
    // the implementation.
    template <typename Dtype>
    void caffe_log(const int n, const Dtype* a, Dtype* y);
    // Declaration only; the definition is not part of this header.
    // NOTE(review): presumably y[i] = |a[i]| for i in [0, n) -- confirm in the
    // implementation.
    template <typename Dtype>
    void caffe_abs(const int n, const Dtype* a, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably returns the dot product of x and y over n
   // elements -- confirm in the implementation.
   template <typename Dtype>
   Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably a dot product over n elements where x is stepped
   // by incx and y by incy (BLAS-style increments) -- confirm in the
   // implementation.
   template <typename Dtype>
   Dtype caffe_cpu_strided_dot(const int n, const Dtype* x, const int incx,
                               const Dtype* y, const int incy);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): by BLAS naming this presumably returns the sum of absolute
   // values of x[0..n) -- confirm in the implementation.
   template <typename Dtype>
   Dtype caffe_cpu_asum(const int n, const Dtype* x);
   // Returns the sign of val: +1 when val is greater than zero, -1 when it is
   // less than zero, and 0 when neither comparison holds.
   // Each comparison yields 0 or 1, so their difference is the sign; this
   // avoids branching on the value.
   template <typename Dtype>
   inline int8_t caffe_sign(Dtype val) {
     return (Dtype(0) < val) - (val < Dtype(0));
   }
   124 #define DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) \   125   template<typename Dtype> \   126   void caffe_cpu_##name(const int n, const Dtype* x, Dtype* y) { \   127     CHECK_GT(n, 0); CHECK(x); CHECK(y); \   128     for (int i = 0; i < n; ++i) { \   134 DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign<Dtype>(x[i]))
   140 DEFINE_CAFFE_CPU_UNARY_FUNC(sgnbit, \
   141     y[i] = static_cast<bool>((std::signbit)(x[i])))
   143 DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i]))
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably y[i] = alpha * x[i] for i in [0, n), leaving x
   // untouched -- confirm in the implementation.
   template <typename Dtype>
   void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype* x,
                        Dtype* y);
   148 #ifndef CPU_ONLY  // GPU   153 template <
typename Dtype>
   154 void caffe_gpu_gemm(
const CBLAS_TRANSPOSE TransA,
   155     const CBLAS_TRANSPOSE TransB, 
const int M, 
const int N, 
const int K,
   156     const Dtype alpha, 
const Dtype* A, 
const Dtype* B, 
const Dtype beta,
   159 template <
typename Dtype>
   160 void caffe_gpu_gemv(
const CBLAS_TRANSPOSE TransA, 
const int M, 
const int N,
   161     const Dtype alpha, 
const Dtype* A, 
const Dtype* x, 
const Dtype beta,
   164 template <
typename Dtype>
   165 void caffe_gpu_axpy(
const int N, 
const Dtype alpha, 
const Dtype* X,
   // Declaration only; the definition is not part of this header.
   // NOTE(review): GPU counterpart of the CPU axpby, presumably
   // Y = alpha * X + beta * Y over N elements with X and Y being device
   // pointers -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_axpby(const int N, const Dtype alpha, const Dtype* X,
                        const Dtype beta, Dtype* Y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably copies N bytes from X to Y -- the copy direction
   // and accepted memory spaces are not visible here; confirm.
   void caffe_gpu_memcpy(const size_t N, const void* X, void* Y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably assigns alpha to each of the N elements of the
   // device buffer X -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_set(const int N, const Dtype alpha, Dtype* X);
   177 inline void caffe_gpu_memset(
const size_t N, 
const int alpha, 
void* X) {
   179   CUDA_CHECK(cudaMemset(X, alpha, N));  
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably adds alpha to each of the N elements of the
   // device buffer X in place -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_add_scalar(const int N, const Dtype alpha, Dtype* X);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably scales the device buffer X in place,
   // X = alpha * X over N elements -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_scal(const int N, const Dtype alpha, Dtype* X);
   192 template <
typename Dtype>
   193 void caffe_gpu_scal(
const int N, 
const Dtype alpha, Dtype* X, cudaStream_t str);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably y[i] = a[i] + b[i] for i in [0, N) on device
   // buffers -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably y[i] = a[i] - b[i] for i in [0, N) on device
   // buffers -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably y[i] = a[i] * b[i] for i in [0, N) on device
   // buffers -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably y[i] = a[i] / b[i] for i in [0, N) on device
   // buffers -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably y[i] = |a[i]| for i in [0, n) on device buffers
   // -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_abs(const int n, const Dtype* a, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably y[i] = exp(a[i]) for i in [0, n) on device
   // buffers -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_exp(const int n, const Dtype* a, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably y[i] = log(a[i]) for i in [0, n) on device
   // buffers -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_log(const int n, const Dtype* a, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably y[i] = pow(a[i], b) for i in [0, n) on device
   // buffers, with b a scalar exponent -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably y[i] = sqrt(a[i]) for i in [0, n) on device
   // buffers -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_sqrt(const int n, const Dtype* a, Dtype* y);
   // Declaration only (raw unsigned int overload); the definition is not part
   // of this header.
   // NOTE(review): presumably fills r[0..n) with random unsigned integers on
   // the device -- the value range is not visible here; confirm.
   void caffe_gpu_rng_uniform(const int n, unsigned int* r);
   // Declaration only (floating-point range overload); the definition is not
   // part of this header.
   // NOTE(review): presumably fills r[0..n) with uniform samples bounded by a
   // and b on the device -- bound inclusivity is not visible here; confirm.
   template <typename Dtype>
   void caffe_gpu_rng_uniform(const int n, const Dtype a, const Dtype b,
                              Dtype* r);
   235 template <
typename Dtype>
   236 void caffe_gpu_rng_gaussian(
const int n, 
const Dtype mu, 
const Dtype sigma,
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably fills r[0..n) with Bernoulli draws of
   // probability p on the device -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_rng_bernoulli(const int n, const Dtype p, int* r);
   // Declaration only; the definition is not part of this header.
   // Unlike caffe_cpu_dot, the result is delivered through the out pointer
   // rather than the return value.
   // NOTE(review): presumably *out receives the dot product of x and y over n
   // elements; whether out is a host or device address is not visible here --
   // confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out);
   // Declaration only; the definition is not part of this header.
   // Unlike caffe_cpu_asum, the result is delivered through the y pointer
   // rather than the return value.
   // NOTE(review): presumably *y receives the sum of absolute values of
   // x[0..n); whether y is a host or device address is not visible here --
   // confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably the GPU counterpart of caffe_cpu_sign, writing
   // the sign of x[i] to y[i] for i in [0, n) -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably the GPU counterpart of caffe_cpu_sgnbit, writing
   // the sign bit of x[i] to y[i] for i in [0, n) -- confirm in the
   // implementation.
   template <typename Dtype>
   void caffe_gpu_sgnbit(const int n, const Dtype* x, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably the GPU counterpart of caffe_cpu_fabs, writing
   // |x[i]| to y[i] for i in [0, n) -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y);
   // Declaration only; the definition is not part of this header.
   // NOTE(review): presumably y[i] = alpha * x[i] for i in [0, n) on device
   // buffers, leaving x untouched -- confirm in the implementation.
   template <typename Dtype>
   void caffe_gpu_scale(const int n, const Dtype alpha, const Dtype* x,
                        Dtype* y);
   260 #define DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation) \   261 template<typename Dtype> \   262 __global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) { \   263   CUDA_KERNEL_LOOP(index, n) { \   268 void caffe_gpu_##name<float>(const int n, const float* x, float* y) { \   270   name##_kernel<float><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \   274 void caffe_gpu_##name<double>(const int n, const double* x, double* y) { \   276   name##_kernel<double><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>( \   284 #endif  // CAFFE_UTIL_MATH_FUNCTIONS_H_ A layer factory that allows one to register layers. During runtime, registered layers can be called b...
Definition: blob.hpp:14