1 #ifndef CAFFE_UTIL_CUDNN_H_ 2 #define CAFFE_UTIL_CUDNN_H_ 7 #include "caffe/common.hpp" 8 #include "caffe/proto/caffe.pb.h" 10 #define CUDNN_VERSION_MIN(major, minor, patch) \ 11 (CUDNN_VERSION >= (major * 1000 + minor * 100 + patch)) 13 #define CUDNN_CHECK(condition) \ 15 cudnnStatus_t status = condition; \ 16 CHECK_EQ(status, CUDNN_STATUS_SUCCESS) << " "\ 17 << cudnnGetErrorString(status); \ 20 inline const char* cudnnGetErrorString(cudnnStatus_t status) {
22 case CUDNN_STATUS_SUCCESS:
23 return "CUDNN_STATUS_SUCCESS";
24 case CUDNN_STATUS_NOT_INITIALIZED:
25 return "CUDNN_STATUS_NOT_INITIALIZED";
26 case CUDNN_STATUS_ALLOC_FAILED:
27 return "CUDNN_STATUS_ALLOC_FAILED";
28 case CUDNN_STATUS_BAD_PARAM:
29 return "CUDNN_STATUS_BAD_PARAM";
30 case CUDNN_STATUS_INTERNAL_ERROR:
31 return "CUDNN_STATUS_INTERNAL_ERROR";
32 case CUDNN_STATUS_INVALID_VALUE:
33 return "CUDNN_STATUS_INVALID_VALUE";
34 case CUDNN_STATUS_ARCH_MISMATCH:
35 return "CUDNN_STATUS_ARCH_MISMATCH";
36 case CUDNN_STATUS_MAPPING_ERROR:
37 return "CUDNN_STATUS_MAPPING_ERROR";
38 case CUDNN_STATUS_EXECUTION_FAILED:
39 return "CUDNN_STATUS_EXECUTION_FAILED";
40 case CUDNN_STATUS_NOT_SUPPORTED:
41 return "CUDNN_STATUS_NOT_SUPPORTED";
42 case CUDNN_STATUS_LICENSE_ERROR:
43 return "CUDNN_STATUS_LICENSE_ERROR";
45 return "Unknown cudnn status";
// NOTE(review): the recovered listing skips its own lines 46-51 just before
// this declaration; confirm whether namespace openings were dropped there.

/**
 * @brief Trait class keyed on the element type Dtype.
 *
 * Only declared here; usable instantiations come from the explicit
 * specializations for float and double.
 */
template <typename Dtype> class dataType;
53 template<>
class dataType<float> {
55 static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
56 static float oneval, zeroval;
57 static const void *one, *zero;
59 template<>
class dataType<double> {
61 static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
62 static double oneval, zeroval;
63 static const void *one, *zero;
66 template <
typename Dtype>
67 inline void createTensor4dDesc(cudnnTensorDescriptor_t* desc) {
68 CUDNN_CHECK(cudnnCreateTensorDescriptor(desc));
71 template <
typename Dtype>
72 inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc,
73 int n,
int c,
int h,
int w,
74 int stride_n,
int stride_c,
int stride_h,
int stride_w) {
75 CUDNN_CHECK(cudnnSetTensor4dDescriptorEx(*desc, dataType<Dtype>::type,
76 n, c, h, w, stride_n, stride_c, stride_h, stride_w));
79 template <
typename Dtype>
80 inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc,
81 int n,
int c,
int h,
int w) {
82 const int stride_w = 1;
83 const int stride_h = w * stride_w;
84 const int stride_c = h * stride_h;
85 const int stride_n = c * stride_c;
86 setTensor4dDesc<Dtype>(desc, n, c, h, w,
87 stride_n, stride_c, stride_h, stride_w);
90 template <
typename Dtype>
91 inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
92 int n,
int c,
int h,
int w) {
93 CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
94 #if CUDNN_VERSION_MIN(5, 0, 0) 95 CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType<Dtype>::type,
96 CUDNN_TENSOR_NCHW, n, c, h, w));
98 CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(*desc, dataType<Dtype>::type,
99 CUDNN_TENSOR_NCHW, n, c, h, w));
103 template <
typename Dtype>
104 inline void createConvolutionDesc(cudnnConvolutionDescriptor_t* conv) {
105 CUDNN_CHECK(cudnnCreateConvolutionDescriptor(conv));
108 template <
typename Dtype>
109 inline void setConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
110 cudnnTensorDescriptor_t bottom, cudnnFilterDescriptor_t filter,
111 int pad_h,
int pad_w,
int stride_h,
int stride_w) {
112 CUDNN_CHECK(cudnnSetConvolution2dDescriptor(*conv,
113 pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION));
116 template <
typename Dtype>
117 inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc,
118 PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode,
119 int h,
int w,
int pad_h,
int pad_w,
int stride_h,
int stride_w) {
120 switch (poolmethod) {
121 case PoolingParameter_PoolMethod_MAX:
122 *mode = CUDNN_POOLING_MAX;
124 case PoolingParameter_PoolMethod_AVE:
125 *mode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
128 LOG(FATAL) <<
"Unknown pooling method.";
130 CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc));
131 #if CUDNN_VERSION_MIN(5, 0, 0) 132 CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode,
133 CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w));
135 CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(*pool_desc, *mode,
136 CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w));
140 template <
typename Dtype>
141 inline void createActivationDescriptor(cudnnActivationDescriptor_t* activ_desc,
142 cudnnActivationMode_t mode) {
143 CUDNN_CHECK(cudnnCreateActivationDescriptor(activ_desc));
144 CUDNN_CHECK(cudnnSetActivationDescriptor(*activ_desc, mode,
145 CUDNN_PROPAGATE_NAN, Dtype(0)));
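#endif  // CAFFE_UTIL_CUDNN_H_

// Documentation-viewer residue captured together with this listing (it is not
// part of the header itself):
//   "A layer factory that allows one to register layers. During runtime,
//    registered layers can be called b..." [truncated in the capture]
//   Definition: blob.hpp:14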