forked from csarofeen/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
common_cudnn.h
321 lines (292 loc) · 9.66 KB
/
common_cudnn.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
#ifndef CAFFE2_CORE_COMMON_CUDNN_H_
#define CAFFE2_CORE_COMMON_CUDNN_H_
#include <array>
#include <mutex>
#include "caffe2/core/common.h"
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/types.h"
#ifndef CAFFE2_USE_CUDNN
#error("This Caffe2 install is not built with cudnn, so you should not include this file.");
#endif
#include <cudnn.h>
static_assert(
CUDNN_VERSION >= 5000,
"Caffe2 requires cudnn version 5.0 or above.");
#if CUDNN_VERSION < 6000
#pragma message "CUDNN version under 6.0 is supported at best effort."
#pragma message "We strongly encourage you to move to 6.0 and above."
#pragma message "This message is intended to annoy you enough to update."
#endif // CUDNN_VERSION < 6000
// CUDNN_VERSION_MIN(major, minor, patch) is true when the cudnn headers being
// compiled against are at least version major.minor.patch. It compares
// CUDNN_VERSION with major * 1000 + minor * 100 + patch, and expands to a
// plain integer comparison so it can be used in #if directives.
#define CUDNN_VERSION_MIN(major, minor, patch) \
(CUDNN_VERSION >= ((major) * 1000 + (minor) * 100 + (patch)))
namespace caffe2 {
namespace internal {
/**
 * Translates a cudnnStatus_t into the spelling of its enumerator.
 *
 * @param status  Status code to describe.
 * @return A string literal naming the status, or
 *         "Unknown cudnn error number" when the value is not one of the
 *         codes listed in the table below.
 */
inline const char* cudnnGetErrorString(cudnnStatus_t status) {
  struct StatusName {
    cudnnStatus_t code;
    const char* name;
  };
  // One entry per status code this helper knows how to name.
  static const StatusName kStatusNames[] = {
      {CUDNN_STATUS_SUCCESS, "CUDNN_STATUS_SUCCESS"},
      {CUDNN_STATUS_NOT_INITIALIZED, "CUDNN_STATUS_NOT_INITIALIZED"},
      {CUDNN_STATUS_ALLOC_FAILED, "CUDNN_STATUS_ALLOC_FAILED"},
      {CUDNN_STATUS_BAD_PARAM, "CUDNN_STATUS_BAD_PARAM"},
      {CUDNN_STATUS_INTERNAL_ERROR, "CUDNN_STATUS_INTERNAL_ERROR"},
      {CUDNN_STATUS_INVALID_VALUE, "CUDNN_STATUS_INVALID_VALUE"},
      {CUDNN_STATUS_ARCH_MISMATCH, "CUDNN_STATUS_ARCH_MISMATCH"},
      {CUDNN_STATUS_MAPPING_ERROR, "CUDNN_STATUS_MAPPING_ERROR"},
      {CUDNN_STATUS_EXECUTION_FAILED, "CUDNN_STATUS_EXECUTION_FAILED"},
      {CUDNN_STATUS_NOT_SUPPORTED, "CUDNN_STATUS_NOT_SUPPORTED"},
      {CUDNN_STATUS_LICENSE_ERROR, "CUDNN_STATUS_LICENSE_ERROR"},
  };
  for (const StatusName& entry : kStatusNames) {
    if (entry.code == status) {
      return entry.name;
    }
  }
  return "Unknown cudnn error number";
}
} // namespace internal
// CUDNN_ENFORCE(condition)
//
// Wraps a cudnn call so its result is checked. `condition` is evaluated
// exactly once and its cudnnStatus_t result is stored in a block-local
// variable, which is then compared against CUDNN_STATUS_SUCCESS through
// CAFFE_ENFORCE_EQ. The extra message arguments carry the file and line of the
// expansion site and the text from
// ::caffe2::internal::cudnnGetErrorString(status).
//
// The body is wrapped in do { ... } while (0) so the macro behaves as a single
// statement and must be followed by a semicolon.
//
// NOTE(review): the block-local is named `status`. A `condition` expression
// that itself refers to a caller variable called `status` would bind to the
// macro's own (not yet initialized) local instead, so avoid that name in
// arguments passed to this macro.
#define CUDNN_ENFORCE(condition) \
do { \
cudnnStatus_t status = condition; \
CAFFE_ENFORCE_EQ( \
status, \
CUDNN_STATUS_SUCCESS, \
", Error at: ", \
__FILE__, \
":", \
__LINE__, \
": ", \
::caffe2::internal::cudnnGetErrorString(status)); \
} while (0)
// CUDNN_CHECK(condition)
//
// Variant of the cudnn status check built on CHECK instead of
// CAFFE_ENFORCE_EQ. `condition` is evaluated exactly once; the resulting
// cudnnStatus_t is compared with CUDNN_STATUS_SUCCESS inside CHECK, and the
// text from ::caffe2::internal::cudnnGetErrorString(status) is streamed into
// the CHECK message.
//
// This header uses it in the noexcept destructors of the descriptor wrappers.
// NOTE(review): presumably CHECK terminates rather than throws, which is why
// it is the one used there — confirm against the logging header.
//
// NOTE(review): as with CUDNN_ENFORCE, the block-local is named `status`, so
// a `condition` that mentions a caller variable of that name would be
// shadowed.
#define CUDNN_CHECK(condition) \
do { \
cudnnStatus_t status = condition; \
CHECK(status == CUDNN_STATUS_SUCCESS) \
<< ::caffe2::internal::cudnnGetErrorString(status); \
} while (0)
// Returns the cuDNN version Caffe2 was compiled with, i.e. the value of the
// CUDNN_VERSION macro at compile time.
inline size_t cudnnCompiledVersion() {
  const size_t compiled_version = CUDNN_VERSION;
  return compiled_version;
}
// Returns the cuDNN version reported at runtime by cudnnGetVersion().
inline size_t cudnnRuntimeVersion() {
  const size_t runtime_version = cudnnGetVersion();
  return runtime_version;
}
// Checks that the cuDNN version found at runtime is acceptable for the
// version this code was compiled with; otherwise the CAFFE_ENFORCE below
// fails with a message containing both version numbers.
//
// Version format is major*1000 + minor*100 + patch. The check passes when any
// one of the following holds:
//   - the compiled and runtime versions are identical;
//   - the compiled version is >= 7000 and the runtime version is >= the
//     compiled version;
//   - the compiled version is >= 7000 and major and minor match
//     (version / 100 is equal), so the two differ only in patch level.
inline void CheckCuDNNVersions() {
  const size_t compiled = cudnnCompiledVersion();
  const size_t runtime = cudnnRuntimeVersion();

  const bool version_match = (compiled == runtime);
  const bool compiled_with_7 = (compiled >= 7000);
  const bool backwards_compatible_7 = compiled_with_7 && (runtime >= compiled);
  const bool patch_compatible =
      compiled_with_7 && ((runtime / 100) == (compiled / 100));

  CAFFE_ENFORCE(
      version_match || backwards_compatible_7 || patch_compatible,
      "cuDNN compiled (",
      compiled,
      ") and "
      "runtime (",
      runtime,
      ") versions mismatch");
}
/**
 * cudnnTypeWrapper is a traits class that lets template code refer to the
 * cudnn-side properties of an element type T. Only the primary template is
 * declared here; it is given no definition in this header, so it can only be
 * used through the explicit specializations that follow.
 *
 * Each specialization in this header provides:
 *   - type:             the cudnnDataType_t enumerator for T,
 *   - ScalingParamType: the const-qualified type of the scaling constants,
 *   - BNParamType:      a non-const parameter type
 *                       (presumably for batch-normalization parameters),
 *   - kOne() / kZero(): pointers to static ScalingParamType constants holding
 *                       1 and 0.
 */
template <typename T>
class cudnnTypeWrapper;
/**
 * cudnnTypeWrapper specialization for float.
 *
 * Maps float to CUDNN_DATA_FLOAT and uses float for both the scaling
 * constants and BNParamType.
 */
template <>
class cudnnTypeWrapper<float> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
  typedef const float ScalingParamType;
  typedef float BNParamType;

  // Returns a pointer to a function-local static holding 1; the pointer stays
  // valid for the lifetime of the program.
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1.0f;
    return &v;
  }

  // Returns a pointer to a function-local static holding 0.
  // ScalingParamType is already const-qualified, so the return type is spelled
  // without an additional `const`, the same way as kOne().
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0.0f;
    return &v;
  }
};
#if CUDNN_VERSION_MIN(6, 0, 0)
/**
 * cudnnTypeWrapper specialization for int.
 *
 * Maps int to CUDNN_DATA_INT32 and uses int for both the scaling constants
 * and BNParamType.
 */
template <>
class cudnnTypeWrapper<int> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_INT32;
  typedef const int ScalingParamType;
  typedef int BNParamType;

  // Returns a pointer to a function-local static holding 1; the pointer stays
  // valid for the lifetime of the program.
  static ScalingParamType* kOne() {
    static ScalingParamType v = 1;
    return &v;
  }

  // Returns a pointer to a function-local static holding 0.
  // ScalingParamType is already const-qualified, so the return type is spelled
  // without an additional `const`, the same way as kOne().
  static ScalingParamType* kZero() {
    static ScalingParamType v = 0;
    return &v;
  }
};
#endif // CUDNN_VERSION_MIN(6, 0, 0)
/**
 * cudnnTypeWrapper specialization for double.
 *
 * Maps double to CUDNN_DATA_DOUBLE and uses double for both the scaling
 * constants and BNParamType.
 */
template <>
class cudnnTypeWrapper<double> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
  using ScalingParamType = const double;
  using BNParamType = double;

  // Pointer to a function-local static constant equal to 1.
  static ScalingParamType* kOne() {
    static ScalingParamType one = 1.0;
    return &one;
  }

  // Pointer to a function-local static constant equal to 0.
  static ScalingParamType* kZero() {
    static ScalingParamType zero = 0.0;
    return &zero;
  }
};
/**
 * cudnnTypeWrapper specialization for at::Half.
 *
 * Maps at::Half to CUDNN_DATA_HALF. Unlike the element type, the scaling
 * constants and BNParamType are float.
 */
template <>
class cudnnTypeWrapper<at::Half> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_HALF;
  using ScalingParamType = const float;
  using BNParamType = float;

  // Pointer to a function-local static constant equal to 1.
  static ScalingParamType* kOne() {
    static ScalingParamType one = 1.0;
    return &one;
  }

  // Pointer to a function-local static constant equal to 0.
  static ScalingParamType* kZero() {
    static ScalingParamType zero = 0.0;
    return &zero;
  }
};
/**
 * Converts a Caffe StorageOrder into the matching cudnnTensorFormat_t.
 *
 * @param order  StorageOrder::NHWC or StorageOrder::NCHW.
 * @return CUDNN_TENSOR_NHWC or CUDNN_TENSOR_NCHW respectively. Any other
 *         order is reported through LOG(FATAL).
 */
inline cudnnTensorFormat_t GetCudnnTensorFormat(const StorageOrder& order) {
  if (order == StorageOrder::NHWC) {
    return CUDNN_TENSOR_NHWC;
  }
  if (order == StorageOrder::NCHW) {
    return CUDNN_TENSOR_NCHW;
  }
  LOG(FATAL) << "Unknown cudnn equivalent for order: " << order;
  // Only here to keep compilers from warning about a missing return value.
  return CUDNN_TENSOR_NCHW;
}
/**
 * cudnnTensorDescWrapper wraps a cudnnTensorDescriptor_t and lets callers
 * change its configuration as needed at runtime.
 *
 * The descriptor is created in the constructor and destroyed in the
 * destructor. Descriptor() remembers the last configuration that was applied
 * successfully and only calls into cudnn again when the requested format,
 * data type or dims differ from it. Copying is disabled via
 * C10_DISABLE_COPY_AND_ASSIGN.
 */
class cudnnTensorDescWrapper {
 public:
  cudnnTensorDescWrapper() {
    CUDNN_ENFORCE(cudnnCreateTensorDescriptor(&desc_));
  }
  ~cudnnTensorDescWrapper() noexcept {
    CUDNN_CHECK(cudnnDestroyTensorDescriptor(desc_));
  }

  /**
   * Returns the wrapped descriptor, configured as a 4-d tensor.
   *
   * @param format   cudnn tensor layout. dims is read as (N, C, H, W) when
   *                 this is CUDNN_TENSOR_NCHW and as (N, H, W, C) otherwise.
   * @param type     cudnn element type.
   * @param dims     Tensor sizes; must have exactly 4 entries whenever the
   *                 descriptor has to be (re)configured.
   * @param changed  Optional out-parameter (may be nullptr). Set to false when
   *                 the cached configuration already matched, and to true when
   *                 the descriptor was reconfigured.
   * @return The descriptor owned by this wrapper.
   *
   * Fails via CAFFE_ENFORCE_EQ when dims is not 4-dimensional and via
   * CUDNN_ENFORCE when cudnnSetTensor4dDescriptor reports an error.
   */
  inline cudnnTensorDescriptor_t Descriptor(
      const cudnnTensorFormat_t format,
      const cudnnDataType_t type,
      const vector<int>& dims,
      bool* changed) {
    // dims_ is non-empty only after a successful configuration, so testing it
    // first means a freshly constructed wrapper never reports a cache hit.
    const bool has_cached_config = !dims_.empty();
    if (has_cached_config && type_ == type && format_ == format &&
        dims_ == dims) {
      // Nothing changed: hand back the descriptor as it is.
      if (changed) {
        *changed = false;
      }
      return desc_;
    }
    CAFFE_ENFORCE_EQ(
        dims.size(), 4U, "Currently only 4-dimensional descriptor supported.");
    // Invalidate the cache before touching the descriptor. If the cudnn call
    // below fails, the next call must reconfigure instead of reporting the
    // descriptor as unchanged.
    dims_.clear();
    const bool is_nchw = (format == CUDNN_TENSOR_NCHW);
    CUDNN_ENFORCE(cudnnSetTensor4dDescriptor(
        desc_,
        format,
        type,
        dims[0],
        (is_nchw ? dims[1] : dims[3]),
        (is_nchw ? dims[2] : dims[1]),
        (is_nchw ? dims[3] : dims[2])));
    // Commit the new configuration only once cudnn has accepted it.
    format_ = format;
    type_ = type;
    dims_ = dims;
    if (changed) {
      *changed = true;
    }
    return desc_;
  }

  /**
   * Convenience overload: derives the cudnn format from a StorageOrder and the
   * cudnn data type from T, and does not report whether anything changed.
   */
  template <typename T>
  inline cudnnTensorDescriptor_t Descriptor(
      const StorageOrder& order,
      const vector<int>& dims) {
    return Descriptor(
        GetCudnnTensorFormat(order), cudnnTypeWrapper<T>::type, dims, nullptr);
  }

 private:
  cudnnTensorDescriptor_t desc_;
  // Cached configuration of desc_. Value-initialized so the members never hold
  // indeterminate values; they are meaningful only while dims_ is non-empty.
  cudnnTensorFormat_t format_{};
  cudnnDataType_t type_{};
  vector<int> dims_;
  C10_DISABLE_COPY_AND_ASSIGN(cudnnTensorDescWrapper);
};
/**
 * cudnnFilterDescWrapper wraps a cudnnFilterDescriptor_t and lets callers
 * change its configuration as needed at runtime.
 *
 * The descriptor is created in the constructor and destroyed in the
 * destructor. Descriptor() remembers the last configuration that was applied
 * successfully and only calls into cudnn again when the requested order, data
 * type or dims differ from it. Copying is disabled via
 * C10_DISABLE_COPY_AND_ASSIGN.
 */
class cudnnFilterDescWrapper {
 public:
  cudnnFilterDescWrapper() {
    CUDNN_ENFORCE(cudnnCreateFilterDescriptor(&desc_));
  }
  ~cudnnFilterDescWrapper() noexcept {
    CUDNN_CHECK(cudnnDestroyFilterDescriptor(desc_));
  }

  /**
   * Returns the wrapped descriptor, configured as a 4-d filter.
   *
   * @param order    Storage order of the filter. For StorageOrder::NCHW the
   *                 entries of dims are passed to cudnn in the order
   *                 dims[0], dims[1], dims[2], dims[3]; otherwise in the order
   *                 dims[0], dims[3], dims[1], dims[2].
   * @param type     cudnn element type.
   * @param dims     Filter sizes; must have exactly 4 entries whenever the
   *                 descriptor has to be (re)configured.
   * @param changed  Optional out-parameter (may be nullptr). Set to false when
   *                 the cached configuration already matched, and to true when
   *                 the descriptor was reconfigured.
   * @return The descriptor owned by this wrapper.
   *
   * Fails via CAFFE_ENFORCE_EQ when dims is not 4-dimensional and via
   * CUDNN_ENFORCE when cudnnSetFilter4dDescriptor reports an error.
   */
  inline cudnnFilterDescriptor_t Descriptor(
      const StorageOrder& order,
      const cudnnDataType_t type,
      const vector<int>& dims,
      bool* changed) {
    // dims_ is non-empty only after a successful configuration, so testing it
    // first means a freshly constructed wrapper never reports a cache hit.
    const bool has_cached_config = !dims_.empty();
    if (has_cached_config && type_ == type && order_ == order &&
        dims_ == dims) {
      // Nothing changed: hand back the descriptor as it is.
      if (changed) {
        *changed = false;
      }
      return desc_;
    }
    CAFFE_ENFORCE_EQ(
        dims.size(), 4U, "Currently only 4-dimensional descriptor supported.");
    const cudnnTensorFormat_t format = GetCudnnTensorFormat(order);
    // Invalidate the cache before touching the descriptor. If the cudnn call
    // below fails, the next call must reconfigure instead of reporting the
    // descriptor as unchanged.
    dims_.clear();
    const bool is_nchw = (order == StorageOrder::NCHW);
    CUDNN_ENFORCE(cudnnSetFilter4dDescriptor(
        desc_,
        type,
        format,
        dims[0],
        // TODO - confirm that this is correct for NHWC
        (is_nchw ? dims[1] : dims[3]),
        (is_nchw ? dims[2] : dims[1]),
        (is_nchw ? dims[3] : dims[2])));
    // Commit the new configuration only once cudnn has accepted it.
    order_ = order;
    type_ = type;
    dims_ = dims;
    if (changed) {
      *changed = true;
    }
    return desc_;
  }

  /**
   * Convenience overload: derives the cudnn data type from T and does not
   * report whether anything changed.
   */
  template <typename T>
  inline cudnnFilterDescriptor_t Descriptor(
      const StorageOrder& order,
      const vector<int>& dims) {
    return Descriptor(order, cudnnTypeWrapper<T>::type, dims, nullptr);
  }

 private:
  cudnnFilterDescriptor_t desc_;
  // Cached configuration of desc_. Value-initialized so the members never hold
  // indeterminate values; they are meaningful only while dims_ is non-empty.
  StorageOrder order_{};
  cudnnDataType_t type_{};
  vector<int> dims_;
  C10_DISABLE_COPY_AND_ASSIGN(cudnnFilterDescWrapper);
};
} // namespace caffe2
#endif // CAFFE2_CORE_COMMON_CUDNN_H_