forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
net_gpu_test.cc
136 lines (118 loc) · 3.47 KB
/
net_gpu_test.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#include <gtest/gtest.h>
#include "caffe2/core/common_gpu.h"
#include "caffe2/core/net.h"
#include "caffe2/core/net_dag.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/scope_guard.h"
C10_DECLARE_bool(caffe2_disable_chaining);
namespace caffe2 {
namespace {
// Shared run counter for this file: NetTestDummyOp::Run() adds one on every
// successful run, and testExecution() resets it before each net run and reads
// it back afterwards.
static std::atomic<int> counter;
// Scaffolding operator for the net tests: a successful run does nothing
// except bump the file-level `counter`. The class derives from OperatorBase
// (not Operator<Context>) because the very same type is registered for both
// the CPU and the CUDA device types; ordinary operators should derive from
// Operator<Context> instead.
class NetTestDummyOp final : public OperatorBase {
 public:
  using OperatorBase::OperatorBase;

  // Picks up the optional boolean "fail" argument (default: false) from the
  // operator definition.
  NetTestDummyOp(const OperatorDef& operator_def, Workspace* ws)
      : OperatorBase(operator_def, ws),
        fail_(OperatorBase::GetSingleArgument<bool>("fail", false)) {}

  // When configured to fail, returns false and leaves the counter untouched;
  // otherwise increments the counter once and returns true. The stream id
  // argument is ignored.
  bool Run(int /* unused */ /*stream_id*/) override {
    if (!fail_) {
      counter.fetch_add(1);
    }
    return !fail_;
  }

  // Simulate CUDA operator behavior: both async-related queries answer true
  // exactly when the operator definition targets the CUDA device type.
  bool HasAsyncPart() const override {
    return TargetsCuda();
  }

  bool SupportsAsyncScheduling() const override {
    return TargetsCuda();
  }

 protected:
  // Value of the "fail" argument captured at construction time.
  const bool fail_;

 private:
  // True when the device option of this operator's definition is PROTO_CUDA.
  bool TargetsCuda() const {
    return debug_def().device_option().device_type() == PROTO_CUDA;
  }
};
// Expose the same dummy implementation under two operator names
// (NetTestDummy and NetTestDummy2), each registered for both CPU and CUDA.
REGISTER_CPU_OPERATOR(NetTestDummy, NetTestDummyOp);
REGISTER_CUDA_OPERATOR(NetTestDummy, NetTestDummyOp);
REGISTER_CPU_OPERATOR(NetTestDummy2, NetTestDummyOp);
REGISTER_CUDA_OPERATOR(NetTestDummy2, NetTestDummyOp);
// Schemas: both operators accept anywhere from 0 to INT_MAX inputs and
// outputs. They differ only in the index pairs handed to AllowInplace()
// (presumably {input index, output index} -- confirm against OpSchema).
OPERATOR_SCHEMA(NetTestDummy)
.NumInputs(0, INT_MAX)
.NumOutputs(0, INT_MAX)
.AllowInplace({{0, 0}, {1, 1}});
OPERATOR_SCHEMA(NetTestDummy2)
.NumInputs(0, INT_MAX)
.NumOutputs(0, INT_MAX)
.AllowInplace({{1, 0}});
} // namespace
void testExecution(std::unique_ptr<NetBase>& net, int num_ops) {
// Run 100 times
for (int i = 0; i < 100; i++) {
counter.exchange(0);
net.get()->Run();
ASSERT_EQ(num_ops, counter.load());
}
}
// Parses the text-format NetDef in `spec`, creates the net with chaining
// enabled and 4 workers, checks that the execution chains it reports are
// equal to `expected`, and finally executes the net through testExecution()
// with the number of ops in the definition as the expected count.
//
// A blob named "in" is created in a fresh workspace before the net is built.
// FLAGS_caffe2_disable_chaining is forced to false for the duration of the
// call and put back to its previous value on exit.
void checkChainingAndRun(
    const char* spec,
    const dag_utils::ExecutionChains& expected) {
  Workspace ws;
  ws.CreateBlob("in");

  NetDef net_def;
  CAFFE_ENFORCE(TextFormat::ParseFromString(spec, &net_def));
  net_def.set_num_workers(4);

  // Remember the current flag value; the guard writes it back when it goes
  // out of scope, which happens after `net` below has been destroyed.
  auto previous_flag = FLAGS_caffe2_disable_chaining;
  auto restore_flag =
      MakeGuard([&]() { FLAGS_caffe2_disable_chaining = previous_flag; });
  FLAGS_caffe2_disable_chaining = false;

  std::unique_ptr<NetBase> net(CreateNet(net_def, &ws));

  // The chain inspection hook lives on AsyncNetBase, so the created net must
  // be of that type.
  auto* dag = dynamic_cast_if_rtti<AsyncNetBase*>(net.get());
  CHECK_NOTNULL(dag);

  const auto& chains = dag->TEST_execution_chains();
  EXPECT_EQ(chains, expected);

  testExecution(net, net_def.op().size());
}
// Chaining across devices: a linear net of four NetTestDummy ops
// (in -> hidden -> out -> out2 -> out3). Op 0 carries no device option, ops 1
// and 2 use device_type 1, and op 3 uses device_type 1 with device_id 1
// (device_type 1 is presumably CUDA, given the hardware check below --
// confirm against the DeviceTypeProto enum). The expected result is two
// chains: ops {0, 1, 2} keyed by 0, and op {3} keyed by 3.
//
// The DISABLED_ prefix keeps GoogleTest from running this test by default.
TEST(NetTest, DISABLED_ChainingForDifferentDevices) {
  // Nothing to verify without a CUDA GPU and at least two CUDA devices.
  if (!HasCudaGPU() || NumCudaDevices() < 2) {
    return;
  }

  const char* const spec = R"DOC(
name: "example"
type: "dag"
external_input: "in"
op {
input: "in"
output: "hidden"
type: "NetTestDummy"
}
op {
input: "hidden"
output: "out"
type: "NetTestDummy"
device_option {
device_type: 1
}
}
op {
input: "out"
output: "out2"
type: "NetTestDummy"
device_option {
device_type: 1
}
}
op {
input: "out2"
output: "out3"
type: "NetTestDummy"
device_option {
device_type: 1
device_id: 1
}
}
)DOC";

  checkChainingAndRun(spec, {{0, {0, 1, 2}}, {3, {3}}});
}
} // namespace caffe2