forked from ROCm/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
defs_gpu.bzl
166 lines (138 loc) · 5.88 KB
/
defs_gpu.bzl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
load("@fbcode_macros//build_defs:native_rules.bzl", "buck_genrule")
load(
"//caffe2/caffe2:defs_hip.bzl",
"get_caffe2_hip_headers",
"get_caffe2_hip_srcs",
)
load(":ufunc_defs.bzl", "aten_ufunc_names")
# Glob patterns for the files under aten/src/ATen/cuda (including its
# detail/ and nvrtc_stub/ subdirectories). Patterns are relative paths that
# the functions below hand to native.glob().

# Header files: .h and .cuh.
ATEN_CUDA_H_PATTERN = [
"aten/src/ATen/cuda/*.h",
"aten/src/ATen/cuda/detail/*.h",
"aten/src/ATen/cuda/nvrtc_stub/*.h",
"aten/src/ATen/cuda/*.cuh",
"aten/src/ATen/cuda/detail/*.cuh",
]
# .cpp sources.
ATEN_CUDA_CPP_PATTERN = [
"aten/src/ATen/cuda/*.cpp",
"aten/src/ATen/cuda/detail/*.cpp",
"aten/src/ATen/cuda/nvrtc_stub/*.cpp",
]
# .cu sources (note: no nvrtc_stub/ entry here).
ATEN_CUDA_CU_PATTERN = [
"aten/src/ATen/cuda/*.cu",
"aten/src/ATen/cuda/detail/*.cu",
]
# Glob patterns for the cuDNN and MIOpen directories.

# Headers (.h / .cuh) and .cpp sources directly under aten/src/ATen/cudnn.
ATEN_CUDNN_H_PATTERN = [
"aten/src/ATen/cudnn/*.h",
"aten/src/ATen/cudnn/*.cuh",
]
ATEN_CUDNN_CPP_PATTERN = ["aten/src/ATen/cudnn/*.cpp"]
# Headers (.h / .cuh) and .cpp sources directly under aten/src/ATen/miopen.
ATEN_MIOPEN_H_PATTERN = [
"aten/src/ATen/miopen/*.h",
"aten/src/ATen/miopen/*.cuh",
]
ATEN_MIOPEN_CPP_PATTERN = ["aten/src/ATen/miopen/*.cpp"]
# .cpp sources under aten/src/ATen/native/{cudnn,miopen}.
ATEN_NATIVE_CUDNN_CPP_PATTERN = ["aten/src/ATen/native/cudnn/*.cpp"]
ATEN_NATIVE_MIOPEN_CPP_PATTERN = ["aten/src/ATen/native/miopen/*.cpp"]
# Glob patterns for the CUDA code under aten/src/ATen/native.

# .cu sources. The transformers entry uses "**", so it is not limited to
# the transformers/cuda subdirectory the way the .cpp pattern below is.
ATEN_NATIVE_CUDA_CU_PATTERN = [
"aten/src/ATen/native/cuda/*.cu",
"aten/src/ATen/native/nested/cuda/*.cu",
"aten/src/ATen/native/quantized/cuda/*.cu",
"aten/src/ATen/native/sparse/cuda/*.cu",
"aten/src/ATen/native/transformers/**/*.cu",
]
# .cpp sources.
ATEN_NATIVE_CUDA_CPP_PATTERN = [
"aten/src/ATen/native/cuda/*.cpp",
"aten/src/ATen/native/cuda/linalg/*.cpp",
"aten/src/ATen/native/nested/cuda/*.cpp",
"aten/src/ATen/native/sparse/cuda/*.cpp",
"aten/src/ATen/native/transformers/cuda/*.cpp",
]
# Header files (.h / .cuh). This list also carries the native/cudnn headers.
ATEN_NATIVE_CUDA_H_PATTERN = [
"aten/src/ATen/native/cudnn/**/*.h",
"aten/src/ATen/native/cuda/**/*.h",
"aten/src/ATen/native/cuda/**/*.cuh",
"aten/src/ATen/native/sparse/cuda/*.h",
"aten/src/ATen/native/sparse/cuda/*.cuh",
"aten/src/ATen/native/quantized/cuda/*.h",
"aten/src/ATen/native/transformers/cuda/*.h",
"aten/src/ATen/native/transformers/**/*.cuh",
]
# T66678203: Clang CUDA rollout
# Files matched here are excluded from the native .cu glob in
# get_aten_cuda_srcs() and returned on their own by
# get_aten_cuda_clang_srcs(). get_aten_hip_srcs() does not apply this
# exclusion.
ATEN_CUDA_CLANG_CU_PATTERN = [
"aten/src/ATen/native/cuda/DistributionBernoulli.cu",
]
### CUDA files
def get_aten_cuda_headers():
    """Return the ATen CUDA header files.

    The result concatenates, in this order, the files matched by
    ATEN_CUDA_H_PATTERN, ATEN_NATIVE_CUDA_H_PATTERN and ATEN_CUDNN_H_PATTERN.
    """
    return (
        native.glob(ATEN_CUDA_H_PATTERN) +
        native.glob(ATEN_NATIVE_CUDA_H_PATTERN) +
        native.glob(ATEN_CUDNN_H_PATTERN)
    )
def get_aten_cuda_srcs():
    """Return the ATen .cu sources, minus the clang-only files.

    Files matching ATEN_CUDA_CLANG_CU_PATTERN are excluded from the native
    CUDA glob; get_aten_cuda_clang_srcs() returns those separately.
    """
    cuda_cu = native.glob(ATEN_CUDA_CU_PATTERN)
    native_cuda_cu = native.glob(
        ATEN_NATIVE_CUDA_CU_PATTERN,
        exclude = ATEN_CUDA_CLANG_CU_PATTERN,
    )
    all_cu = cuda_cu + native_cuda_cu
    return all_cu
def get_aten_cuda_clang_srcs():
    """Return the files matched by ATEN_CUDA_CLANG_CU_PATTERN."""
    clang_cu = native.glob(ATEN_CUDA_CLANG_CU_PATTERN)
    return clang_cu
# CPU+CUDA file
# Note that these sources and headers include the CPU lists too
def get_all_cuda_srcs():
    """Return every .cpp source for the CUDA build followed by the .cu sources.

    The .cpp files are globbed pattern list by pattern list, in the order
    given below; the result of get_aten_cuda_srcs() is appended last.
    """
    cpp_pattern_lists = [
        ATEN_NATIVE_CUDNN_CPP_PATTERN,
        ATEN_CUDNN_CPP_PATTERN,
        ATEN_NATIVE_MIOPEN_CPP_PATTERN,
        ATEN_CUDA_CPP_PATTERN,
        ATEN_NATIVE_CUDA_CPP_PATTERN,
    ]
    globbed = [native.glob(patterns) for patterns in cpp_pattern_lists]

    # Concatenate with "+" in the same left-to-right order as the pattern
    # lists, then add the .cu sources.
    srcs = globbed[0]
    for files in globbed[1:]:
        srcs = srcs + files
    return srcs + get_aten_cuda_srcs()
### HIP files
# Files that must be hipified
def get_aten_hip_srcs():
    """Collect the CUDA sources that need hipifying and map them to HIP names.

    Returns:
        A 2-tuple. The first element is element 0 of the
        get_caffe2_hip_srcs() result, unchanged. The second is element 1 of
        that result with every "aten/src/" occurrence removed from each
        entry. (Per the original comment these are the file names before and
        after hipification.)
    """

    # CU -> HIP files. HIP does not use clang for ATEN_CUDA_CLANG_CU_PATTERN,
    # so nothing is excluded from the native .cu glob here.
    cu_files = native.glob(ATEN_CUDA_CU_PATTERN) + native.glob(ATEN_NATIVE_CUDA_CU_PATTERN)

    # CPU (.cpp) files
    cpp_files = (
        native.glob(ATEN_NATIVE_CUDNN_CPP_PATTERN) +
        native.glob(ATEN_CUDNN_CPP_PATTERN) +
        native.glob(ATEN_CUDA_CPP_PATTERN) +
        native.glob(ATEN_NATIVE_CUDA_CPP_PATTERN)
    )

    hipified = get_caffe2_hip_srcs(
        include_patterns = [],
        include_files = cu_files + cpp_files,
        project_dir = "",
    )

    stripped = []
    for path in hipified[1]:
        stripped.append(path.replace("aten/src/", ""))
    return (hipified[0], stripped)
def get_aten_hip_headers():
    """Collect the CUDA headers that need hipifying and map them to HIP names.

    Returns:
        A 2-tuple. The first element is element 0 of the
        get_caffe2_hip_headers() result, unchanged. The second is element 1
        of that result with every "aten/src/" occurrence removed from each
        entry. (Per the original comment these are the file names before and
        after hipification.)
    """
    aten_cuda_h = native.glob(ATEN_CUDA_H_PATTERN)
    aten_native_cuda_h = native.glob(ATEN_NATIVE_CUDA_H_PATTERN)

    # The cuDNN headers (ATEN_CUDNN_H_PATTERN) are currently left out of the
    # HIP header set; the empty list keeps the concatenation shape intact.
    aten_cudnn_h = []

    # Get hipified file names (before, after). The list is built once and
    # passed through, rather than being spelled out a second time in the
    # call, so the two cannot drift apart.
    srcs = aten_cuda_h + aten_native_cuda_h + aten_cudnn_h
    ret = get_caffe2_hip_headers(include_patterns = [], include_files = srcs, project_dir = "")
    return (ret[0], [f.replace("aten/src/", "") for f in ret[1]])
# Native HIP-aware files
def get_aten_hip_native_srcs():
    """Return the .cpp sources from the HIP impl and MIOpen directories.

    Order of the result: aten/src/ATen/hip/impl/*.cpp, then the files
    matching ATEN_MIOPEN_CPP_PATTERN, then ATEN_NATIVE_MIOPEN_CPP_PATTERN.
    """
    return (
        native.glob(["aten/src/ATen/hip/impl/*.cpp"]) +
        native.glob(ATEN_MIOPEN_CPP_PATTERN) +
        native.glob(ATEN_NATIVE_MIOPEN_CPP_PATTERN)
    )
def get_aten_hip_native_headers():
    """Return the headers from the HIP impl and MIOpen directories.

    Order of the result: aten/src/ATen/hip/impl/*.h, then the files matching
    ATEN_MIOPEN_H_PATTERN.
    """
    hip_impl_headers = native.glob(["aten/src/ATen/hip/impl/*.h"])
    miopen_headers = native.glob(ATEN_MIOPEN_H_PATTERN)
    headers = hip_impl_headers + miopen_headers
    return headers
def get_aten_hip_ufunc_generated_cuda_sources(gencode_pattern = "{}"):
    """Declare one copy rule per ufunc that renames its ".cu" file to ".hip".

    The contents of these CUDA files do not need to be hipified at this
    point, but unlike OSS the compiler is selected from the file extension,
    so each file has to be renamed.

    Args:
        gencode_pattern: format string with one "{}" slot; it is formatted
            with the "UfuncCUDA_<name>.cu" file name to produce the genrule's
            single source.

    Returns:
        A list of ":aten_ufunc_hip_renamed_<name>" labels, one per entry of
        aten_ufunc_names and in the same order.
    """
    labels = []
    for ufunc in aten_ufunc_names:
        src_file = "UfuncCUDA_{}.cu".format(ufunc)
        out_file = "UfuncCUDA_{}.hip".format(ufunc)
        label = ":aten_ufunc_hip_renamed_{}".format(ufunc)

        # Plain copy: only the file name changes, not the contents.
        buck_genrule(
            name = "aten_ufunc_hip_renamed_{}".format(ufunc),
            srcs = [gencode_pattern.format(src_file)],
            bash = 'cp "$SRCDIR/{}" "$OUT"'.format(src_file),
            out = out_file,
            default_outs = [],
        )
        labels.append(label)
    return labels