@@ -16,11 +16,10 @@ function knl_copyto!(dest, src)
16
16
end
17
17
18
18
function Base. copyto! (
19
- dest:: IJFH{S, Nij} ,
20
- bc:: DataLayouts.BroadcastedUnionIJFH{S, Nij} ,
19
+ dest:: IJFH{S, Nij, Nh } ,
20
+ bc:: DataLayouts.BroadcastedUnionIJFH{S, Nij, Nh } ,
21
21
:: ToCUDA ,
22
- ) where {S, Nij}
23
- _, _, _, _, Nh = size (bc)
22
+ ) where {S, Nij, Nh}
24
23
if Nh > 0
25
24
auto_launch! (
26
25
knl_copyto!,
@@ -34,11 +33,10 @@ function Base.copyto!(
34
33
end
35
34
36
35
function Base. copyto! (
37
- dest:: VIJFH{S, Nv, Nij} ,
38
- bc:: DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij} ,
36
+ dest:: VIJFH{S, Nv, Nij, Nh } ,
37
+ bc:: DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij, Nh } ,
39
38
:: ToCUDA ,
40
- ) where {S, Nv, Nij}
41
- _, _, _, _, Nh = size (bc)
39
+ ) where {S, Nv, Nij, Nh}
42
40
if Nv > 0 && Nh > 0
43
41
Nv_per_block = min (Nv, fld (256 , Nij * Nij))
44
42
Nv_blocks = cld (Nv, Nv_per_block)
@@ -58,14 +56,13 @@ function Base.copyto!(
58
56
bc:: DataLayouts.BroadcastedUnionVF{S, Nv} ,
59
57
:: ToCUDA ,
60
58
) where {S, Nv}
61
- _, _, _, _, Nh = size (dest)
62
- if Nv > 0 && Nh > 0
59
+ if Nv > 0
63
60
auto_launch! (
64
61
knl_copyto!,
65
62
(dest, bc),
66
63
dest;
67
64
threads_s = (1 , 1 ),
68
- blocks_s = (Nh , Nv),
65
+ blocks_s = (1 , Nv),
69
66
)
70
67
end
71
68
return dest
@@ -100,8 +97,8 @@ function knl_copyto_flat!(dest::AbstractData, bc)
100
97
end
101
98
102
99
function cuda_copyto! (dest:: AbstractData , bc)
103
- (_, _, Nf, Nv, Nh) = DataLayouts. universal_size (dest)
104
- if Nv > 0 && Nh > 0 && Nf > 0
100
+ (_, _, Nv, Nh) = DataLayouts. universal_size (dest)
101
+ if Nv > 0 && Nh > 0
105
102
auto_launch! (knl_copyto_flat!, (dest, bc), dest; auto = true )
106
103
end
107
104
return dest
@@ -110,12 +107,12 @@ end
110
107
# TODO : can we use CUDA's launch configuration for all data layouts?
111
108
# Currently, it seems to have a slight performance degradation.
112
109
# ! format: off
113
- # Base.copyto!(dest::IJFH{S, Nij}, bc::DataLayouts.BroadcastedUnionIJFH{S, Nij}, ::ToCUDA) where {S, Nij} = cuda_copyto!(dest, bc)
114
- Base. copyto! (dest:: IFH{S, Ni} , bc:: DataLayouts.BroadcastedUnionIFH{S, Ni} , :: ToCUDA ) where {S, Ni} = cuda_copyto! (dest, bc)
115
- Base. copyto! (dest:: IJF{S, Nij} , bc:: DataLayouts.BroadcastedUnionIJF{S, Nij} , :: ToCUDA ) where {S, Nij} = cuda_copyto! (dest, bc)
116
- Base. copyto! (dest:: IF{S, Ni} , bc:: DataLayouts.BroadcastedUnionIF{S, Ni} , :: ToCUDA ) where {S, Ni} = cuda_copyto! (dest, bc)
117
- Base. copyto! (dest:: VIFH{S, Nv, Ni} , bc:: DataLayouts.BroadcastedUnionVIFH{S, Nv, Ni} , :: ToCUDA ) where {S, Nv, Ni} = cuda_copyto! (dest, bc)
118
- # Base.copyto!(dest::VIJFH{S, Nv, Nij}, bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij}, ::ToCUDA) where {S, Nv, Nij} = cuda_copyto!(dest, bc)
119
- # Base.copyto!(dest::VF{S, Nv}, bc::DataLayouts.BroadcastedUnionVF{S, Nv}, ::ToCUDA) where {S, Nv} = cuda_copyto!(dest, bc)
120
- # Base.copyto!(dest::DataF{S}, bc::DataLayouts.BroadcastedUnionDataF{S}, ::ToCUDA) where {S} = cuda_copyto!(dest, bc)
110
+ # Base.copyto!(dest::IJFH{S, Nij}, bc::DataLayouts.BroadcastedUnionIJFH{S, Nij, Nh }, ::ToCUDA) where {S, Nij, Nh } = cuda_copyto!(dest, bc)
111
+ Base. copyto! (dest:: IFH{S, Ni, Nh } , bc:: DataLayouts.BroadcastedUnionIFH{S, Ni, Nh } , :: ToCUDA ) where {S, Ni, Nh } = cuda_copyto! (dest, bc)
112
+ Base. copyto! (dest:: IJF{S, Nij} , bc:: DataLayouts.BroadcastedUnionIJF{S, Nij} , :: ToCUDA ) where {S, Nij} = cuda_copyto! (dest, bc)
113
+ Base. copyto! (dest:: IF{S, Ni} , bc:: DataLayouts.BroadcastedUnionIF{S, Ni} , :: ToCUDA ) where {S, Ni} = cuda_copyto! (dest, bc)
114
+ Base. copyto! (dest:: VIFH{S, Nv, Ni, Nh } , bc:: DataLayouts.BroadcastedUnionVIFH{S, Nv, Ni, Nh } , :: ToCUDA ) where {S, Nv, Ni, Nh } = cuda_copyto! (dest, bc)
115
+ # Base.copyto!(dest::VIJFH{S, Nv, Nij}, bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij, Nh }, ::ToCUDA) where {S, Nv, Nij, Nh } = cuda_copyto!(dest, bc)
116
+ # Base.copyto!(dest::VF{S, Nv}, bc::DataLayouts.BroadcastedUnionVF{S, Nv}, ::ToCUDA) where {S, Nv} = cuda_copyto!(dest, bc)
117
+ # Base.copyto!(dest::DataF{S}, bc::DataLayouts.BroadcastedUnionDataF{S}, ::ToCUDA) where {S} = cuda_copyto!(dest, bc)
121
118
# ! format: on
0 commit comments