Skip to content

Commit 5696a7a

Browse files
Add Nh to type parameter space
1 parent 8c8b85a commit 5696a7a

28 files changed

+545
-480
lines changed

ext/cuda/data_layouts_copyto.jl

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,10 @@ function knl_copyto!(dest, src)
1616
end
1717

1818
function Base.copyto!(
19-
dest::IJFH{S, Nij},
20-
bc::DataLayouts.BroadcastedUnionIJFH{S, Nij},
19+
dest::IJFH{S, Nij, Nh},
20+
bc::DataLayouts.BroadcastedUnionIJFH{S, Nij, Nh},
2121
::ToCUDA,
22-
) where {S, Nij}
23-
_, _, _, _, Nh = size(bc)
22+
) where {S, Nij, Nh}
2423
if Nh > 0
2524
auto_launch!(
2625
knl_copyto!,
@@ -34,11 +33,10 @@ function Base.copyto!(
3433
end
3534

3635
function Base.copyto!(
37-
dest::VIJFH{S, Nv, Nij},
38-
bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij},
36+
dest::VIJFH{S, Nv, Nij, Nh},
37+
bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij, Nh},
3938
::ToCUDA,
40-
) where {S, Nv, Nij}
41-
_, _, _, _, Nh = size(bc)
39+
) where {S, Nv, Nij, Nh}
4240
if Nv > 0 && Nh > 0
4341
Nv_per_block = min(Nv, fld(256, Nij * Nij))
4442
Nv_blocks = cld(Nv, Nv_per_block)
@@ -58,14 +56,13 @@ function Base.copyto!(
5856
bc::DataLayouts.BroadcastedUnionVF{S, Nv},
5957
::ToCUDA,
6058
) where {S, Nv}
61-
_, _, _, _, Nh = size(dest)
62-
if Nv > 0 && Nh > 0
59+
if Nv > 0
6360
auto_launch!(
6461
knl_copyto!,
6562
(dest, bc),
6663
dest;
6764
threads_s = (1, 1),
68-
blocks_s = (Nh, Nv),
65+
blocks_s = (1, Nv),
6966
)
7067
end
7168
return dest
@@ -100,8 +97,8 @@ function knl_copyto_flat!(dest::AbstractData, bc)
10097
end
10198

10299
function cuda_copyto!(dest::AbstractData, bc)
103-
(_, _, Nf, Nv, Nh) = DataLayouts.universal_size(dest)
104-
if Nv > 0 && Nh > 0 && Nf > 0
100+
(_, _, Nv, Nh) = DataLayouts.universal_size(dest)
101+
if Nv > 0 && Nh > 0
105102
auto_launch!(knl_copyto_flat!, (dest, bc), dest; auto = true)
106103
end
107104
return dest
@@ -110,12 +107,12 @@ end
110107
# TODO: can we use CUDA's launch configuration for all data layouts?
111108
# Currently, it seems to have a slight performance degradation.
112109
#! format: off
113-
# Base.copyto!(dest::IJFH{S, Nij}, bc::DataLayouts.BroadcastedUnionIJFH{S, Nij}, ::ToCUDA) where {S, Nij} = cuda_copyto!(dest, bc)
114-
Base.copyto!(dest::IFH{S, Ni}, bc::DataLayouts.BroadcastedUnionIFH{S, Ni}, ::ToCUDA) where {S, Ni} = cuda_copyto!(dest, bc)
115-
Base.copyto!(dest::IJF{S, Nij}, bc::DataLayouts.BroadcastedUnionIJF{S, Nij}, ::ToCUDA) where {S, Nij} = cuda_copyto!(dest, bc)
116-
Base.copyto!(dest::IF{S, Ni}, bc::DataLayouts.BroadcastedUnionIF{S, Ni}, ::ToCUDA) where {S, Ni} = cuda_copyto!(dest, bc)
117-
Base.copyto!(dest::VIFH{S, Nv, Ni}, bc::DataLayouts.BroadcastedUnionVIFH{S, Nv, Ni}, ::ToCUDA) where {S, Nv, Ni} = cuda_copyto!(dest, bc)
118-
# Base.copyto!(dest::VIJFH{S, Nv, Nij}, bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij}, ::ToCUDA) where {S, Nv, Nij} = cuda_copyto!(dest, bc)
119-
# Base.copyto!(dest::VF{S, Nv}, bc::DataLayouts.BroadcastedUnionVF{S, Nv}, ::ToCUDA) where {S, Nv} = cuda_copyto!(dest, bc)
120-
# Base.copyto!(dest::DataF{S}, bc::DataLayouts.BroadcastedUnionDataF{S}, ::ToCUDA) where {S} = cuda_copyto!(dest, bc)
110+
# Base.copyto!(dest::IJFH{S, Nij, Nh}, bc::DataLayouts.BroadcastedUnionIJFH{S, Nij, Nh}, ::ToCUDA) where {S, Nij, Nh} = cuda_copyto!(dest, bc)
111+
Base.copyto!(dest::IFH{S, Ni, Nh}, bc::DataLayouts.BroadcastedUnionIFH{S, Ni, Nh}, ::ToCUDA) where {S, Ni, Nh} = cuda_copyto!(dest, bc)
112+
Base.copyto!(dest::IJF{S, Nij}, bc::DataLayouts.BroadcastedUnionIJF{S, Nij}, ::ToCUDA) where {S, Nij} = cuda_copyto!(dest, bc)
113+
Base.copyto!(dest::IF{S, Ni}, bc::DataLayouts.BroadcastedUnionIF{S, Ni}, ::ToCUDA) where {S, Ni} = cuda_copyto!(dest, bc)
114+
Base.copyto!(dest::VIFH{S, Nv, Ni, Nh}, bc::DataLayouts.BroadcastedUnionVIFH{S, Nv, Ni, Nh}, ::ToCUDA) where {S, Nv, Ni, Nh} = cuda_copyto!(dest, bc)
115+
# Base.copyto!(dest::VIJFH{S, Nv, Nij, Nh}, bc::DataLayouts.BroadcastedUnionVIJFH{S, Nv, Nij, Nh}, ::ToCUDA) where {S, Nv, Nij, Nh} = cuda_copyto!(dest, bc)
116+
# Base.copyto!(dest::VF{S, Nv}, bc::DataLayouts.BroadcastedUnionVF{S, Nv}, ::ToCUDA) where {S, Nv} = cuda_copyto!(dest, bc)
117+
# Base.copyto!(dest::DataF{S}, bc::DataLayouts.BroadcastedUnionDataF{S}, ::ToCUDA) where {S} = cuda_copyto!(dest, bc)
121118
#! format: on

ext/cuda/data_layouts_fill.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ function knl_fill_flat!(dest::AbstractData, val)
1111
end
1212

1313
function cuda_fill!(dest::AbstractData, val)
14-
(_, _, Nf, Nv, Nh) = DataLayouts.universal_size(dest)
15-
if Nv > 0 && Nh > 0 && Nf > 0
14+
(_, _, Nv, Nh) = DataLayouts.universal_size(dest)
15+
if Nv > 0 && Nh > 0
1616
auto_launch!(knl_fill_flat!, (dest, val), dest; auto = true)
1717
end
1818
return dest

ext/cuda/data_layouts_fused_copyto.jl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,9 @@ end
4242

4343
function fused_copyto!(
4444
fmbc::FusedMultiBroadcast,
45-
dest1::VIJFH{S, Nv, Nij},
45+
dest1::VIJFH{S, Nv, Nij, Nh},
4646
::ToCUDA,
47-
) where {S, Nv, Nij}
48-
_, _, _, _, Nh = size(dest1)
47+
) where {S, Nv, Nij, Nh}
4948
if Nv > 0 && Nh > 0
5049
Nv_per_block = min(Nv, fld(256, Nij * Nij))
5150
Nv_blocks = cld(Nv, Nv_per_block)

lib/ClimaCorePlots/src/ClimaCorePlots.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ function _unfolded_pannel_matrix(field, interpolate)
425425
# TODO: inefficient memory-wise, but good enough for now
426426
panels = [fill(NaN, (panel_size * dof, panel_size * dof)) for _ in 1:6]
427427

428-
interpolated_data = DataLayouts.IJFH{FT, interpolate}(Array{FT}, nelem)
428+
interpolated_data = DataLayouts.IJFH{FT, interpolate, nelem}(Array{FT})
429429
field_data = Fields.field_values(field)
430430

431431
Operators.tensor_product!(interpolated_data, field_data, Imat)

0 commit comments

Comments
 (0)