-
Notifications
You must be signed in to change notification settings - Fork 1
/
Stokes2D.jl
175 lines (164 loc) · 7.91 KB
/
Stokes2D.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# Run-time switches and grid resolution. Each constant can be overridden through the
# environment variable of the same (upper-case) name; otherwise the default given as
# the third argument of `get` is parsed and used.
const use_return = parse(Bool, get(ENV, "USE_RETURN", "false")) # keep the solver result in globals
const USE_GPU    = parse(Bool, get(ENV, "USE_GPU", "false"))    # selects the CUDA vs. Threads backend
const do_viz     = parse(Bool, get(ENV, "DO_VIZ", "true"))      # plot the results after the solve
const do_save    = parse(Bool, get(ENV, "DO_SAVE", "false"))    # append the iteration count to a text file
const nx         = parse(Int, get(ENV, "NX", "255"))            # grid cells in x (default 256 - 1)
const ny         = parse(Int, get(ENV, "NY", "255"))            # grid cells in y (default 256 - 1)
###
using ParallelStencil
using ParallelStencil.FiniteDifferences2D
@static if USE_GPU
@init_parallel_stencil(CUDA, Float64, 2)
else
@init_parallel_stencil(Threads, Float64, 2)
end
using Plots, Printf, Statistics, LinearAlgebra
# One explicit smoothing step: the inner points of `A2` are set to the inner points of
# `A` plus the sum of the second differences of `A` in x and y, scaled by 1/4.1/fact.
# Only `@inn(A2)` is assigned, so the outermost entries of `A2` keep whatever values the
# caller put there (per ParallelStencil's `@inn` selection).
@parallel function smooth!(A2::Data.Array, A::Data.Array, fact::Data.Number)
@inn(A2) = @inn(A) + 1.0/4.1/fact*(@d2_xi(A) + @d2_yi(A))
return
end
# Write `@maxloc(Musτ)` into the inner points of `Musτ2`.
# NOTE(review): `@maxloc` is ParallelStencil's local-maximum stencil over neighbouring
# points — confirm the exact window in the FiniteDifferences2D documentation.
# The outermost entries of `Musτ2` are not written by this kernel.
@parallel function compute_maxloc!(Musτ2::Data.Array, Musτ::Data.Array)
@inn(Musτ2) = @maxloc(Musτ)
return
end
# Fill the two per-cell iteration-parameter fields used by the pseudo-transient loop:
#   dτ_Rho = Vpdτ*max_lxy/Re/Musτ
#   Gdτ    = Vpdτ^2/dτ_Rho/(r+2.0)
# `Gdτ` is computed from the freshly written `dτ_Rho`, so the statement order matters.
@parallel function compute_iter_params!(dτ_Rho::Data.Array, Gdτ::Data.Array, Musτ::Data.Array, Vpdτ::Data.Number, Re::Data.Number, r::Data.Number, max_lxy::Data.Number)
@all(dτ_Rho) = Vpdτ*max_lxy/Re/@all(Musτ)
@all(Gdτ) = Vpdτ^2/@all(dτ_Rho)/(r+2.0)
return
end
# Compute the velocity divergence `∇V` from the x-difference of `Vx` and the
# y-difference of `Vy` (divided by `dx` and `dy`), then decrement `Pt` in place by
# r*Gdτ*∇V. `∇V` is overwritten; `Pt` is updated from its previous value.
@parallel function compute_P!(∇V::Data.Array, Pt::Data.Array, Vx::Data.Array, Vy::Data.Array, Gdτ::Data.Array, r::Data.Number, dx::Data.Number, dy::Data.Number)
@all(∇V) = @d_xa(Vx)/dx + @d_ya(Vy)/dy
@all(Pt) = @all(Pt) - r*@all(Gdτ)*@all(∇V)
return
end
# Update the three τ fields in place from their previous values and the velocity
# differences. Each update has the form
#   τ = (τ + 2*Gdτ*(velocity gradient term)) / (Gdτ/Mus + 1)
# `τxx` and `τyy` use `Gdτ` and `Mus` at the same points (`@all`); `τxy` uses their
# `@av` averages and the symmetric combination 0.5*(dVx/dy + dVy/dx).
# NOTE(review): `τxy` presumably lives on cell corners, hence the averaging — confirm
# against the array sizes chosen by the caller.
@parallel function compute_τ!(τxx::Data.Array, τyy::Data.Array, τxy::Data.Array, Vx::Data.Array, Vy::Data.Array, Mus::Data.Array, Gdτ::Data.Array, dx::Data.Number, dy::Data.Number)
@all(τxx) = (@all(τxx) + 2.0*@all(Gdτ)*@d_xa(Vx)/dx)/(@all(Gdτ)/@all(Mus) + 1.0)
@all(τyy) = (@all(τyy) + 2.0*@all(Gdτ)*@d_ya(Vy)/dy)/(@all(Gdτ)/@all(Mus) + 1.0)
@all(τxy) = (@all(τxy) + 2.0*@av(Gdτ)*(0.5*(@d_yi(Vx)/dy + @d_xi(Vy)/dx)))/(@av(Gdτ)/@av(Mus) + 1.0)
return
end
# Compute the residuals `Rx`, `Ry` from differences of the τ fields and of `Pt`, then
# the velocity increments `dVx`, `dVy` as the residuals multiplied by `dτ_Rho` averaged
# in x (`@av_xi`) respectively in y (`@av_yi`).
# All four output arrays are overwritten; the increments use the residuals written just
# above, so the statement order matters.
@parallel function compute_dV!(Rx::Data.Array, Ry::Data.Array, dVx::Data.Array, dVy::Data.Array, Pt::Data.Array, τxx::Data.Array, τyy::Data.Array, τxy::Data.Array, dτ_Rho::Data.Array, dx::Data.Number, dy::Data.Number)
@all(Rx) = @d_xi(τxx)/dx + @d_ya(τxy)/dy - @d_xi(Pt)/dx
@all(Ry) = @d_yi(τyy)/dy + @d_xa(τxy)/dx - @d_yi(Pt)/dy
@all(dVx) = @av_xi(dτ_Rho)*@all(Rx)
@all(dVy) = @av_yi(dτ_Rho)*@all(Ry)
return
end
# Add the increments `dVx`, `dVy` to the inner points of `Vx`, `Vy` in place.
# Only `@inn(...)` is assigned, so the outermost velocity entries are left unchanged by
# this kernel (the caller applies `bc_x!`/`bc_y!` afterwards).
@parallel function compute_V!(Vx::Data.Array, Vy::Data.Array, dVx::Data.Array, dVy::Data.Array)
@inn(Vx) = @inn(Vx) + @all(dVx)
@inn(Vy) = @inn(Vy) + @all(dVy)
return
end
# Boundary condition along x: for the column index `iy`, copy the second entry of `A`
# onto the first and the second-to-last entry onto the last (first dimension).
@parallel_indices (iy) function bc_x!(A::Data.Array)
    last_ix = lastindex(A, 1)
    A[1, iy] = A[2, iy]
    A[last_ix, iy] = A[last_ix - 1, iy]
    return
end
# Boundary condition along y: for the row index `ix`, copy the second entry of `A`
# onto the first and the second-to-last entry onto the last (second dimension).
@parallel_indices (ix) function bc_y!(A::Data.Array)
    last_iy = lastindex(A, 2)
    A[ix, 1] = A[ix, 2]
    A[ix, last_iy] = A[ix, last_iy - 1]
    return
end
# Solve the 2D Stokes problem with a circular low-viscosity inclusion using the
# pseudo-transient iteration implemented by the kernels above.
#
# The grid resolution `nx`, `ny` and the switches `do_viz`, `do_save` are read from the
# file-level constants. The function
#   1. sets up physics/numerics parameters and allocates the fields,
#   2. builds and smooths the viscosity field and derives the iteration parameters,
#   3. iterates until the scaled residual drops below `ε` or `iterMax` is exceeded,
#      checking/printing the error every `nout` iterations,
#   4. prints timing information, optionally plots and optionally appends
#      "nx ny iter" to ../output/out_Stokes2D.txt.
#
# Returns `(xc, yc, Pt)`: the cell-centre coordinate ranges and the pressure array.
@views function Stokes2D_()
    # Physics
    lx, ly = 10.0, 10.0 # domain extents
    μs0 = 1.0 # matrix viscosity
    μsi = 1e-3 # inclusion viscosity
    εbg = 1.0 # background strain-rate
    # Numerics
    iterMax = 1e5 # maximum number of pseudo-transient iterations
    nout = 500 # error checking frequency
    ε = 1e-8 # nonlinear absolute tolerance
    CFL = 0.9/sqrt(2)
    Re = 5π
    r = 1.0
    nwarmup = 10 # iterations excluded from the timing
    # The grid resolution nx, ny comes from the file-level constants; per the original
    # note it should be a multiple of 32, minus 1, for optimal GPU performance.
    # Derived numerics
    dx, dy = lx/nx, ly/ny # cell sizes
    max_lxy = max(lx,ly)
    Vpdτ = min(dx,dy)*CFL
    xc, yc, yv = LinRange(dx/2, lx - dx/2, nx), LinRange(dy/2, ly - dy/2, ny), LinRange(0, ly, ny+1)
    # Array allocations
    Pt = @zeros(nx ,ny )
    ∇V = @zeros(nx ,ny )
    τxx = @zeros(nx ,ny )
    τyy = @zeros(nx ,ny )
    τxy = @zeros(nx-1,ny-1)
    Rx = @zeros(nx-1,ny-2)
    Ry = @zeros(nx-2,ny-1)
    dVx = @zeros(nx-1,ny-2)
    dVy = @zeros(nx-2,ny-1)
    Mus2 = @zeros(nx ,ny )
    Musτ = @zeros(nx ,ny )
    Gdτ = @zeros(nx ,ny )
    dτ_Rho = @zeros(nx ,ny )
    # Initial conditions
    # Squared distance of each cell centre from the domain centre.
    Rad2 = [((ix-1)*dx +0.5*dx -0.5*lx)^2 + ((iy-1)*dy +0.5*dy -0.5*ly)^2 for ix=1:nx, iy=1:ny]
    # Velocities linear in x (Vx) and y (Vy), scaled by the background strain-rate.
    Vx = Data.Array( -εbg.*[((ix-1)*dx -0.5*lx) for ix=1:nx+1, iy=1:ny] )
    Vy = Data.Array( εbg.*[((iy-1)*dy -0.5*ly) for ix=1:nx, iy=1:ny+1] )
    # Viscosity: μs0 everywhere except inside the unit circle around the domain centre.
    Mus_host = fill(μs0, nx, ny)
    Mus_host[Rad2 .< 1.0] .= μsi
    Mus = Data.Array( Mus_host )
    Mus2 .= Mus
    # Ten smoothing passes, ping-ponging between the two buffers.
    for _ in 1:10
        @parallel smooth!(Mus2, Mus, 1.0)
        Mus, Mus2 = Mus2, Mus
    end
    Musτ .= Mus
    @parallel compute_maxloc!(Musτ, Mus)
    @parallel (1:size(Musτ,2)) bc_x!(Musτ)
    @parallel (1:size(Musτ,1)) bc_y!(Musτ)
    # Time loop
    @parallel compute_iter_params!(dτ_Rho, Gdτ, Musτ, Vpdτ, Re, r, max_lxy)
    err = 2*ε; iter = 0
    err_evo1 = Float64[]; err_evo2 = Int[] # concretely typed error history
    # Local timer (no global state); restarted once the warm-up iterations are done.
    wtime0 = Base.time()
    while err > ε && iter <= iterMax
        # `iter` counts completed iterations, so restarting here times exactly
        # `iter - nwarmup` iterations, matching the divisor used below.
        if iter == nwarmup
            wtime0 = Base.time()
        end
        @parallel compute_P!(∇V, Pt, Vx, Vy, Gdτ, r, dx, dy)
        @parallel compute_τ!(τxx, τyy, τxy, Vx, Vy, Mus, Gdτ, dx, dy)
        @parallel compute_dV!(Rx, Ry, dVx, dVy, Pt, τxx, τyy, τxy, dτ_Rho, dx, dy)
        @parallel compute_V!(Vx, Vy, dVx, dVy)
        @parallel (1:size(Vx,1)) bc_y!(Vx)
        @parallel (1:size(Vy,2)) bc_x!(Vy)
        iter += 1
        if iter % nout == 0
            Vmin, Vmax = minimum(Vx), maximum(Vx)
            Pmin, Pmax = minimum(Pt), maximum(Pt)
            # Residual norms scaled by the pressure/velocity range and the array size.
            norm_Rx = norm(Rx)/(Pmax-Pmin)*lx/sqrt(length(Rx))
            norm_Ry = norm(Ry)/(Pmax-Pmin)*lx/sqrt(length(Ry))
            norm_∇V = norm(∇V)/(Vmax-Vmin)*lx/sqrt(length(∇V))
            err = max(norm_Rx, norm_Ry, norm_∇V)
            push!(err_evo1, err); push!(err_evo2, iter)
            @printf("Total steps = %d, err = %1.3e [norm_Rx=%1.3e, norm_Ry=%1.3e, norm_∇V=%1.3e] \n", iter, err, norm_Rx, norm_Ry, norm_∇V)
        end
    end
    # Performance
    wtime = Base.time() - wtime0
    # Effective main memory access per iteration [GB], counted as 3 whole arrays each
    # read and written once.
    # NOTE(review): presumably Vx, Vy and Pt are the three arrays meant — confirm.
    A_eff = (3*2)/1e9*nx*ny*sizeof(Data.Number)
    wtime_it = wtime/(iter-nwarmup) # Execution time per iteration [s]
    T_eff = A_eff/wtime_it # Effective memory throughput [GB/s]
    @printf("Total steps = %d, err = %1.3e, time = %1.3e sec (@ T_eff = %1.2f GB/s) \n", iter, err, wtime, round(T_eff, sigdigits=2))
    # Visualisation
    if do_viz
        # The panel titled "Pressure" shows the pressure field `Pt` (the field returned below).
        p1 = heatmap(xc, yc, Array(Pt)', aspect_ratio=1, xlims=extrema(xc), ylims=extrema(yc), c=:viridis, title="Pressure")
        p2 = heatmap(xc, yv, Array(Vy)', aspect_ratio=1, xlims=extrema(xc), ylims=extrema(yv), c=:viridis, title="Vy")
        p4 = heatmap(xc[2:end-1], yv[2:end-1], log10.(abs.(Array(Ry)')), aspect_ratio=1, xlims=extrema(xc[2:end-1]), ylims=extrema(yv[2:end-1]), c=:viridis, title="log10(Ry)")
        p5 = plot(err_evo2,err_evo1, legend=false, xlabel="# iterations", ylabel="log10(error)", linewidth=2, markershape=:circle, markersize=3, labels="max(error)", yaxis=:log10)
        display(plot(p1, p2, p4, p5))
    end
    if do_save
        # `mkpath` creates the directory only if it is missing.
        mkpath("../output")
        open("../output/out_Stokes2D.txt","a") do io
            println(io, "$(nx) $(ny) $(iter)")
        end
    end
    return xc, yc, Pt
end
# Script entry point: run the solver when this file is executed or included.
if use_return
# Keep the cell-centre coordinates and the pressure field returned by the solver in the
# globals `xc`, `yc` and `P`.
xc, yc, P = Stokes2D_();
else
# Run the solver and discard its result.
# NOTE(review): the top-level `return` presumably ends evaluation of this expression
# right after the call, so `Stokes2D` is never actually bound and nothing is echoed in
# the REPL — confirm before relying on `Stokes2D` elsewhere.
Stokes2D = begin Stokes2D_(); return; end
end