-
Notifications
You must be signed in to change notification settings - Fork 9
/
advance_mu_t_kernel.cu
200 lines (157 loc) · 7.52 KB
/
advance_mu_t_kernel.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <string.h>
#include "config_flags.h"
#include "advance_mu_t_cu.h"
#define I3(i,k,j) ((j) * kdim * idim + (k) * idim + (i))
#define I2(i,j) ((j) * idim + (i))
#define min(a,b) ((a)<(b)?(a):(b))
#define max(a,b) ((a)>(b)?(a):(b))
__global__ void advance_mu_t_kernel( float * __restrict__ ww, float * __restrict__ ww_1, float * __restrict__ u, float * __restrict__ u_1,
float *v, float *v_1,
float * __restrict__ mu, float * __restrict__ mut, float * __restrict__ muave, float * __restrict__ muts,
float * __restrict__ muu,float * __restrict__ muv,
float * __restrict__ mudf, float * __restrict__ t, float * __restrict__ t_1,
float * __restrict__ t_ave, float * __restrict__ ft, float * __restrict__ mu_tend,
float rdx, float rdy, float dts, float epssm,
float * __restrict__ dnw, float * __restrict__ fnm, float * __restrict__ fnp, float * __restrict__ rdnw,
float * __restrict__ msfuy, float * __restrict__ msfvx_inv,
float * __restrict__ msftx, float * __restrict__ msfty,
float * __restrict__ wdtn, float * __restrict__ dvdxi, float * __restrict__ dmdt,
config_flags config,
int ids, int ide, int jds, int jde, int kds, int kde,
int idim, int jdim, int kdim,
int its, int ite, int jts, int jte, int kts, int kte )
{
int i, j, k;
int i_start, i_end, j_start, j_end, k_start, k_end;
//<DESCRIPTION>
//
// advance_mu_t advances the explicit perturbation theta equation and the mass
// conservation equation. In addition, the small timestep omega is updated,
// and some quantities needed in other places are squirrelled away.
//
//</DESCRIPTION>
// now, the real work.
// set the loop bounds taking into account boundary conditions.
i_start = its;
i_end = min(ite,ide-1);
j_start = jts;
j_end = min(jte,jde-1);
k_start = kts;
k_end = kte-1;
if( !config.periodic_x ){
if( config.specified || config.nested ) {
i_start = max(its,ids+1);
i_end = min(ite,ide-2);
}
}
if( config.specified || config.nested ) {
j_start = max(jts,jds+1);
j_end = min(jte,jde-2);
}
// CALCULATION OF WW (dETA/dt)
i = threadIdx.x + blockIdx.x * blockDim.x;
j = blockIdx.y;
if (j >= j_start && j <= j_end){
if (i >= i_start && i <= i_end){
float msftx_r = msftx[I2(i,j)];
float msfty_r = msfty[I2(i,j)];
float muv_r = muv[I2(i,j)];
float muv_r_1 = muv[I2(i,j+1)];
float msfvx_inv_r = msfvx_inv[I2(i,j)];
float msfvx_inv_r_1 = msfvx_inv[I2(i,j+1)];
float muu_r = muu[I2(i,j)];
float muu_r_1 = muu[I2(i+1,j)];
float msfuy_r = msfuy[I2(i,j)];
float msfuy_r_1 = msfuy[I2(i+1,j)];
dmdt[I2(i,j)] = 0.0f;
// NOTE: mu is not coupled with the map scale factor.
// ww (omega) IS coupled with the map scale factor.
// Being coupled with the map scale factor means
// multiplication by (1/msft) in this case.
// Comments on map scale factors
// ADT eqn 47:
// partial drho/dt = -mx*my[partial d/dx(rho u/my) + partial d/dy(rho v/mx)]
// -partial d/dz(rho w)
// with rho -> mu, dividing by my, and with partial d/dnu(rho nu/my [=ww])
// as the final term (because we're looking for d_nu_/dt)
//
// begin by integrating with respect to nu from bottom to top
// BCs are ww=0 at both
// final term gives 0
// first term gives Integral([1/my]partial d mu/dt) over total column = dm/dt
// RHS remaining is Integral(-mx[partial d/dx(mu u/my) +
// partial d/dy(mu v/mx)]) over column
// lines below find RHS terms at each level set dmdt = sum over all levels
//
// [don't divide the below by msfty until find ww, since dmdt is used in
// the meantime]
for(k = k_start; k <= k_end; k++){
dvdxi[I3(i,k,j)] = msftx_r*msfty_r*(
rdy*( (v[I3(i,k,j+1)]+muv_r_1*v_1[I3(i,k,j+1)]*msfvx_inv_r_1)
-(v[I3(i,k,j )]+muv_r *v_1[I3(i,k,j )]*msfvx_inv_r) )
+rdx*( (u[I3(i+1,k,j)]+muu_r_1*u_1[I3(i+1,k,j)]/msfuy_r_1)
-(u[I3(i ,k,j)]+muu_r *u_1[I3(i ,k,j)]/msfuy_r) ));
dmdt[I2(i,j)] = dmdt[I2(i,j)] + dnw[k]*dvdxi[I3(i,k,j)];
}
muave[I2(i,j)] = mu[I2(i,j)];
mu[I2(i,j)] = mu[I2(i,j)]+dts*(dmdt[I2(i,j)]+mu_tend[I2(i,j)]);
mudf[I2(i,j)] = (dmdt[I2(i,j)]+mu_tend[I2(i,j)]); // save tendency for div damp filter
muts[I2(i,j)] = mut[I2(i,j)]+mu[I2(i,j)];
muave[I2(i,j)] = 0.5f*((1.0f+epssm)*mu[I2(i,j)]+(1.0f-epssm)*muave[I2(i,j)]);
for(k = 1; k <= k_end; k++){
ww[I3(i,k,j)] = ww[I3(i,k-1,j)]-dnw[k-1]*(dmdt[I2(i,j)]+dvdxi[I3(i,k-1,j)]
+mu_tend[I2(i,j)])/msfty_r;
}
// NOTE: ww_1 (large timestep ww) is already coupled with the
// map scale factor
for(k = 0; k <= k_end; k++){
ww[I3(i,k,j)] = ww[I3(i,k,j)]-ww_1[I3(i,k,j)];
}
// CALCULATION OF THETA
// NOTE: theta'' is not coupled with the map-scale factor,
// while the theta'' tendency is coupled (i.e., mult by 1/msft)
// Comments on map scale factors
// BUT NOTE THAT both are mass coupled
// in flux form equations (Klemp et al.) Theta = mu*theta
//
// scalar eqn: partial d/dt(rho q/my) = -mx[partial d/dx(q rho u/my) +
// partial d/dy(q rho v/mx)]
// - partial d/dz(q rho w/my)
// with rho -> mu, and with partial d/dnu(q rho nu/my) as the final term
//
// adding previous tendency contribution which was map scale factor coupled
// (had been divided by msfty)
// need to uncouple before updating uncoupled Theta (by adding)
for(k = 0; k <= k_end; k++){
t_ave[I3(i,k,j)] = t[I3(i,k,j)];
t[I3(i,k,j)] = t[I3(i,k,j)] + msfty_r*dts*ft[I3(i,k,j)];
}
wdtn[I3(i,0,j)] = 0.0f;
wdtn[I3(i,kde,j)] = 0.0f;
for(k = 1; k <= k_end; k++){
// for scalar eqn RHS term 3
wdtn[I3(i,k,j)] = ww[I3(i,k,j)]*(fnm[k]*t_1[I3(i,k,j)]+fnp[k]
*t_1[I3(i,k-1,j)]);
}
// scalar eqn, RHS terms 1, 2 and 3
// multiply by msfty to uncouple result for Theta from map scale factor
for(k = 0; k <= k_end; k++){
// multiplication by msfty uncouples result for Theta
t[I3(i,k,j)] = t[I3(i,k,j)] - dts*msfty_r*(
// multiplication by mx needed for RHS terms 1 & 2
msftx_r*(
0.5f*rdy*
( v[I3(i,k,j+1)]*(t_1[I3(i,k,j+1)]+t_1[I3(i,k,j )])
-v[I3(i,k,j )]*(t_1[I3(i,k,j )]+t_1[I3(i,k,j-1)]) )
+ 0.5f*rdx*
( u[I3(i+1,k,j)]*(t_1[I3(i+1,k,j)]+t_1[I3(i ,k,j)])
-u[I3(i ,k,j)]*(t_1[I3(i ,k,j)]+t_1[I3(i-1,k,j)]) ) )
+ rdnw[k]*( wdtn[I3(i,k+1,j)]-wdtn[I3(i,k,j)] ) );
}
} // i
} // j
}