Skip to content

Commit 3fd4800

Browse files
authored
Merge pull request UW-Hydro#735 from jhamman/feature/openmp_threading
Feature/openmp threading
2 parents 49b48d2 + 74dc13e commit 3fd4800

File tree

11 files changed

+102
-47
lines changed

11 files changed

+102
-47
lines changed

ci/image.travis

+4
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ function vic_script {
8181
$DRIVER_EXE -v
8282
$DRIVER_EXE -o
8383

84+
# Set the number of OpenMP threads to use
85+
# https://docs.travis-ci.com/user/languages/c/#OpenMP-projects
86+
export OMP_NUM_THREADS=4
87+
8488
# Run test package
8589
./tests/run_tests.py unit examples system \
8690
--image=${DRIVER_EXE} \

docs/Development/ReleaseNotes.md

+4
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ To check which release of VIC you are running:
9595

9696
Codified behavior in the initialization of the `image` and `cesm` drivers that requires the parameter variables `AreaFract`, `Pfactor`, `zone_fract`, and `Cv` to sum exactly to 1.0. If using the `SNOW_BAND` option, the area-weighted `elevation` must match the mean grid cell elevation (`elev`). VIC will print *warnings* if any of these criteria are violated.
9797

98+
7. Added thread parallelization using OpenMP ([GH#712](https://github.com/UW-Hydro/VIC/pull/712))
99+
100+
The VIC image and CESM drivers may now optionally be compiled with OpenMP to enable shared-memory thread parallelization. This option should improve the parallel scaling of these drivers by reducing the number of MPI messages and increasing message size.
101+
98102
## VIC 5.0.1
99103

100104
**Release date: (February 1, 2017)**

docs/Documentation/Drivers/Image/RunVIC.md

+17-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ The Image Driver has three dependencies:
1919

2020
3. [netCDF4](http://www.unidata.ucar.edu/software/netcdf/)
2121

22+
!!! Note
23+
The Image Driver may also be compiled with [OpenMP](http://www.openmp.org/). Nearly all modern C compilers support the [OpenMP standard](http://www.openmp.org/resources/openmp-compilers/); users will need to ensure that the `Makefile` includes the appropriate compiler flag (usually `-fopenmp`). See the discussion below for how to control OpenMP parallelization.
24+
2225
## Compiling
2326
In most cases, you will need to edit the `NETCDF_PATH` and `MPI_PATH` variables in the `Makefile`.
2427

@@ -48,11 +51,23 @@ At the command prompt, type:
4851

4952
where `global_parameter_filename` = name of the global parameter file corresponding to your project.
5053

51-
To run VIC image driver using multiple processors, type the following instead:
54+
The VIC image driver can be run using parallel processing with MPI and/or OpenMP.
55+
56+
!!! Note
57+
Users are encouraged to consult their system administrator for assistance in configuring the VIC image driver for parallel processing applications.
58+
59+
To run the VIC image driver on multiple processors using MPI, type the following instead:
5260

5361
mpiexec -np $n_proc ./vic_image.exe -g global_parameter_filename.txt
5462

55-
where `n_proc` = number of processors to be used
63+
where `n_proc` = number of processors to be used. *Note that different MPI implementations may use different names for the MPI executable, such as `mpirun`, `mpiexec_mpt`, or `mpiexec.hydra`*.
64+
65+
To run the VIC image driver using multiple processors with OpenMP (threads), set the environment variable `OMP_NUM_THREADS`:
66+
67+
export OMP_NUM_THREADS=8
68+
./vic_image.exe -g global_parameter_filename.txt
69+
70+
These two parallelization methods may also be combined using a hybrid OpenMP/MPI approach. However, that configuration is usually machine-, compiler-, or scheduler-dependent.
5671

5772
## Other Command Line Options
5873

tests/test_restart.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def check_exact_restart_fluxes(result_basedir, driver, run_periods):
260260
for var in ds_full_run.data_vars:
261261
np.testing.assert_array_equal(
262262
ds[var].values, ds_full_run_split_period[var].values,
263-
err_msg='Fluxes are not an exact match')
263+
err_msg='Fluxes are not an exact match for %s' % var)
264264

265265

266266
def check_exact_restart_states(state_basedir, driver, run_periods,
@@ -365,7 +365,7 @@ def check_exact_restart_states(state_basedir, driver, run_periods,
365365
np.testing.assert_array_equal(ds_states[var].values,
366366
ds_states_full_run[var].values,
367367
err_msg='states are not an '
368-
'exact match')
368+
'exact match for %s' % var)
369369

370370

371371
def read_ascii_state(state_fname):

vic/drivers/cesm/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ LIBRARY = -lm -L${NETCDFPATH}/lib -lnetcdf
8888

8989
# Set compiler flags
9090
CFLAGS = ${INCLUDES} -ggdb -O0 -Wall -Wextra -fPIC \
91+
-fopenmp \
9192
-DLOG_LVL=$(LOG_LVL) \
9293
-DGIT_VERSION=\"$(GIT_VERSION)\" \
9394
-DUSERNAME=\"$(USER)\" \

vic/drivers/image/Makefile

+2-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ INCLUDES = -I ${DRIVERPATH}/include \
7474
-I ${SHAREDIMAGEPATH}/include
7575

7676
# Uncomment to include debugging information
77-
CFLAGS = ${INCLUDES} ${NC_CFLAGS} -ggdb -O0 -Wall -Wextra -std=c99 \
77+
CFLAGS = ${INCLUDES} ${NC_CFLAGS} -ggdb -O0 -Wall -Wextra -std=c99 \
78+
-fopenmp \
7879
-DLOG_LVL=$(LOG_LVL) \
7980
-DGIT_VERSION=\"$(GIT_VERSION)\" \
8081
-DUSERNAME=\"$(USER)\" \

vic/drivers/shared_image/include/vic_mpi.h

+5
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@
2929

3030
#include <vic_def.h>
3131
#include <mpi.h>
32+
#ifdef _OPENMP
33+
#include <omp.h>
34+
#else
35+
#define omp_get_max_threads() 1
36+
#endif
3237

3338
#define VIC_MPI_ROOT 0
3439

vic/drivers/shared_image/src/vic_image_run.c

+2
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ vic_image_run(dmy_struct *dmy_current)
5555
sprint_dmy(dmy_str, dmy_current);
5656
debug("Running timestep %zu: %s", current, dmy_str);
5757

58+
// If running with OpenMP, run this for loop using multiple threads
59+
#pragma omp parallel for default(shared) private(i, timer, vic_run_ref_str)
5860
for (i = 0; i < local_domain.ncells_active; i++) {
5961
// Set global reference string (for debugging inside vic_run)
6062
sprintf(vic_run_ref_str, "Gridcell io_idx: %zu, timestep info: %s",

vic/drivers/shared_image/src/vic_image_timing.c

+10-2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ write_vic_timing_table(timer_struct *timers,
4646
struct passwd *pw;
4747
double ndays;
4848
double nyears;
49+
int nprocs;
50+
int nthreads;
4951

5052
// datestr
5153
curr_date_time = time(NULL);
@@ -70,6 +72,10 @@ write_vic_timing_table(timer_struct *timers,
7072
strcpy(user, "unknown");
7173
}
7274

75+
// mpi/openmp
76+
nthreads = omp_get_max_threads();
77+
nprocs = mpi_size * nthreads;
78+
7379
// calculate run length
7480
ndays = global_param.dt * global_param.nrecs / SEC_PER_DAY;
7581
nyears = ndays / DAYS_PER_YEAR;
@@ -107,7 +113,9 @@ write_vic_timing_table(timer_struct *timers,
107113
global_param.atmos_dt);
108114
fprintf(LOG_DEST, "\n");
109115

110-
fprintf(LOG_DEST, " Total pes active : %d\n", mpi_size);
116+
fprintf(LOG_DEST, " MPI Processes : %d\n", mpi_size);
117+
fprintf(LOG_DEST, " OPENMP Threads : %d\n", nthreads);
118+
fprintf(LOG_DEST, " Total pes active : %d\n", nprocs);
111119
fprintf(LOG_DEST, " pes per node : %ld\n",
112120
sysconf(_SC_NPROCESSORS_ONLN));
113121

@@ -116,7 +124,7 @@ write_vic_timing_table(timer_struct *timers,
116124
fprintf(LOG_DEST, " Overall Metrics\n");
117125
fprintf(LOG_DEST, " ---------------\n");
118126
fprintf(LOG_DEST, " Model Cost : %g pe-hrs/simulated_year\n",
119-
mpi_size * timers[TIMER_VIC_ALL].delta_wall / SEC_PER_HOUR /
127+
nprocs * timers[TIMER_VIC_ALL].delta_wall / SEC_PER_HOUR /
120128
nyears);
121129
fprintf(LOG_DEST, " Model Throughput : %g simulated_years/day\n",
122130
nyears / (timers[TIMER_VIC_ALL].delta_wall / SEC_PER_DAY));

vic/vic_run/src/CalcBlowingSnow.c

+31-27
Original file line numberDiff line numberDiff line change
@@ -271,18 +271,18 @@ CalcBlowingSnow(double Dt,
271271
*****************************************************************************/
272272
double
273273
qromb(double (*funcd)(),
274-
double es,
275-
double Wind,
276-
double AirDens,
277-
double ZO,
278-
double EactAir,
279-
double F,
280-
double hsalt,
281-
double phi_r,
282-
double ushear,
283-
double Zrh,
284-
double a,
285-
double b)
274+
double es,
275+
double Wind,
276+
double AirDens,
277+
double ZO,
278+
double EactAir,
279+
double F,
280+
double hsalt,
281+
double phi_r,
282+
double ushear,
283+
double Zrh,
284+
double a,
285+
double b)
286286
{
287287
extern parameters_struct param;
288288

@@ -363,23 +363,27 @@ polint(double xa[],
363363
*****************************************************************************/
364364
double
365365
trapzd(double (*funcd)(),
366-
double es,
367-
double Wind,
368-
double AirDens,
369-
double ZO,
370-
double EactAir,
371-
double F,
372-
double hsalt,
373-
double phi_r,
374-
double ushear,
375-
double Zrh,
376-
double a,
377-
double b,
378-
int n)
366+
double es,
367+
double Wind,
368+
double AirDens,
369+
double ZO,
370+
double EactAir,
371+
double F,
372+
double hsalt,
373+
double phi_r,
374+
double ushear,
375+
double Zrh,
376+
double a,
377+
double b,
378+
int n)
379379
{
380-
double x, tnm, sum, del;
380+
double x, tnm, sum, del;
381+
int it, j;
382+
383+
// TODO: remove use of static variables (see GH #735), for now:
384+
// make static variables thread safe
381385
static double s;
382-
int it, j;
386+
#pragma omp threadprivate(s)
383387

384388
if (n == 1) {
385389
return (s = 0.5 *

vic/vic_run/src/frozen_soil.c

+24-13
Original file line numberDiff line numberDiff line change
@@ -170,16 +170,18 @@ solve_T_profile(double *T,
170170
int NOFLUX,
171171
int EXP_TRANS)
172172
{
173+
double *aa, *bb, *cc, *dd, *ee, Bexp;
174+
int Error;
175+
int j;
176+
177+
// TODO: remove use of static variables (see GH #735), for now:
178+
// make static variables thread safe
173179
static double A[MAX_NODES];
174180
static double B[MAX_NODES];
175181
static double C[MAX_NODES];
176182
static double D[MAX_NODES];
177183
static double E[MAX_NODES];
178-
179-
double *aa, *bb, *cc, *dd, *ee, Bexp;
180-
181-
int Error;
182-
int j;
184+
#pragma omp threadprivate(A, B, C, D, E)
183185

184186
if (FIRST_SOLN[0]) {
185187
if (EXP_TRANS) {
@@ -646,6 +648,18 @@ fda_heat_eqn(double T_2[],
646648
int init,
647649
...)
648650
{
651+
char PAST_BOTTOM;
652+
double storage_term, flux_term, phase_term, flux_term1, flux_term2;
653+
double Lsum;
654+
int i;
655+
size_t lidx;
656+
int focus, left, right;
657+
658+
// argument list handling
659+
va_list arg_addr;
660+
661+
// TODO: remove use of static variables (see GH #735), for now:
662+
// make static variables thread safe
649663
static double deltat;
650664
static int NOFLUX;
651665
static int EXP_TRANS;
@@ -681,15 +695,12 @@ fda_heat_eqn(double T_2[],
681695
static double DT[MAX_NODES], DT_down[MAX_NODES], DT_up[MAX_NODES];
682696
static double Dkappa[MAX_NODES];
683697
static double Bexp;
684-
char PAST_BOTTOM;
685-
double storage_term, flux_term, phase_term, flux_term1, flux_term2;
686-
double Lsum;
687-
int i;
688-
size_t lidx;
689-
int focus, left, right;
690698

691-
// argument list handling
692-
va_list arg_addr;
699+
#pragma omp threadprivate(deltat, NOFLUX, EXP_TRANS, T0, moist, ice, \
700+
kappa, Cs, max_moist, bubble, expt, alpha, beta, gamma, Zsum, Dp, \
701+
bulk_dens_min, soil_dens_min, quartz, bulk_density, soil_density, organic, \
702+
depth, Nlayers, Ts, Tb, ice_new, Cs_new, kappa_new, DT, DT_down, DT_up, \
703+
Dkappa, Bexp)
693704

694705
// initialize variables if init==1
695706
if (init == 1) {

0 commit comments

Comments
 (0)