MPI + OpenACC code, make file, and batch file #9

Open · wants to merge 3 commits into base: master
38 changes: 38 additions & 0 deletions src/laplace/program/Makefile
@@ -0,0 +1,38 @@
MF= Makefile

CC= mpicc

# Note: the -acc/-ta flags below assume a PGI/NVIDIA OpenACC compiler behind
# mpicc; without them the acc pragmas are silently ignored. Adjust to taste.
CFLAGS= -O3 -acc -ta=tesla:cc60 -Minfo=accel
LFLAGS= $(CFLAGS) -lm

EXE= hybrid_mpiACC

INC=


SRC= \
	hybrid_mpiACC.c

#
# No need to edit below this line
#

.SUFFIXES:
.SUFFIXES: .c .o

OBJ= $(SRC:.c=.o)

.c.o:
	$(CC) $(CFLAGS) -c $<

all: $(EXE)

$(OBJ): $(INC)

$(EXE): $(OBJ)
	$(CC) $(CFLAGS) -o $@ $(OBJ) $(LFLAGS)

$(OBJ): $(MF)

clean:
	rm -f $(OBJ) $(EXE) core
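
A minimal build-and-clean sequence might look like the following; the module
names here are assumptions for Bridges, so check `module avail` for the exact
PGI/MPI versions on your system:

    module load pgi mpi/pgi_openmpi   # hypothetical module names
    make                              # produces ./hybrid_mpiACC
    make clean                        # removes objects and the executable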
237 changes: 237 additions & 0 deletions src/laplace/program/hybrid_mpiACC.c
@@ -0,0 +1,237 @@
/*************************************************
* Laplace hybrid MPI OpenACC Version
*
* Temperature is initially 0.0
* Boundaries are as follows:
*
* 0 T 0
* 0 +-------------------+ 0
* | |
* | |
* | |
* T | | T
* | |
* | |
* | |
* 0 +-------------------+ 100
* 0 T 100
*
* Modified by Kevin Ham from original by John Urbanic, PSC 2014
*
************************************************/

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <sys/time.h>
#include <mpi.h>

// size of plate: small 672 large 10752
#define COLUMNS 10752
#define ROWS_GLOBAL 10752 // this is a "global" row count
#define NPES 28
#define ROWS (ROWS_GLOBAL/NPES)
#define UP 100
#define DOWN 101

// Use 10752 (16 times bigger) for large challenge problem
// All chosen to be easily divisible by Bridges' 28 cores per node
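// Worked example: with ROWS_GLOBAL = 10752 and NPES = 28, each PE holds
// ROWS = 384 real rows; PE p owns global rows p*ROWS+1 .. (p+1)*ROWS, and
// local rows 0 and ROWS+1 serve as ghost rows for the halo exchange below.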

// largest permitted change in temp (This value takes 3264 steps)
#define MAX_TEMP_ERROR 0.01

double Temperature[ROWS+2][COLUMNS+2]; // temperature grid
double Temperature_last[ROWS+2][COLUMNS+2]; // temperature grid from last iteration

// helper routines
void initialize(int npes, int my_PE_num);
void track_progress(int iter, double dt);


int main(int argc, char** argv) {
int my_PE_num;
    int i, j, npes;                          // grid indexes and number of PEs
int max_iterations; // number of iterations
int iteration=1; // current iteration
double dt; // largest change in t
double dt_global=100;
struct timeval start_time, stop_time, elapsed_time; // timers
//double start_time, stop_time, elapsed_time;
MPI_Status status; // status returned by MPI call
// the usual MPI startup routines
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_PE_num);
MPI_Comm_size(MPI_COMM_WORLD, &npes);

if (my_PE_num == 0)
{
printf("Running on %d MPI processes\n\n", npes);
}

// verify only NPES PEs are being used
if(npes != NPES) {
if(my_PE_num==0) {
printf("This code must be run with %d PEs\n", NPES);
}
MPI_Finalize();
exit(1);
}

// PE 0 asks for input
if(my_PE_num==0) {
// printf("Maximum iterations [100-4000]?\n");
// fflush(stdout); // Not always necessary, but can be helpful
// scanf("%d", &max_iterations);
max_iterations = 4000;
printf("Maximum iterations = %d\n", max_iterations);

}

// bcast max iterations to other PEs
MPI_Bcast(&max_iterations, 1, MPI_INT, 0, MPI_COMM_WORLD);

//if (my_PE_num==0) start_time = MPI_Wtime();

// printf("Maximum iterations [100-4000]?\n");
// scanf("%d", &max_iterations);
//max_iterations = 4000;
//printf("Maximum iterations = %d\n", max_iterations);

gettimeofday(&start_time,NULL); // Unix timer

initialize(npes, my_PE_num); // initialize Temp_last including boundary conditions

// do until error is minimal or until max steps
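    // copy(...) moves Temperature_last to the device here and copies it back
    // when the data region ends; create(...) only allocates Temperature on the
    // device, so every host access below needs an explicit "acc update".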
#pragma acc data copy(Temperature_last), create(Temperature)
while ( dt_global > MAX_TEMP_ERROR && iteration <= max_iterations ) {

// main calculation: average my four neighbors
#pragma acc kernels
for(i = 1; i <= ROWS; i++) {
for(j = 1; j <= COLUMNS; j++) {
Temperature[i][j] = 0.25 * (Temperature_last[i+1][j] + Temperature_last[i-1][j] +
Temperature_last[i][j+1] + Temperature_last[i][j-1]);
}
}
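    // OpenACC array sections are [start:count]: the two sections below are
    // real row 1 and real row ROWS, columns 1..COLUMNS (the rows about to be sent)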
#pragma acc update host(Temperature[1:1][1:COLUMNS],Temperature[ROWS:1][1:COLUMNS])
// COMMUNICATION PHASE: send ghost rows for next iteration

// send bottom real row down
if(my_PE_num != npes-1){ //unless we are bottom PE
MPI_Send(&Temperature[ROWS][1], COLUMNS, MPI_DOUBLE, my_PE_num+1, DOWN, MPI_COMM_WORLD);
}

// receive the bottom row from above into our top ghost row
if(my_PE_num != 0){ //unless we are top PE
MPI_Recv(&Temperature_last[0][1], COLUMNS, MPI_DOUBLE, my_PE_num-1, DOWN, MPI_COMM_WORLD, &status);
}

// send top real row up
if(my_PE_num != 0){ //unless we are top PE
MPI_Send(&Temperature[1][1], COLUMNS, MPI_DOUBLE, my_PE_num-1, UP, MPI_COMM_WORLD);
}

// receive the top row from below into our bottom ghost row
if(my_PE_num != npes-1){ //unless we are bottom PE
MPI_Recv(&Temperature_last[ROWS+1][1], COLUMNS, MPI_DOUBLE, my_PE_num+1, UP, MPI_COMM_WORLD, &status);
}

#pragma acc update device(Temperature_last[0:1][1:COLUMNS], Temperature_last[ROWS+1:1][1:COLUMNS])
dt = 0.0; // reset largest temperature change

// copy grid to old grid for next iteration and find latest
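    // (with "kernels" the compiler must detect the max reduction on dt itself;
    //  an explicit form would be: #pragma acc loop reduction(max:dt))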
#pragma acc kernels
for(i = 1; i <= ROWS; i++){
for(j = 1; j <= COLUMNS; j++){
dt = fmax( fabs(Temperature[i][j]-Temperature_last[i][j]), dt);
Temperature_last[i][j] = Temperature[i][j];
}
}
// find global dt
MPI_Reduce(&dt, &dt_global, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
MPI_Bcast(&dt_global, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

// periodically print test values
if((iteration % 100) == 0) {
#pragma acc update host(Temperature)
            if (my_PE_num == npes-1)       // track_progress reports the global bottom-right corner
                track_progress(iteration, dt);
// if plate size = 672 use 504 and 622
// if plate size = 10752 use 8064 and 10702
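            // global row 8064 is the last local row (ROWS) of PE 8064/ROWS - 1, i.e. PE 20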
if (8064/ROWS == my_PE_num+1){
printf("PE %d: T(8064,10702) = %f\n", my_PE_num,Temperature[ROWS][10702]);
}
}

iteration++;
}
// Slightly more accurate timing and cleaner output
MPI_Barrier(MPI_COMM_WORLD);

// PE 0 finish timing and output values
if (my_PE_num==0){
//stop_time = MPI_Wtime();
//elapsed_time = stop_time - start_time;
gettimeofday(&stop_time,NULL);
timersub(&stop_time,&start_time,&elapsed_time); // Unix time subtract routine
printf("\nMax error at iteration %d was %20.15g\n", iteration-1, dt_global);
// printf("Total time was %f seconds.\n", elapsed_time);
printf("Total time was %f seconds.\n", elapsed_time.tv_sec+elapsed_time.tv_usec/1000000.0);
}

MPI_Finalize();

//printf("\nMax error at iteration %d was %20.15g\n", iteration-1, dt);
//printf("Total time was %f seconds.\n", elapsed_time.tv_sec+elapsed_time.tv_usec/1000000.0);

    return 0;
}


// initialize plate and boundary conditions
// Temp_last is used to start the first iteration
void initialize(int npes, int my_PE_num){

double tMin, tMax; //Local boundary limits
int i,j;

for(i = 0; i <= ROWS+1; i++){
for (j = 0; j <= COLUMNS+1; j++){
Temperature_last[i][j] = 0.0;
}
}

    // Local boundary condition endpoints
tMin = (my_PE_num)*100.0/npes;
tMax = (my_PE_num+1)*100.0/npes;

// Left and right boundaries
for (i = 0; i <= ROWS+1; i++) {
Temperature_last[i][0] = 0.0;
Temperature_last[i][COLUMNS+1] = tMin + ((tMax-tMin)/ROWS)*i;
}

// Top boundary (PE 0 only)
if (my_PE_num == 0)
for (j = 0; j <= COLUMNS+1; j++)
Temperature_last[0][j] = 0.0;

// Bottom boundary (Last PE only)
if (my_PE_num == npes-1)
for (j=0; j<=COLUMNS+1; j++)
Temperature_last[ROWS+1][j] = (100.0/COLUMNS) * j;

}

// only called by last PE
// print diagonal in bottom right corner where most action is
void track_progress(int iteration, double dt) {

int i;

printf("---- Iteration %d, dt = %f ----\n", iteration, dt);
for(i = 5; i >= 3; i--) {
printf("[%d,%d]: %5.2f ", ROWS_GLOBAL-i, COLUMNS-i, Temperature[ROWS-i][COLUMNS-i]);
}
printf("\n");

}

43 changes: 43 additions & 0 deletions src/laplace/program/hybrid_mpiACC.job
@@ -0,0 +1,43 @@
#!/bin/bash

# This is a simple template script to run batch jobs on Bridges at PSC
#
# You only have to do two things to run an MPI program with it:
#
# 1) Make a copy of the script with the same name as your MPI executable,
# eg if the executable is 'program' then type: cp mpibatch.job program.job
#
# 2) Set the number of MPI processes NPROC (and "-N" if NPROC > 28)
#
# To run: sbatch program.job
#
# All screen output (stdout and stderr) will appear in a file called
# slurm-XXXXX.out, where XXXXX is the job number assigned at submit time.
#
# David Henty, EPCC, 25/06/2017
#

#SBATCH -p GPU-shared
#SBATCH --reservation=ihssgpu
#SBATCH --gres=gpu:p100:1
#SBATCH -t 00:10:00
#SBATCH -N 1
#SBATCH --ntasks-per-node 28


NPROC=28

MPIPROG=`basename $SLURM_JOB_NAME .job`
MPISIZE=$NPROC

echo '--------------------------------------------------------------------------------'

echo 'Running MPI program' $MPIPROG 'on' $MPISIZE 'processes'

echo 'Started at' `date`
echo '--------------------------------------------------------------------------------'

(time mpirun -n $MPISIZE ./$MPIPROG) 2>&1

echo '--------------------------------------------------------------------------------'
echo 'Finished at' `date`
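
Once hybrid_mpiACC is built in the same directory, the job is submitted with
the script's own name; the basename minus .job must match the executable,
since the script derives MPIPROG from $SLURM_JOB_NAME:

    sbatch hybrid_mpiACC.job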