From 586191a79568c11287cd8998b785abbe2bbca8f4 Mon Sep 17 00:00:00 2001 From: Martyn Foster Date: Fri, 3 Nov 2023 10:44:45 +0000 Subject: [PATCH 01/32] Add DR_HOOK support for HW counters with PAPI --- CMakeLists.txt | 11 +- cmake/FindPAPI.cmake | 44 ++ src/fiat/CMakeLists.txt | 10 +- src/fiat/drhook/drhook.c | 584 +++++++++++------- src/fiat/drhook/drhook_papi.c | 305 +++++++++ src/fiat/drhook/drhook_papi.h | 42 ++ .../internal/drhook_run_omp_parallel.F90 | 43 ++ src/fiat/system/internal/opfla_perfmon.c | 2 +- src/programs/CMakeLists.txt | 28 + src/programs/fiat-drhook-sanity-gemm.F90 | 84 +++ src/programs/fiat-drhook-sanity-stream.F90 | 460 ++++++++++++++ src/programs/fiat-drhook-sanity.F90 | 74 +++ src/programs/mysecond.c | 27 + 13 files changed, 1499 insertions(+), 215 deletions(-) create mode 100644 cmake/FindPAPI.cmake create mode 100644 src/fiat/drhook/drhook_papi.c create mode 100644 src/fiat/drhook/drhook_papi.h create mode 100644 src/programs/fiat-drhook-sanity-gemm.F90 create mode 100644 src/programs/fiat-drhook-sanity-stream.F90 create mode 100644 src/programs/fiat-drhook-sanity.F90 create mode 100644 src/programs/mysecond.c diff --git a/CMakeLists.txt b/CMakeLists.txt index d204f98a..f175cf53 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,13 +22,17 @@ set(CMAKE_CXX_STANDARD_REQUIRED TRUE) ### Options ecbuild_add_option( FEATURE OMP - DESCRIPTION "support for OpenMP shared memory parallelism" + DESCRIPTION "Support for OpenMP shared memory parallelism" REQUIRED_PACKAGES "OpenMP COMPONENTS Fortran" ) ecbuild_add_option( FEATURE MPI DESCRIPTION "Support for MPI distributed parallelism" REQUIRED_PACKAGES "MPI COMPONENTS Fortran" ) +ecbuild_add_option( FEATURE PAPI + DESCRIPTION "Support for HW counters in drhook via PAPI" + REQUIRED_PACKAGES "OpenMP COMPONENTS C" "PAPI") + ecbuild_find_package( fckit QUIET ) ecbuild_add_option( FEATURE FCKIT DESCRIPTION "Support for fckit" @@ -50,6 +54,11 @@ ecbuild_add_option( FEATURE WARNINGS DEFAULT ON 
DESCRIPTION "Add warnings to compiler" ) +ecbuild_add_option( FEATURE MKL + DESCRIPTION "Use MKL for BLAS and/or FFTW" + DEFAULT ON + REQUIRED_PACKAGES "MKL" ) + ecbuild_find_package( NAME Realtime QUIET ) ### Sources diff --git a/cmake/FindPAPI.cmake b/cmake/FindPAPI.cmake new file mode 100644 index 00000000..f778f51a --- /dev/null +++ b/cmake/FindPAPI.cmake @@ -0,0 +1,44 @@ +# Try to find PAPI headers and libraries. +# +# Usage of this module as follows: +# +# find_package(PAPI) +# +# Variables used by this module, they can change the default behaviour and need +# to be set before calling find_package: +# +# PAPI_ROOT Set this variable to the root installation of +# libpapi if the module has problems finding the +# proper installation path. +# +# Variables defined by this module: +# +# PAPI_FOUND System has PAPI libraries and headers +# PAPI_LIBRARIES The PAPI library +# PAPI_INCLUDE_DIRS The location of PAPI headers + +find_path(PAPI_ROOT + NAMES include/papi.h +) + +find_library(PAPI_LIBRARIES + # Names are tried in order: the shared library first, then the static one. 
+ NAMES libpapi.so libpapi.a papi + HINTS ${PAPI_ROOT}/lib ${HILTIDEPS}/lib +) + +find_path(PAPI_INCLUDE_DIRS + NAMES papi.h + HINTS ${PAPI_ROOT}/include ${HILTIDEPS}/include +) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(PAPI DEFAULT_MSG + PAPI_LIBRARIES + PAPI_INCLUDE_DIRS +) + +mark_as_advanced( + PAPI_LIBRARIES + PAPI_INCLUDE_DIRS +) diff --git a/src/fiat/CMakeLists.txt b/src/fiat/CMakeLists.txt index be4a49ff..ecae913f 100644 --- a/src/fiat/CMakeLists.txt +++ b/src/fiat/CMakeLists.txt @@ -86,7 +86,15 @@ else() endif() if( HAVE_OMP ) - target_link_libraries( fiat PRIVATE OpenMP::OpenMP_Fortran ) + + target_link_libraries( fiat PRIVATE OpenMP::OpenMP_Fortran ) + +endif() + +if ( HAVE_PAPI ) + target_link_libraries ( fiat PRIVATE ${PAPI_LIBRARIES} ) + target_include_directories ( fiat PRIVATE ${PAPI_INCLUDE_DIRS} ) + target_compile_definitions ( fiat PRIVATE HKPAPI ) endif() fiat_target_ignore_missing_symbols( TARGET fiat SYMBOLS diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index e089be1a..65eb8190 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -1,7 +1,7 @@ /* * (C) Copyright 2005- ECMWF. * (C) Copyright 2013- Meteo-France. - * + * * This software is licensed under the terms of the Apache Licence Version 2.0 * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. * In applying this licence, ECMWF does not waive the privileges and immunities @@ -15,7 +15,7 @@ #define _GNU_SOURCE -/* +/* drhook.c Author: Sami Saarinen, ECMWF, 14..24-Nov-2003 @@ -84,6 +84,7 @@ static int backtrace(void **buffer, int size) { return 0; } #include #include "ec_get_cycles.h" +#include "drhook_papi.h" static long long int *thread_cycles = NULL; int drhook_lhook = 1; // NOTE: A global variable !! 
@@ -139,7 +140,7 @@ static void drhook_oml_init_lock() { } oml_init_lockid_with_name(&DRHOOK_lock, "drhook.c:DRHOOK_lock"); oml_set_debug(saved_state); -} +} #if !defined(CACHELINESIZE) #if defined(LEVEL1_DCACHE_LINESIZE) @@ -302,13 +303,14 @@ static int opt_getpag = 0; static int opt_walltime = 0; static int opt_cputime = 0; static int opt_wallprof = 0; +static int opt_papi = 0; static int opt_cpuprof = 0; static int opt_memprof = 0; static int opt_cycles = 0; static int opt_trim = 0; static int opt_calls = 0; -static int opt_self = 1; /* 0=exclude drhook altogether, - 1=include, but don't print, +static int opt_self = 1; /* 0=exclude drhook altogether, + 1=include, but don't print, 2=also print */ static int opt_propagate_signals = 1; static int opt_sizeinfo = 1; @@ -325,9 +327,9 @@ static int opt_funcenter = 0; static int opt_funcexit = 0; static int opt_timeline = 0; /* myproc or -1 [or 0 for --> timeline feature off (default)] */ -static int opt_timeline_thread = 1; /* thread-id control : +static int opt_timeline_thread = 1; /* thread-id control : <= 0 print for all threads - 1 -> #1 only [but curheap still SUM of all threads] (default), + 1 -> #1 only [but curheap still SUM of all threads] (default), n -> print for increasing number of threads separately : [1..n] */ static int opt_timeline_format = 1; /* if 1, print only {wall,hwm,rss,curheap} w/o labels "wall=" etc.; else fully expanded fmt */ static int opt_timeline_unitno = 6; /* Fortran unit number : default = 6 i.e. stdout */ @@ -340,7 +342,7 @@ static int opt_gencore_signal = 0; static int opt_random_memstat = 0; /* > 0 if to obtain random memory stats (maxhwm, maxstk) for tid=1. 
Updated when rand() % opt_random_memstat == 0 */ static double opt_trace_stack = 0; /* if > 0, a multiplier for OMP_STACKSIZE to monitor high master thread stack usage -- - -- implies opt_random_memstat = 1 (regardless of DR_HOOK_RANDOM_MEMSTAT setting) + -- implies opt_random_memstat = 1 (regardless of DR_HOOK_RANDOM_MEMSTAT setting) -- for master MPI task only (for the moment) */ static long long int drhook_oml_stacksize = 0; /* Slave stack size -- an indicative stack size even master thread should not exceed */ @@ -378,7 +380,7 @@ static drhook_timeline_t *timeline = NULL; #define SA_SIGINFO 0 #define SIG_EXTRA_ARGS /* empty */ #define SIG_PASS_EXTRA_ARGS /* empty */ -#else +#else #define SIG_EXTRA_ARGS , siginfo_t *sigcode, void *sigcontextptr #define SIG_PASS_EXTRA_ARGS , sigcode, sigcontextptr #endif @@ -460,7 +462,7 @@ typedef struct drhook_key_t { double wall_in, delta_wall_all, delta_wall_child; double cpu_in, delta_cpu_all, delta_cpu_child; long long int cycles_in, delta_cycles_all, delta_cycles_child; - char *filename; /* the filename where the 1st call (on this routine-name) + char *filename; /* the filename where the 1st call (on this routine-name) to dr_hook() occurred */ long long int sizeinfo; /* # of data elements, bytes, etc. */ long long int min_sizeinfo, max_sizeinfo; /* min & max of # of data elements, bytes, etc. 
*/ @@ -470,6 +472,13 @@ typedef struct drhook_key_t { long long int maxmem_selfdelta, maxmem_alldelta; long long int mem_maxhwm, mem_maxrss, mem_maxstk, mem_maxpagdelta; long long int paging_in; + +#ifdef HKPAPI + long_long counters_in[NPAPICNTRS]; + long_long delta_counters_all[NPAPICNTRS]; + long_long delta_counters_child[NPAPICNTRS]; +#endif + unsigned long long int alloc_count, free_count; struct drhook_key_t *next; } drhook_key_t; @@ -498,6 +507,10 @@ typedef struct drhook_prof_t { double pc; double total; double self; +#ifdef HKPAPI + long_long counter_tot[NPAPICNTRS]; + long_long counter_self[NPAPICNTRS]; +#endif unsigned long long int calls; double percall_ms_self; double percall_ms_total; @@ -593,7 +606,7 @@ static void set_ec_drhook_label(const char *hostname, long hlen) #define NSECS(x) ((int)(1000000000 * ((x) - SECS(x)))) #ifdef _DRHOOK_TIMER_T_ -static void set_killer_timer(const int *ntids, const int *target_omltid, +static void set_killer_timer(const int *ntids, const int *target_omltid, const int *target_sig, const double *start_time, const char *p, long plen) { @@ -617,13 +630,13 @@ static void set_killer_timer(const int *ntids, const int *target_omltid, sev.sigev_notify = SIGEV_SIGNAL; #endif sev.sigev_value.sival_ptr = &timerid; - + its.it_value.tv_sec = SECS(*start_time); its.it_value.tv_nsec = NSECS(*start_time); - + its.it_interval.tv_sec = 0; its.it_interval.tv_nsec = 0; - + #if defined(CLOCK_BOOTTIME) timer_create(CLOCK_BOOTTIME, &sev, &timerid); #else @@ -631,7 +644,7 @@ static void set_killer_timer(const int *ntids, const int *target_omltid, #endif /* timer_create(CLOCK_REALTIME, &sev, &timerid); */ timer_settime(timerid, 0, &its, NULL); - + cas_lock(&TimedKill); { fprintf(stderr, @@ -791,7 +804,7 @@ malloc_drhook(size_t size) void *p = malloc(size1); if (!p) { fprintf(stderr, - "***Error in malloc_drhook(): Unable to allocate space for %lld bytes\n", + "***Error in malloc_drhook(): Unable to allocate space for %lld bytes\n", (long long 
int)size1); DRHOOK_ABORT(); } @@ -1035,9 +1048,10 @@ insert_calltree(int tid, drhook_key_t *keyptr) /*--- remove_calltree ---*/ -static void -remove_calltree(int tid, drhook_key_t *keyptr, - const double *delta_wall, const double *delta_cpu, const long long int *delta_cycles) +static void +remove_calltree(int tid, drhook_key_t *keyptr, + const double *delta_wall, const double *delta_cpu, + const long long int *delta_cycles,long_long * delta_counters) { if (tid >= 1 && tid <= numthreads) { drhook_calltree_t *treeptr = thiscall[tid-1]; @@ -1046,6 +1060,12 @@ remove_calltree(int tid, drhook_key_t *keyptr, if (treeptr->prev) { drhook_key_t *parent_keyptr = treeptr->prev->keyptr; if (parent_keyptr) { /* extra security */ +#ifdef HKPAPI + drhook_papi_add(NULL, + parent_keyptr->delta_counters_child, + delta_counters + ); +#endif if (opt_walltime) { parent_keyptr->delta_wall_child += (*delta_wall); } @@ -1120,12 +1140,12 @@ memstat(drhook_key_t *keyptr, const int *thread_id, int in_getkey) if (opt_memprof) { keyptr->mem_seenmax = getmaxcurheap_thread_(thread_id); if (in_getkey) { /* Upon enter of a Dr.Hook'ed routine */ - /* A note for "keyptr->mem_curdelta": + /* A note for "keyptr->mem_curdelta": 1) do not reset to 0 2) initially calloc'ed to 0 while initializing the keydata[] ~ alias keyptr 3) remember the previous value --> catches memory leaks, too !! 
*/ /* keyptr->mem_curdelta = 0; */ - /* Nearly the same holds for "keyptr->mem_child"; + /* Nearly the same holds for "keyptr->mem_child"; we need to capture the maximum/hwm for child */ /* keyptr->mem_child = 0; */ keyptr->paging_in = keyptr->paging; @@ -1344,7 +1364,7 @@ ignore_one_signal(int sig, int silent) { int tid = drhook_oml_get_thread_num(); char *pfx = PREFIX(tid); fprintf(stderr, - "%s %s [%s@%s:%d] DR_HOOK ignores signal#%d (%s)\n", + "%s %s [%s@%s:%d] DR_HOOK ignores signal#%d (%s)\n", pfx,TIMESTR(tid),FFL, sig,strsignal(sig)); } @@ -1356,7 +1376,7 @@ static void ignore_signals(int silent) { char *env = getenv("DR_HOOK_IGNORE_SIGNALS"); - + if (!silent && myproc == 1) { int tid = drhook_oml_get_thread_num(); char *pfx = PREFIX(tid); @@ -1439,7 +1459,7 @@ ignore_signals(int silent) static int set_unlimited_corefile(unsigned long long int *hardlimit, int enforce) { - /* + /* Make sure we *only* set soft-limit (not hard-limit) to 0 in our scripts i.e. : $ ulimit -S -c 0 but *not* @@ -1463,10 +1483,10 @@ static int set_unlimited_corefile(unsigned long long int *hardlimit, int enforce return rc; } -static void +static void signal_gencore(int sig SIG_EXTRA_ARGS) { - if (opt_gencore > 0) { + if (opt_gencore > 0) { opt_gencore = 0; /* A tiny chance for a race condition between threads */ if (sig == opt_gencore_signal && sig >= 1 && sig <= NSIG) { signal(sig, SIG_IGN); @@ -1512,7 +1532,7 @@ static char *safe_llitoa(long long int i, char b[], int blen) } -static void +static void signal_harakiri(int sig SIG_EXTRA_ARGS) { /* A signal handler that will force to exit the current thread immediately for sure */ @@ -1550,12 +1570,12 @@ signal_harakiri(int sig SIG_EXTRA_ARGS) #if 0 batch_kill_(); #endif - + raise(SIGKILL); /* Use raise, not RAISE here */ _exit(128+ABS(sig)); /* Should never reach here, bu' in case it does, then ... */ } -static void +static void signal_drhook(int sig SIG_EXTRA_ARGS) { volatile int nfirst = drhook_use_lockfile ? 
0 : 1; @@ -1571,7 +1591,7 @@ signal_drhook(int sig SIG_EXTRA_ARGS) if (sig < 1 || sig > NSIG) return; // .. since have seen this, too :-( if (been_here_already++ > 0) return; // avoid calling more than once ... since it leads more often than not into troubles - + cas_lock(&thing); unixtid = ec_gettid(); @@ -1584,7 +1604,7 @@ signal_drhook(int sig SIG_EXTRA_ARGS) sigset_t newmask, oldmask; /* A tiny chance for a race condition between threads */ - // Using compare-and-swap -stuff from the include cas.h (also in ecProf) + // Using compare-and-swap -stuff from the include cas.h (also in ecProf) /* Signal catching */ { @@ -1593,24 +1613,24 @@ signal_drhook(int sig SIG_EXTRA_ARGS) } if (ec_drhook && tid >= 1 && tid <= numthreads) ec_drhook[tid-1].nsigs = nsigs; /* Store for possible signal_harakiri() */ - - /*------------------------------------------------------------ + + /*------------------------------------------------------------ Strategy: - drhook intercepts most interrupts. - - 1st interupt will + - 1st interupt will - call alarm(10) to try to make sure 2nd interrupt received - try to call tracebacks and exit (which includes atexits) - - 2nd (and subsequent) interupts will - - spin for 20 sec (to give 1st interrupt time to complete tracebacks) + - 2nd (and subsequent) interupts will + - spin for 20 sec (to give 1st interrupt time to complete tracebacks) - and then call _exit (bypassing atexit) ------------------------------------------------------------*/ - + /* if (sig != SIGTERM) signal(SIGTERM, SIG_DFL); */ /* Let the default SIGTERM to occur */ // max_threads = drhook_oml_get_max_threads(); if (nsigs == 1) { /*---- First call to signal handler: call alarm(drhook_harakiri_timeout), tracebacks, exit ------*/ - + if (!nfirst) { // Correct coding : one and only one task obtains exclusive creation mask -- others fire blanks! 
int fd = open(drhook_lockfile,O_CREAT|O_WRONLY|O_TRUNC|O_EXCL,S_IRUSR|S_IWUSR); @@ -1743,7 +1763,7 @@ signal_drhook(int sig SIG_EXTRA_ARGS) fprintf(stderr, "%s %s [%s@%s:%d] Signal#%d was caused by %s [memaddr=%p] [excepts=0x%x [%d]] : %p at %s(%s), nsigs = %d\n", pfx,TIMESTR(tid),FFL, - sig, s, + sig, s, addr, excepts, excepts, bt, @@ -1755,7 +1775,7 @@ signal_drhook(int sig SIG_EXTRA_ARGS) fprintf(stderr, "%s %s [%s@%s:%d] Signal#%d was caused by %s [memaddr=%p] : %p at %s(%s), nsigs = %d\n", pfx,TIMESTR(tid),FFL, - sig, s, + sig, s, addr, bt, dlinfo.dli_fname ? dlinfo.dli_fname : "", @@ -1805,17 +1825,17 @@ signal_drhook(int sig SIG_EXTRA_ARGS) } /* All below this point should be nsigs == 1 i.e. the first threat arriving signal_drhook() */ - + /* sigfillset(&newmask); -- dead code since sigprocmask() was not called */ /* sigemptyset(&newmask); sigaddset(&newmask, sig); */ - + /* Start critical region (we don't want any signals to interfere while doing this) */ /* sigprocmask(SIG_BLOCK, &newmask, &oldmask); */ - - if (nsigs == 1 && nfirst) { + + if (nsigs == 1 && nfirst) { /* Print Dr.Hook traceback */ const int ftnunitno = 0; /* stderr */ const int print_option = 2; /* calling tree */ @@ -1853,37 +1873,37 @@ signal_drhook(int sig SIG_EXTRA_ARGS) /* To make it less likely that another thread generates a signal while we are doing a traceback lets wait a while (seems to fix problems of the traceback terminating abnormally. Probably a better way of doing this involving holding - off signals but sigprocmask is not safe in multithreaded code - P Towers Dec 10 2012 + off signals but sigprocmask is not safe in multithreaded code - P Towers Dec 10 2012 This was originally an issue with the Intel compiler but may be of benefit for other - compilers. Cannot see it doing harm - P Towers Aug 29 2013 */ + compilers. Cannot see it doing harm - P Towers Aug 29 2013 */ // spin(MIN(5,tid)); // obsolete: only one thread (and task) ever gets here ! 
if (sig != SIGABRT && sig != SIGTERM) { #if (defined(LINUX) || defined(__APPLE__)) LinuxTraceBack(pfx,TIMESTR(tid),NULL); #endif - + #ifdef __INTEL_COMPILER intel_trbk_(); /* from ../utilities/gentrbk.F90 */ -#endif +#endif } - - fprintf(stderr, - "%s %s [%s@%s:%d] DrHook backtrace done for signal#%d, nsigs = %d\n", + + fprintf(stderr, + "%s %s [%s@%s:%d] DrHook backtrace done for signal#%d, nsigs = %d\n", pfx,TIMESTR(tid),FFL,sig,nsigs); } - + /* sigprocmask(SIG_SETMASK, &oldmask, 0); */ /* End critical region : the original signal state restored */ - + { int restored = 0, tdiff; time_t t1, t2; drhook_sigfunc_t u; u.func3args = signal_drhook; if (opt_propagate_signals && - sl->old.sa_handler != SIG_DFL && - sl->old.sa_handler != SIG_IGN && + sl->old.sa_handler != SIG_DFL && + sl->old.sa_handler != SIG_IGN && sl->old.sa_handler != u.func1args) { u.func1args = sl->old.sa_handler; @@ -1915,7 +1935,7 @@ signal_drhook(int sig SIG_EXTRA_ARGS) set_default_handler(SIGSEGV,1,1); restored = 1; break; - default: + default: break; } } @@ -1923,7 +1943,7 @@ signal_drhook(int sig SIG_EXTRA_ARGS) fprintf(stderr, "%s %s [%s@%s:%d] Calling previous signal handler at %p for signal#%d, nsigs = %d\n", pfx,TIMESTR(tid),FFL, - (void*) u.func1args,sig,nsigs); + (void*) u.func1args,sig,nsigs); time(&t1); u.func3args(sig SIG_PASS_EXTRA_ARGS); /* This could now be the ATP */ @@ -1934,7 +1954,7 @@ signal_drhook(int sig SIG_EXTRA_ARGS) "%s %s [%s@%s:%d] Returned from previous signal handler" " (at %p, signal#%d, time taken = %ds), nsigs = %d\n", pfx,TIMESTR(tid),FFL, - (void*) u.func1args,sig,tdiff,nsigs); + (void*) u.func1args,sig,tdiff,nsigs); if (atp_enabled && restored && atp_max_cores > 0) { /* Assuming it was indeed ATP, then lets spin a bit to allow other cores be dumped */ @@ -1973,7 +1993,7 @@ signal_drhook(int sig SIG_EXTRA_ARGS) } } } - + { int errcode = 128 + ABS(sig); /* Make sure that the process/thread really exits now -- immediately !! 
*/ @@ -1988,7 +2008,7 @@ signal_drhook(int sig SIG_EXTRA_ARGS) /*--- signal_drhook_init ---*/ -static void +static void signal_drhook_init(int enforce) { char *env = getenv("DR_HOOK_SILENT"); @@ -1996,11 +2016,11 @@ signal_drhook_init(int enforce) int j; dr_hook_procinfo_(&myproc, &nproc); if (myproc < 1) myproc = 1; /* Just to enable output as if myproc was == 1 */ - /* Signals may not yet been set, since MPI not initialized + /* Signals may not yet been set, since MPI not initialized Only enforce-parameter can enforce to set these => no output on myproc=1 */ - if (!enforce && (myproc < 1 || nproc < 0)) return; + if (!enforce && (myproc < 1 || nproc < 0)) return; if (signals_set) return; /* Extra safety */ - /* To present sumpini.F90 (f.ex.) initializing DrHook-signals in case of + /* To present sumpini.F90 (f.ex.) initializing DrHook-signals in case of DR_HOOK was turned off (=0), then set also export DR_HOOK_INIT_SIGNALS=0 */ env = getenv("DR_HOOK_INIT_SIGNALS"); if (env && *env == '0') { @@ -2147,6 +2167,21 @@ get_memmon_out(int me) return s; } +/*--- get_csv_out ---*/ + +static char * +get_csv_out(int me) +{ + char *s = NULL; + char *p = get_mon_out(me); + if (p) { + s = malloc_drhook((strlen(p) + 5) * sizeof(*s)); + sprintf(s,"%s.csv",p); + } + if (!s) s = strdup_drhook("drhook.prof.0.csv"); + return s; +} + /*--- random_memstat ---*/ static void @@ -2158,7 +2193,7 @@ random_memstat(int tid, int enforce) long long int maxhwm = getmaxhwm_(); long long int maxstk = getmaxstk_(); if (drhook_stacksize_threshold > 0 && maxstk > drhook_stacksize_threshold) { - /* Abort hopefully with traceback */ + /* Abort hopefully with traceback */ char *pfx = PREFIX(tid); long long int vmpeak = getvmpeak_() / (long long int) 1048576; long long int threshold = drhook_stacksize_threshold / (long long int) 1048576; @@ -2235,7 +2270,7 @@ process_options() unsigned long long int hardlimit = 0; int rc = set_unlimited_corefile(&hardlimit,1); if (rc == 0) { - OPTPRINT(fp,"%s %s 
[%s@%s:%d] Hardlimit for core file is now %llu (0x%llx)\n", + OPTPRINT(fp,"%s %s [%s@%s:%d] Hardlimit for core file is now %llu (0x%llx)\n", pfx,TIMESTR(tid),FFL,hardlimit,hardlimit); } } @@ -2283,7 +2318,7 @@ process_options() if (env) { opt_timeline = atoi(env); } - + if (opt_timeline) { OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_TIMELINE=%d\n",pfx,TIMESTR(tid),FFL,opt_timeline); @@ -2292,13 +2327,13 @@ process_options() opt_timeline_thread = atoi(env); } OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_TIMELINE_THREAD=%d\n",pfx,TIMESTR(tid),FFL,opt_timeline_thread); - + env = getenv("DR_HOOK_TIMELINE_FORMAT"); if (env) { opt_timeline_format = atoi(env); } OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_TIMELINE_FORMAT=%d\n",pfx,TIMESTR(tid),FFL,opt_timeline_format); - + env = getenv("DR_HOOK_TIMELINE_UNITNO"); if (env) { opt_timeline_unitno = atoi(env); @@ -2323,7 +2358,7 @@ process_options() env = getenv("DR_HOOK_TRACE_STACK"); if (env) { opt_trace_stack = atof(env); - if (opt_trace_stack < 0) + if (opt_trace_stack < 0) opt_trace_stack = 0; else { drhook_oml_stacksize = slave_stacksize(); @@ -2350,7 +2385,7 @@ process_options() } OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_RANDOM_MEMSTAT=%d (RAND_MAX=%d)\n",pfx,TIMESTR(tid),FFL,opt_random_memstat,RAND_MAX); - + env = getenv("DR_HOOK_HASHBITS"); if (env) { int value = atoi(env); @@ -2467,7 +2502,7 @@ process_options() if (opt_gencore) { OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_GENCORE=%d\n",pfx,TIMESTR(tid),FFL,opt_gencore); - + env = getenv("DR_HOOK_GENCORE_SIGNAL"); if (env) { int itmp = atoi(env); @@ -2488,7 +2523,7 @@ process_options() while (*p) { if (islower(*p)) *p = toupper(*p); p++; - } + } p = strtok(s,delim); /* if (p) OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_OPT=\"",pfx,TIMESTR(tid)); */ if (p && fp) { @@ -2498,7 +2533,7 @@ process_options() while (p) { /* Assume that everything is OFF by default */ if (strequ(p,"ALL")) { /* all except profiler data */ - opt_gethwm = opt_getstk = opt_getrss = opt_getpag = opt_walltime = opt_cputime = 
opt_cycles = 1; + opt_papi = opt_gethwm = opt_getstk = opt_getrss = opt_getpag = opt_walltime = opt_cputime = opt_cycles = 1; opt_calls = 1; any_memstat++; OPTPRINT(fp,"%s%s",comma,"ALL"); comma = ","; @@ -2568,6 +2603,15 @@ process_options() opt_cycles = 1; OPTPRINT(fp,"%s%s",comma,"WALLPROF"); comma = ","; } + else if (strequ(p,"COUNTERS") ) { + opt_wallprof = 1; + opt_walltime = 1; + opt_cpuprof = 0; /* Note: Switches cpuprof OFF */ + opt_calls = 1; + opt_cycles = 1; + opt_papi = 1; + OPTPRINT(fp,"%s%s",comma,"COUNTERS"); comma = ","; + } else if (strequ(p,"CPUPROF")) { opt_cpuprof = 1; opt_cputime = 1; @@ -2603,6 +2647,8 @@ process_options() else if (strequ(p,"CALLPATH")) { opt_callpath = 1; OPTPRINT(fp,"%s%s",comma,"CALLPATH"); comma = ","; + } else { + printf("DrHook: Note - no match for HOOK_OPT : %s\n",p); } p = strtok(NULL,delim); } @@ -2620,20 +2666,20 @@ process_options() if (callpath_indent < 1 || callpath_indent > 8) callpath_indent = callpath_indent_default; } OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_CALLPATH_INDENT=%d\n",pfx,TIMESTR(tid),FFL,callpath_indent); - + env = getenv("DR_HOOK_CALLPATH_DEPTH"); if (env) { callpath_depth = atoi(env); if (callpath_depth < 0) callpath_depth = callpath_depth_default; } OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_CALLPATH_DEPTH=%d\n",pfx,TIMESTR(tid),FFL,callpath_depth); - + env = getenv("DR_HOOK_CALLPATH_PACKED"); if (env) { callpath_packed = atoi(env); } OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_CALLPATH_PACKED=%d\n",pfx,TIMESTR(tid),FFL,callpath_packed); - + env = getenv("DR_HOOK_CALLTRACE"); if (env) { opt_calltrace = atoi(env); @@ -2750,7 +2796,7 @@ getkey(int tid, const char *name, int name_len, keyptr->name[name_len] = 0; } if (filename_len > 0 && - filename && + filename && *filename) { char *psave = NULL; char *p = psave = malloc_drhook((filename_len+1)*sizeof(*filename)); @@ -2771,16 +2817,19 @@ getkey(int tid, const char *name, int name_len, } found = 1; } - if (found || + if (found || (keyptr->name_len == name_len 
&& - (!callpath || (callpath && keyptr->callpath && + (!callpath || (callpath && keyptr->callpath && keyptr->callpath_len == callpath_len && keyptr->callpath_fullhash == fullhash)) && ((!opt_trim && *keyptr->name == *name && strnequ(keyptr->name, name, name_len)) || (opt_trim && strncasecmp(keyptr->name, name, name_len) == 0)))) { if (opt_walltime) keyptr->wall_in = walltime ? *walltime : WALLTIME(); if (opt_cputime) keyptr->cpu_in = cputime ? *cputime : CPUTIME(); - if (opt_cycles) keyptr->cycles_in = cycles ? *cycles : ec_get_cycles(); + if (opt_cycles) keyptr->cycles_in = cycles ? *cycles : ec_get_cycles(); +#ifdef HKPAPI + drhook_papi_readAll(keyptr->counters_in); +#endif if (any_memstat) memstat(keyptr,&tid,1); if (opt_calls) { keyptr->calls++; @@ -2882,6 +2931,11 @@ putkey(int tid, drhook_key_t *keyptr, const char *name, int name_len, else if (tid >= 1 && tid <= numthreads) { double delta_wall = 0; double delta_cpu = 0; + long_long * delta_counters=NULL; +#ifdef HKPAPI + delta_counters=alloca(drhook_papi_num_counters() * sizeof(long_long) ); + drhook_papi_bzero(delta_counters); +#endif long long int delta_cycles = 0; if (any_memstat) memstat(keyptr,&tid,0); if (opt_calls) keyptr->status--; @@ -2911,16 +2965,20 @@ putkey(int tid, drhook_key_t *keyptr, const char *name, int name_len, if (opt_walltime) keyptr->delta_wall_all += delta_wall; if (opt_cputime) keyptr->delta_cpu_all += delta_cpu; if (opt_cycles) keyptr->delta_cycles_all += delta_cycles; - remove_calltree(tid, keyptr, &delta_wall, &delta_cpu, &delta_cycles); +#ifdef HKPAPI + drhook_papi_subtract(delta_counters, NULL , keyptr->counters_in); + drhook_papi_add(NULL, keyptr->delta_counters_all, delta_counters); +#endif + remove_calltree(tid, keyptr, &delta_wall, &delta_cpu, &delta_cycles,delta_counters); } } - + /*--- init_drhook ---*/ static void init_drhook(int ntids) { - if (numthreads == 0 || !keydata || !calltree || !keyself || !overhead || !curkeyptr || !cstk) { + if (numthreads == 0 || !keydata || 
!calltree || !keyself || !overhead || !curkeyptr || !cstk) { int j; if (pid == -1) { /* Ensure that called just once */ { @@ -3027,9 +3085,9 @@ if (overhead && tid >= 1 && tid <= numthreads) { \ } static drhook_key_t * -itself(drhook_key_t *keyptr_self, - int tid, int opt, double *delta_time, - const double *walltime, const double *cputime) +itself(drhook_key_t *keyptr_self, + int tid, int opt, double *delta_time, + const double *walltime, const double *cputime) { drhook_key_t *keyptr = NULL; if (keyself) { @@ -3037,6 +3095,9 @@ itself(drhook_key_t *keyptr_self, if (opt == 0) { if (opt_wallprof) keyptr->wall_in = walltime ? *walltime : WALLTIME(); else keyptr->cpu_in = cputime ? *cputime : CPUTIME(); +#ifdef HKPAPI + drhook_papi_readAll(keyptr->counters_in); +#endif keyptr->calls++; } else if (opt == 1) { @@ -3050,6 +3111,17 @@ itself(drhook_key_t *keyptr_self, keyptr->delta_cpu_all += delta; } if (delta_time) *delta_time = delta; + +#ifdef HKPAPI + long_long cntrs_delta[NPAPICNTRS]; + + /* cntrs_delta = current - counters_in */ + drhook_papi_subtract(cntrs_delta, NULL, keyptr->counters_in); + + /* keyptr->delta_counters_all += cntrs_delta */ + drhook_papi_add(NULL, keyptr->delta_counters_all,cntrs_delta); +#endif + } } return keyptr; @@ -3058,7 +3130,7 @@ itself(drhook_key_t *keyptr_self, /*--- commie -routines : adds "," i.e. 
comma after each 3 digit, e.g.: 1234567890 becomes more readable 1,234,567,890 */ -static void +static void lld_commie(long long int n, char sd[]) { const char comma = ','; @@ -3084,7 +3156,7 @@ lld_commie(long long int n, char sd[]) } } -static void +static void dbl_commie(double n, char sd[]) { const char comma = ','; @@ -3113,7 +3185,7 @@ dbl_commie(double n, char sd[]) /*--- callpath as a "pathname" ---*/ static void -unroll_callpath(FILE *fp, int len, +unroll_callpath(FILE *fp, int len, const equivalence_t *callpath, int callpath_len) { if (fp && callpath && callpath_len > 0) { @@ -3174,7 +3246,7 @@ static void do_prof() { /* to avoid recursive signals while atexit() (e.g. SIGXCPU) */ - if (signal_handler_ignore_atexit) return; + if (signal_handler_ignore_atexit) return; if (!do_prof_off && (opt_wallprof || opt_cpuprof)) { /* CPU or wall-clock profiling */ @@ -3185,6 +3257,15 @@ do_prof() c_drhook_print_(&ftnunitno, &master, &print_option, &initlev); } + if (!do_prof_off && (opt_papi)) { + /* Hardware-counter (PAPI) profiling */ + const int ftnunitno = 0; + const int master = 1; + const int print_option = 3; + int initlev = 0; + c_drhook_print_(&ftnunitno, &master, &print_option, &initlev); + } + if (!do_prof_off && opt_memprof) { /* Memory profiling */ const int ftnunitno = 0; @@ -3230,7 +3311,7 @@ typedef enum { /* See dr_hook_watch_mod.F90 */ KEY_I4 = 4, KEY_I8 = 8, KEY_R4 = 16, - KEY_R8 = 32 + KEY_R8 = 32 } PrintWatchKeys_t; static void print_watch(int ftnunitno, int key, const void *ptr, int n) @@ -3265,7 +3346,7 @@ static void print_watch(int ftnunitno, int key, const void *ptr, int n) } } -static void +static void check_watch(const char *label, const char *name, int name_len, @@ -3331,14 +3412,21 @@ c_drhook_check_watch_(const char *where, } /*** PUBLIC ***/ +#ifdef HKPAPI +#define PAPIREAD \ + long_long cntrs[NPAPICNTRS]; \ + drhook_papi_readAll(cntrs) +#else +#define PAPIREAD /*NOOP*/ +#endif #define TIMERS \ double walltime = opt_walltime ? 
WALLTIME() : 0; \ double cputime = opt_cputime ? CPUTIME() : 0; \ long long int cycles = opt_cycles ? ec_get_cycles() : 0; \ long long int hwm = opt_gethwm ? gethwm_() : 0; \ - long long int stk = opt_getstk ? getstk_() : 0 - + long long int stk = opt_getstk ? getstk_() : 0; \ + PAPIREAD /*=== c_drhook_set_lhook_ ===*/ @@ -3350,12 +3438,12 @@ c_drhook_set_lhook_(const int *lhook) /*=== c_drhook_getenv_ ===*/ -void -c_drhook_getenv_(const char *s, +void +c_drhook_getenv_(const char *s, char *value, /* Hidden arguments */ int slen, - const int valuelen) + const int valuelen) { char *env = NULL; char *p = malloc_drhook(slen+1); @@ -3363,14 +3451,14 @@ c_drhook_getenv_(const char *s, fprintf(stderr,"c_drhook_getenv_(): Unable to allocate %d bytes of memory\n", slen+1); DRHOOK_ABORT(); } - memcpy(p,s,slen); + memcpy(p,s,slen); p[slen]='\0'; memset(value, ' ', valuelen); env = getenv(p); if (env) { int len = strlen(env); if (valuelen < len) len = valuelen; - memcpy(value,env,len); + memcpy(value,env,len); } free_drhook(p); } @@ -3386,7 +3474,7 @@ static void drhook_delete_lockfile() { } } -void +void c_drhook_init_(const char *progname, const int *num_threads /* Hidden length */ @@ -3395,7 +3483,7 @@ c_drhook_init_(const char *progname, init_drhook(*num_threads); //max_threads = MAX(1,*num_threads); if (a_out) free_drhook(a_out); - progname = trim(progname, &progname_len); + progname = trim(progname, &progname_len); if (progname_len > 0) { a_out = calloc_drhook(progname_len+1,sizeof(*progname)); memcpy(a_out, progname, progname_len); @@ -3421,6 +3509,10 @@ c_drhook_init_(const char *progname, tabort_delete_lockfile(); drhook_delete_lockfile(); } +#ifdef HKPAPI + drhook_papi_init(myproc -1); +#endif + } @@ -3440,7 +3532,7 @@ c_drhook_watch_(const int *onoff, { int tid = drhook_oml_get_thread_num(); drhook_watch_t *p = NULL; - if (!drhook_lhook) return; + if (!drhook_lhook) return; drhook_oml_set_lock(); @@ -3499,9 +3591,9 @@ c_drhook_watch_(const int *onoff, /*=== 
c_drhook_start_ ===*/ -void -c_drhook_start_(const char *name, - const int *thread_id, +void +c_drhook_start_(const char *name, + const int *thread_id, double *key, const char *filename, const int *sizeinfo @@ -3523,7 +3615,7 @@ c_drhook_start_(const char *name, dump_hugepages(0,pfx,tid,0,-1); } if (!opt_callpath) { - u.keyptr = getkey(*thread_id, name, name_len, + u.keyptr = getkey(*thread_id, name, name_len, filename, filename_len, &walltime, &cputime, &cycles, NULL, 0, NULL); @@ -3532,7 +3624,7 @@ c_drhook_start_(const char *name, int free_callpath = 1; int callpath_len = 0; equivalence_t *callpath = get_callpath(*thread_id, &callpath_len); - u.keyptr = getkey(*thread_id, name, name_len, + u.keyptr = getkey(*thread_id, name, name_len, filename, filename_len, &walltime, &cputime, &cycles, callpath, callpath_len, &free_callpath); @@ -3547,7 +3639,7 @@ c_drhook_start_(const char *name, (void) callstack(*thread_id, key, u.keyptr); } ITSELF_1; - if (opt_calltrace) { + if (opt_calltrace) { drhook_oml_set_lock(); { const int ftnunitno = 0; /* stderr */ @@ -3598,7 +3690,7 @@ c_drhook_start_(const char *name, /*=== c_drhook_end_ ===*/ -void +void c_drhook_end_(const char *name, const int *thread_id, const double *key, @@ -3665,7 +3757,7 @@ c_drhook_end_(const char *name, } /* if (opt_timeline_thread <= 0 || tid <= opt_timeline_thread) */ } if (watch && watch_count > 0) check_watch("when leaving routine", name, name_len, 1); - putkey(*thread_id, u.keyptr, name, name_len, + putkey(*thread_id, u.keyptr, name, name_len, *sizeinfo, &walltime, &cputime, &cycles); ITSELF_1; @@ -3703,7 +3795,7 @@ c_drhook_memcounter_(const int *thread_id, keyptr->mem_curdelta += *size; alldelta = keyptr->mem_curdelta + keyptr->mem_child; if (alldelta > keyptr->maxmem_alldelta) keyptr->maxmem_alldelta = alldelta; - if (keyptr->mem_curdelta > keyptr->maxmem_selfdelta) + if (keyptr->mem_curdelta > keyptr->maxmem_selfdelta) keyptr->maxmem_selfdelta = keyptr->mem_curdelta; if (keyptr_addr) { u.keyptr 
= keyptr; @@ -3726,7 +3818,7 @@ c_drhook_memcounter_(const int *thread_id, u.keyptr_addr = *keyptr_addr; keyptr = u.keyptr; } - else + else keyptr = curkeyptr[tid-1]; /* fprintf(stderr, @@ -3862,40 +3954,40 @@ trim_and_adjust_left(const char *p, int *name_len) return p; } -static void print_routine_name0(FILE * fp, const char * p_name, int p_tid, const char * p_filename, int p_cluster, - const equivalence_t * p_callpath, int p_callpath_len, int len, int cluster_size) +static void print_routine_name0(FILE * fp, const char * p_name, int p_tid, const char * p_filename, int p_cluster, + const equivalence_t * p_callpath, int p_callpath_len, int len, int cluster_size) { - int name_len = 0; - const char *name = trim_and_adjust_left(p_name,&name_len); + int name_len = 0; + const char *name = trim_and_adjust_left(p_name,&name_len); if (callpath_packed) { if (p_callpath && p_callpath_len > 0) { const equivalence_t * callpath = &p_callpath[p_callpath_len-1]; int j; - for (j=0; jkeyptr && callpath->keyptr->name) { const char *name = callpath->keyptr->name; int name_len = callpath->keyptr->name_len; fprintf(fp,"%.*s/",name_len,name); } - } - } - - fprintf(fp,"%.*s@%d%s%s", - name_len, name, - p_tid, - p_filename ? ":" : "", - p_filename ? p_filename : ""); - - if (opt_clusterinfo) { - fprintf(fp," [%d,%d]", - p_cluster, ABS(cluster_size)); - } - - if (!callpath_packed) - unroll_callpath(fp, len, p_callpath, p_callpath_len); - + } + } + + fprintf(fp,"%.*s@%d%s%s", + name_len, name, + p_tid, + p_filename ? ":" : "", + p_filename ? 
p_filename : ""); + + if (opt_clusterinfo) { + fprintf(fp," [%d,%d]", + p_cluster, ABS(cluster_size)); + } + + if (!callpath_packed) + unroll_callpath(fp, len, p_callpath, p_callpath_len); + } @@ -3911,9 +4003,9 @@ DrHookPrint(int ftnunitno, const char *line) { if (line) { FILE *fp = NULL; - if (ftnunitno <= 0) + if (ftnunitno <= 0) fp = stderr; - else if (ftnunitno == 6) + else if (ftnunitno == 6) fp = stdout; else dr_hook_prt_(&ftnunitno, line, strlen(line)); @@ -3921,11 +4013,11 @@ DrHookPrint(int ftnunitno, const char *line) } } -void +void c_drhook_print_(const int *ftnunitno, const int *thread_id, - const int *print_option, /* - 1=raw call counts + const int *print_option, /* + 1=raw call counts 2=calling tree 3=profiling info 4=memory profiling @@ -3989,14 +4081,14 @@ c_drhook_print_(const int *ftnunitno, } /* for (j=0; jactive)) { - int do_print = (*print_option == 2 || + int do_print = (*print_option == 2 || abs_print_option == 7 || abs_print_option == 5 || abs_print_option == 6); if (do_print) { @@ -4050,7 +4142,7 @@ c_drhook_print_(const int *ftnunitno, default: case 2: kind = ':'; is_timeline = 0; break; } - if (*print_option == 2 || + if (*print_option == 2 || (is_timeline && tid > 1 && tid <= opt_timeline_thread)) { sprintf(s,"%s %s [DrHookCallTree] %s%c ", pfx,TIMESTR(tid), @@ -4216,6 +4308,13 @@ c_drhook_print_(const int *ftnunitno, drhook_key_t *keyptr = &keydata[t][j]; while (keyptr) { if (keyptr->name && (keyptr->status == 0 || signal_handler_called)) { +#ifdef HKPAPI + drhook_papi_subtract(p->counter_self, + keyptr->delta_counters_all, + keyptr->delta_counters_child); + drhook_papi_cpy(p->counter_tot, + keyptr->delta_counters_all); +#endif p->self = opt_wallprof ? 
keyptr->delta_wall_all - keyptr->delta_wall_child : keyptr->delta_cpu_all - keyptr->delta_cpu_child; @@ -4253,8 +4352,11 @@ c_drhook_print_(const int *ftnunitno, int *clusize = calloc_drhook(nprof+1, sizeof(*clusize)); /* make sure at least 1 element */ char *prevname = NULL; const char *fmt = "%5d %8.2f %12.3f %12.3f %12.3f %14llu %11.2f %11.2f %s"; + const char *csvfmt = "%s,%d,%d,%d,%.4f,%.6f,%.6f,%.6f,%llu"; char *filename = get_mon_out(myproc); + char *csvfilename = get_csv_out(myproc); FILE *fp = NULL; + FILE *fpcsv = NULL; if (!filename) break; @@ -4264,14 +4366,24 @@ c_drhook_print_(const int *ftnunitno, pfx,TIMESTR(tid),FFL, myproc,filename); } - - fp = fopen(filename,"w"); + fp = fopen(filename,"w"); if (!fp) goto finish_3; - + + if (opt_papi==1){ + if ((myproc == 1 && mon_out_procs == -1) || mon_out_procs == myproc) { + fprintf(stderr, + "%s %s [%s@%s:%d] Writing counter information of proc#%d into file '%s'\n", + pfx,TIMESTR(tid),FFL, + myproc,csvfilename); + } + fpcsv = fopen(csvfilename,"w"); + if (!fpcsv) goto finish_3; + } + /* alphanumerical sorting to find out clusters of the same routine but on different threads */ /* also find out total wall clock time */ /* calculate percentage values */ - + p = prof; qsort(p, nprof, sizeof(*p), prof_name_comp); @@ -4422,52 +4534,100 @@ c_drhook_print_(const int *ftnunitno, } } - fprintf(fp,"\n"); - { - len = - fprintf(fp," # %% Time Cumul Self Total # of calls Self Total "); - } - fprintf(fp,"Routine@"); - if (opt_clusterinfo) fprintf(fp," [Cluster:(id,size)]"); - fprintf(fp,"\n"); - if (opt_sizeinfo) fprintf(fp,"%*s %s\n",len-20," ","(Size; Size/sec; Size/call; MinSize; MaxSize)"); - fprintf(fp, " (self) (sec) (sec) (sec) ms/call ms/call\n"); - fprintf(fp,"\n"); - - cumul = 0; - for (j=0; jcluster]; - if (p->pc < percent_limit) break; - if (opt_cputime) { - cumul += p->self; - } - else { - if (p->is_max || cluster_size == 1) cumul += p->self; - } - { - fprintf(fp, fmt, - ++j, p->pc, cumul, p->self, p->total, 
p->calls, - p->percall_ms_self, p->percall_ms_total, - p->is_max ? "*" : " "); - } - - print_routine_name(fp, p, len, cluster_size); - - if (opt_sizeinfo && p->sizeinfo > 0) { - char s1[DRHOOK_STRBUF], s2[DRHOOK_STRBUF], s3[DRHOOK_STRBUF]; - char s4[DRHOOK_STRBUF], s5[DRHOOK_STRBUF]; - lld_commie(p->sizeinfo,s1); - dbl_commie(p->sizespeed,s2); - dbl_commie(p->sizeavg,s3); - lld_commie(p->min_sizeinfo,s4); - lld_commie(p->max_sizeinfo,s5); - fprintf(fp,"\n%*s (%s; %s; %s; %s; %s)",len-20," ",s1,s2,s3,s4,s5); - } - fprintf(fp,"\n"); - p++; - } /* for (j=0; j"); + if (opt_clusterinfo) fprintf(fp," [Cluster:(id,size)]"); + fprintf(fp,"\n"); + if (opt_sizeinfo) fprintf(fp,"%*s %s\n",len-20," ","(Size; Size/sec; Size/call; MinSize; MaxSize)"); + fprintf(fp, " (self) (sec) (sec) (sec) ms/call ms/call\n"); + fprintf(fp,"\n"); + + cumul = 0; + for (j=0; jcluster]; + if (p->pc < percent_limit) break; + if (opt_cputime) { + cumul += p->self; + } + else { + if (p->is_max || cluster_size == 1) cumul += p->self; + } + + { + fprintf(fp, fmt, + ++j, p->pc, cumul, p->self, p->total, p->calls, + p->percall_ms_self, p->percall_ms_total, + p->is_max ? "*" : " "); + } + print_routine_name(fp, p, len, cluster_size); + + if (opt_sizeinfo && p->sizeinfo > 0) { + char s1[DRHOOK_STRBUF], s2[DRHOOK_STRBUF], s3[DRHOOK_STRBUF]; + char s4[DRHOOK_STRBUF], s5[DRHOOK_STRBUF]; + lld_commie(p->sizeinfo,s1); + dbl_commie(p->sizespeed,s2); + dbl_commie(p->sizeavg,s3); + lld_commie(p->min_sizeinfo,s4); + lld_commie(p->max_sizeinfo,s5); + fprintf(fp,"\n%*s (%s; %s; %s; %s; %s)",len-20," ",s1,s2,s3,s4,s5); + } + fprintf(fp,"\n"); + p++; + } /* for (j=0; jcluster]; + if (opt_cputime) + cumul += p->self; + else + if (p->is_max || cluster_size == 1) cumul += p->self; + + { + fprintf(fpcsv, csvfmt, + p->name, + myproc-1, + p->tid-1 + ,++j, p->pc, cumul, p->self, p->total, p->calls, + p->is_max ? 
"*" : " "); + for (int c=0;ccounter_self[c]); + for (int c=0;ccounter_tot[c]); + if (first_counter_is_cyc==1) + fprintf(fpcsv,",%.3f,%.3f", + p->counter_self[0]/p->self/1000000.0, + p->counter_tot[0]/p->total/1000000.0 + ); + } + fprintf(fpcsv,"\n"); + p++; + } /* for (j=0; j"); @@ -4693,10 +4853,10 @@ c_drhook_print_(const int *ftnunitno, t = p->tid - 1; if (p->children > maxseen_tot[t]) p->children = maxseen_tot[t]; /* adjust */ fprintf(fp, fmt, - ++j, p->pc, + ++j, p->pc, p->self, p->children, p->leaked, p->hwm, p->stk, p->pag, - p->calls, p->alloc_count, + p->calls, p->alloc_count, (p->alloc_count - p->free_count != 0) ? "*" : " ", p->free_count, p->is_max ? "*" : " "); @@ -4705,7 +4865,7 @@ c_drhook_print_(const int *ftnunitno, fprintf(fp,"\n"); p++; } /* for (j=0; j 0 ? name_len : (int)strlen(name), filename_len > 0 ? filename_len : (int)strlen(filename)); } else if (option == 1) { - c_drhook_end_(name, &tid, handle, + c_drhook_end_(name, &tid, handle, filename, &sizeinfo, name_len > 0 ? name_len : (int)strlen(name), filename_len > 0 ? filename_len : (int)strlen(filename)); } } -/* +/* this is result of moving some code from libodb.a (odb/aux/util_ccode.c) for use by libifsaux.a directly ; simplifies linking sequences. 
@@ -4832,7 +4992,7 @@ double util_cputime_() } return (tbuf.tms_utime + tbuf.tms_stime + - tbuf.tms_cutime + tbuf.tms_cstime) / clock_ticks; + tbuf.tms_cutime + tbuf.tms_cstime) / clock_ticks; } int util_ihpstat_(int *option) @@ -4854,13 +5014,13 @@ static void set_timed_kill() int nelems = sscanf(p,"%d:%d:%d:%lf", &target_myproc, &target_omltid, &target_sig, &start_time); int ntids = drhook_oml_get_max_threads(); - if (nelems == 4 && + if (nelems == 4 && (target_myproc == myproc || target_myproc == -1) && (target_omltid == -1 || (target_omltid >= 1 && target_omltid <= ntids)) && (target_sig >= 1 && target_sig <= NSIG) && start_time > 0) { if (ntids > 1) { - extern void drhook_run_omp_parallel_ipfipipipdpstr_(const int *, + extern void drhook_run_omp_parallel_ipfipipipdpstr_(const int *, void (*func)(const int *, const int *, const int *, const double *, const char *, long), const int *, const int *, const int *, const double *, const char *, long); drhook_run_omp_parallel_ipfipipipdpstr_(&ntids,set_killer_timer, diff --git a/src/fiat/drhook/drhook_papi.c b/src/fiat/drhook/drhook_papi.c new file mode 100644 index 00000000..0fc5bc9c --- /dev/null +++ b/src/fiat/drhook/drhook_papi.c @@ -0,0 +1,305 @@ +#ifdef HKPAPI +#include "drhook_papi.h" +#include +#include +#include +#include +#include "oml.h" + +#define STD_MSG_LEN 4096 + +int * drhook_papi_event_set=NULL; +enum {drhook_papi_notstarted,drhook_papi_running,drhook_papi_failed}; +int drhook_papi_state=0; +int drhook_papi_rank=0; /* C style! */ +size_t drhook_max_counter_name=0; + +/* hardwired for now */ +const char * hookCounters[ NPAPICNTRS ][2]= + { + {"PAPI_TOT_CYC","Cycles"}, + {"PAPI_FP_OPS","FP Operations"}, + {"PAPI_L1_DCA","L1 Access"}, + {"PAPI_L2_DCM","L2 Miss"} + }; + +/* function to use for thread id + - it should be better than omp_get_thread_num! 
+*/ +unsigned long safe_thread_num(){ + return oml_my_thread()-1; +} + +const char * drhook_papi_counter_name(int c,int t){ + return hookCounters[c][t]; +} + +void drhook_papi_cpy(long_long* a,long_long* b){ + for (int i=0;i0){ + char fmt[STD_MSG_LEN]; + sprintf(fmt,"%%%lds",strlen(s)); + sprintf(msg,fmt," "); + for (int i=0;i 0) { + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: library version mismatch between compilation and run!\n"); + printf("%s\n",pmsg); + return 0; + } + if (paperr == PAPI_EINVAL){ + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: PAPI_EINVAL\n"); + printf("%s\n",pmsg); + return 0; + } + if (paperr == PAPI_ENOMEM){ + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: PAPI_ENOMEM\n"); + printf("%s\n",pmsg); + return 0; + } + if (paperr == PAPI_ESBSTR){ + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: PAPI_ESBSTR\n"); + printf("%s\n",pmsg); + return 0; + } + if (paperr == PAPI_ESYS){ + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: PAPI_ESYS\n"); + printf("%s\n",pmsg); + return 0; + } + else { + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Unknown error code\n"); + printf("%s\n",pmsg); + return 0; + } + } + + lib_version = PAPI_get_opt( PAPI_LIB_VERSION, NULL ); + + int nthreads=oml_get_max_threads(); + + paperr=PAPI_thread_init(safe_thread_num); + + if( paperr != PAPI_OK ){ + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: thread init failed (%s)",PAPI_strerror(paperr)); + printf("%s\n",pmsg); + return 0; + } + + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Version %d.%d.%d initialised with %d threads", + PAPI_VERSION_MAJOR( lib_version ), + PAPI_VERSION_MINOR( lib_version ), + PAPI_VERSION_REVISION( lib_version ), + nthreads ); + + if (drhook_papi_rank==0) printf("%s\n",pmsg); + + drhook_papi_event_set=malloc(nthreads*sizeof(int)); + + int prof_papi_numcntrs; + bool failed=false; + + drhook_run_omp_parallel_papi_startup(drhook_papi_event_set,nthreads); + + /* if (failed){ drhook_papi_state=drhook_papi_failed ; return 0;} */ + drhook_papi_state=drhook_papi_running; + if (drhook_papi_rank==0) 
printf("DRHOOK:PAPI: Initialisation sucess\n"); + return 1; +} + +int dr_hook_papi_start_threads(int * events){ + int thread=safe_thread_num(); + int papiErr; + char pmsg[STD_MSG_LEN]; + + events[thread]=PAPI_NULL; + papiErr=PAPI_create_eventset(&events[thread]); + if (papiErr != PAPI_OK){ + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: create event set failed (%s) \n",PAPI_strerror(papiErr)); + printf("%s\n",pmsg); + return 0; + } else + printf("Event set %d created for thread %d\n",events[thread],thread); + + int prof_papi_numcntrs=NPAPICNTRS; + for (int counter=0;counter < prof_papi_numcntrs ;counter ++){ + int eventCode; + + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: %s (%s)",hookCounters[counter][0],hookCounters[counter][1]); + if (drhook_papi_rank==0) if (thread==0)printf("%s\n",pmsg); + + papiErr=PAPI_event_name_to_code(hookCounters[counter][0],&eventCode); + if (papiErr !=PAPI_OK){ + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: event name to code failed (%s)",PAPI_strerror(papiErr)); + printf("%s\n",pmsg); + PAPI_perror("initPapi"); + return 0; + } + + papiErr=PAPI_add_event(events[thread],eventCode); + if (papiErr!=PAPI_OK){ + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: add_event failed: %d (%s)",papiErr,PAPI_strerror(papiErr)); + printf("%s\n",pmsg); + if (papiErr == PAPI_EINVAL) + printf("Invalid argumet"); + else if (papiErr == PAPI_ENOMEM) + printf("Out of Mmemory"); + else if (papiErr == PAPI_ENOEVST) + printf("EventSet does not exist"); + else if (papiErr == PAPI_EISRUN) + printf("EventSet is running"); + else if (papiErr == PAPI_ECNFLCT) + printf("Conflict"); + else if (papiErr == PAPI_ENOEVNT) + printf("Preset not available"); + return 0; + }else { +#if defined(DEBUG) + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Added code=%d to Evnt set %d",events[thread]); + if (thread==0)printf("%s\n",pmsg); +#endif + } + } + + int number; + int * checkEvents=malloc(prof_papi_numcntrs*sizeof(int)); + papiErr = PAPI_list_events(events[thread],checkEvents , &number); + if ( papiErr != 
PAPI_OK){ + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error querying events - %d=%s",papiErr,PAPI_strerror(papiErr)); + printf("%s\n",pmsg); + return 0; + }else { +#if defined(DEBUG) + for (counter=0;counter +#endif + +#include + +#define NPAPICNTRS 4 + +int drhook_papi_init(int rank); +int drhook_papi_num_counters(); +const char * drhook_papi_counter_name(int c,int t); +long_long drhook_papi_read(int counterId); +int drhook_papi_readAll(long_long * counterArray); + +/* implemented in forrtran */ +int drhook_run_omp_parallel_papi_startup(int * drhook_papi_event_set,int nthreads); + +/* a = b - c +if b or c == NULL means use current readings + */ +void drhook_papi_subtract(long_long* a, long_long* b, long_long* c); + +/* a = b + c +if a==NULL, b=b+c */ +void drhook_papi_add(long_long* a,long_long* b,long_long* c); + +/* a = b */ +void drhook_papi_cpy(long_long* a,long_long* b); + +/* a=0 */ +void drhook_papi_bzero(long_long* a); + +void drhook_papi_print(char * s,long_long* a,int header); + +#else +#define long_long long long +#endif +#endif diff --git a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 index 36874633..c0a1fccf 100644 --- a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 +++ b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 @@ -10,6 +10,19 @@ ! These functions are to be used within drhook C methods, to avoid having OMP pragmas there. +module hook_papi_interface +#ifdef HKPAPI + + interface + function dr_hook_papi_start_threads ( events) bind ( c ) + use :: iso_c_binding + INTEGER(KIND=C_INT) :: dr_hook_papi_start_threads + INTEGER(KIND=C_INT), INTENT(INOUT) :: Events(*) + end function dr_hook_papi_start_threads + end interface +#endif +end module hook_papi_interface + subroutine drhook_run_omp_parallel_ipfstr(NTIDS, FUNC, CDSTR) ! Usage: ! 
------ @@ -61,3 +74,33 @@ subroutine drhook_run_omp_parallel_get_cycles(NTIDS, NCYCLES) NCYCLES(IOMPTID) = ICYCLES - NCYCLES(IOMPTID) !$OMP END PARALLEL end subroutine drhook_run_omp_parallel_get_cycles + +#ifdef HKPAPI + +subroutine drhook_run_omp_parallel_papi_startup(events,n) bind(c) + use, intrinsic :: iso_c_binding, only : c_char, c_int, c_double + use hook_papi_interface + use OML_MOD + implicit none + INTEGER(KIND=C_INT), INTENT(INOUT) :: Events(n) + INTEGER(KIND=C_INT), VALUE :: n + INTEGER(KIND=C_INT) :: thread + INTEGER(KIND=C_INT) :: rc,rcOut + INTEGER :: myThread + INTEGER :: nThreads + + myThread=OML_MY_THREAD()-1 + nThreads=OML_GET_MAX_THREADS() + rcOut=0 + !$OMP PARALLEL + DO thread=0,nThreads-1 + if (thread==myThread) then + rc=dr_hook_papi_start_threads(events) + if (rc==0)rcOut=1 + end if + !$OMP BARRIER + END DO + !$OMP END PARALLEL + +end subroutine drhook_run_omp_parallel_papi_startup +#endif diff --git a/src/fiat/system/internal/opfla_perfmon.c b/src/fiat/system/internal/opfla_perfmon.c index 56623156..2dce3004 100644 --- a/src/fiat/system/internal/opfla_perfmon.c +++ b/src/fiat/system/internal/opfla_perfmon.c @@ -151,7 +151,7 @@ int report_init(int periodicreport){ rc = (num = PAPI_num_counters()); if (rc != PAPI_OK) { - PAPI_perror(rc, "PAPI_num_counters", strlen("PAPI_num_counters")); + PAPI_perror("PAPI_num_counters"); } //fprintf(stderr,"PAPI_num_counters = %d\n",num); diff --git a/src/programs/CMakeLists.txt b/src/programs/CMakeLists.txt index abee6f50..1fb2787c 100644 --- a/src/programs/CMakeLists.txt +++ b/src/programs/CMakeLists.txt @@ -55,6 +55,7 @@ find_package( OpenMP COMPONENTS C ) if( TARGET OpenMP::OpenMP_C ) target_link_libraries( fiat-printbinding OpenMP::OpenMP_C ) endif() + find_package( MPI COMPONENTS C ) if( HAVE_MPI AND TARGET MPI::MPI_C ) target_link_libraries( fiat-printbinding MPI::MPI_C ) @@ -68,3 +69,30 @@ if( disable_warning_implicit_function_declaration ) target_compile_options( fiat-printbinding PRIVATE 
"-Wno-implicit-function-declaration" ) endif() +### Program fiat-drhook-sanity +# Note: it only depends on MPI and OpenMP +ecbuild_add_executable(TARGET fiat-drhook-sanity + SOURCES fiat-drhook-sanity.F90 fiat-drhook-sanity-stream.F90 fiat-drhook-sanity-gemm.F90 mysecond.c + LIBS fiat parkind_dp + ) +target_compile_definitions( fiat-drhook-sanity PRIVATE OMP ) + +ecbuild_info( CMAKE_Fortran_COMPILER_ID - 1. ${CMAKE_Fortran_COMPILER_ID} 2. ${EC_COMPILER_FAMILY} ) +if( ${CMAKE_Fortran_COMPILER_ID} MATCHES Intel ) + set_source_files_properties(fiat-drhook-sanity-stream.f90 PROPERTIES COMPILE_FLAGS " -qopt-prefetch-distance=64,12 -qopt-streaming-cache-evict=0 -qopt-streaming-stores always -qopt-zmm-usage=high ") +endif() + +find_package( OpenMP COMPONENTS Fortran ) +if( TARGET OpenMP::OpenMP_Fortran ) + target_link_libraries( fiat-drhook-sanity OpenMP::OpenMP_Fortran ) +endif() +target_link_libraries( fiat-drhook-sanity ${MKL_LIBRARIES} ) + +find_package( MPI COMPONENTS C ) +if( HAVE_MPI AND TARGET MPI::MPI_C ) + target_link_libraries( fiat-drhook-sanity MPI::MPI_C ) +else() + target_compile_definitions( fiat-drhook-sanity PRIVATE NOMPI ) +endif() + + diff --git a/src/programs/fiat-drhook-sanity-gemm.F90 b/src/programs/fiat-drhook-sanity-gemm.F90 new file mode 100644 index 00000000..b82cb87c --- /dev/null +++ b/src/programs/fiat-drhook-sanity-gemm.F90 @@ -0,0 +1,84 @@ +module gemm_mod + use yomhook, only : lhook,dr_hook,jphook + implicit none + +contains + subroutine gemm_combinations() + implicit none + integer*8 n,i + real(kind=jphook) :: zhook_handle + n=1000 +#if defined(HAVE_BLAS) + if (lhook) call dr_hook('GEMM_ALL',0,zhook_handle) + do i=1,4 + call dgemm_driver(n) + call sgemm_driver(n) + n=n*2 + end do + if (lhook) call dr_hook('GEMM_ALL',1,zhook_handle) +#endif + end subroutine gemm_combinations + +#if defined(HAVE_BLAS) + subroutine dgemm_driver(nn) + implicit none + double precision, allocatable :: a(:,:),b(:,:),c(:,:) + double precision :: alpha,beta + 
integer :: m,k,n + integer :: i,j + integer*8 :: nn + real(kind=jphook) :: zhook_handle + character(len=25) :: tag + + write(tag,'(i20)')nn + tag="_n="//adjustl(tag) + m=nn + n=nn + k=nn + alpha=1.0 + beta=0.0 + + allocate(a(m,k), b(k,n), c(m,n)) + a=1.0 + b=2.0 + c=3.0 + if (lhook) call dr_hook('DGEMM'//TRIM(tag),0,zhook_handle) + call dgemm('n','n',m,n,k,alpha,a,m,b,k,beta,c,m) + if (lhook) call dr_hook('DGEMM'//TRIM(tag),1,zhook_handle) + + return + + end subroutine dgemm_driver + + subroutine sgemm_driver(nn) + implicit none + real*4, allocatable :: a(:,:),b(:,:),c(:,:) + real*4 :: alpha,beta + integer :: m,k,n + integer :: i,j + integer*8 :: nn + real(kind=jphook) :: zhook_handle + character(len=25) :: tag + + write(tag,'(i20)')nn + tag="_n="//adjustl(tag) + m=nn + n=nn + k=nn + alpha=1.0 + beta=0.0 + + allocate(a(m,k), b(k,n), c(m,n)) + a=1.0 + b=2.0 + c=3.0 + if (lhook) call dr_hook('SGEMM'//TRIM(tag),0,zhook_handle) + call sgemm('n','n',m,n,k,alpha,a,m,b,k,beta,c,m) + if (lhook) call dr_hook('SGEMM'//TRIM(tag),1,zhook_handle) + + return + + end subroutine sgemm_driver +#endif + +end module gemm_mod diff --git a/src/programs/fiat-drhook-sanity-stream.F90 b/src/programs/fiat-drhook-sanity-stream.F90 new file mode 100644 index 00000000..2dbd1ac5 --- /dev/null +++ b/src/programs/fiat-drhook-sanity-stream.F90 @@ -0,0 +1,460 @@ +MODULE stream_mod + !======================================================================= + ! Program: STREAM + ! Programmer: John D. McCalpin + ! RCS Revision: $Id: stream.f,v 5.6 2005/10/04 00:20:48 mccalpin Exp mccalpin $ + !----------------------------------------------------------------------- + ! Copyright 1991-2003: John D. McCalpin + !----------------------------------------------------------------------- + ! License: + ! 1. You are free to use this program and/or to redistribute + ! this program. + ! 2. You are free to modify this program for your own use, + ! including commercial use, subject to the publication + ! 
restrictions in item 3. + ! 3. You are free to publish results obtained from running this + ! program, or from works that you derive from this program, + ! with the following limitations: + ! 3a. In order to be referred to as "STREAM benchmark results", + ! published results must be in conformance to the STREAM + ! Run Rules, (briefly reviewed below) published at + ! http://www.cs.virginia.edu/stream/ref.html + ! and incorporated herein by reference. + ! As the copyright holder, John McCalpin retains the + ! right to determine conformity with the Run Rules. + ! 3b. Results based on modified source code or on runs not in + ! accordance with the STREAM Run Rules must be clearly + ! labelled whenever they are published. Examples of + ! proper labelling include: + ! "tuned STREAM benchmark results" + ! "based on a variant of the STREAM benchmark code" + ! Other comparable, clear and reasonable labelling is + ! acceptable. + ! 3c. Submission of results to the STREAM benchmark web site + ! is encouraged, but not required. + ! 4. Use of this program or creation of derived works based on this + ! program constitutes acceptance of these licensing restrictions. + ! 5. Absolutely no warranty is expressed or implied. + !----------------------------------------------------------------------- + ! This program measures sustained memory transfer rates in MB/s for + ! simple computational kernels coded in FORTRAN. + ! + ! The intent is to demonstrate the extent to which ordinary user + ! code can exploit the main memory bandwidth of the system under + ! test. 
+ use yomhook, only : lhook,dr_hook,jphook + +contains + subroutine stream_combinations() + implicit none + integer*8 n,ntimes,i + real(kind=jphook) :: zhook_handle + n=1024*1024 + ntimes=1024 + if (lhook) call dr_hook('STREAM',0,zhook_handle) + do i=1,3 + call stream(n,ntimes) + n=n*8 + ntimes=ntimes/8 + end do + + if (lhook) call dr_hook('STREAM',1,zhook_handle) + + end subroutine stream_combinations + + SUBROUTINE stream(n,ntimes) +!$ USE omp_lib + INTEGER*8 n,offset,ndim + INTEGER*8 ntimes + PARAMETER (offset=0) + ! .. + ! .. Local Scalars .. + DOUBLE PRECISION scalar,t + INTEGER j,k,nbpw,quantum + ! .. + ! .. Local Arrays .. + DOUBLE PRECISION maxtime(4),mintime(4),avgtime(4), & + times(4,ntimes) + INTEGER bytes(4) + CHARACTER label(4)*11 + ! .. + ! .. External Functions .. + DOUBLE PRECISION mysecond + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + REAL(KIND=JPHOOK) :: ZHOOK_1,ZHOOK_2,ZHOOK_3,ZHOOK_4 + CHARACTER(len=29) :: tag + +! INTEGER realsize + EXTERNAL mysecond !,checktick !,realsize + ! .. + ! .. Intrinsic Functions .. + ! + INTRINSIC dble,max,min,nint,sqrt + ! .. + ! .. Arrays in Common .. + DOUBLE PRECISION, allocatable :: a(:),b(:),c(:) + !dir$ attributes align:64 :: A, B, C +! CHARACTER(len=40) :: suffix + ! .. + ! .. Common blocks .. + ! COMMON a,b,c + ! .. + ! .. Data statements .. + DATA avgtime/4*0.0D0/,mintime/4*1.0D+36/,maxtime/4*0.0D0/ + DATA label/'Copy: ','Scale: ','Add: ','Triad: '/ + DATA bytes/2,2,3,3/ + ! .. +! WRITE(suffix,'(A,I30)')"_",n + ! 
--- SETUP --- determine precision and check timing --- + ndim=n+offset + allocate(a(ndim),b(ndim),c(ndim)) + nbpw = realsize() + write(tag,'(I20)')n +!$ if (omp_in_parallel()) then +!$ tag="_par_n="//adjustl(tag) +!$ else + tag="_n="//adjustl(tag) +!$ end if + + PRINT *,'----------------------------------------------' + PRINT *,'STREAM Version $Revision: 5.6 $' + PRINT *,'----------------------------------------------' + WRITE (*,FMT=9010) 'Array size = ',n + WRITE (*,FMT=9010) 'Offset = ',offset + WRITE (*,FMT=9020) 'The total memory requirement is ', & + 3*nbpw*n/ (1024*1024),' MB' + WRITE (*,FMT=9030) 'You are running each test ',ntimes,' times' + WRITE (*,FMT=9030) '--' + WRITE (*,FMT=9030) 'The *best* time for each test is used' + WRITE (*,FMT=9030) '*EXCLUDING* the first and last iterations' +!$OMP PARALLEL +!$OMP MASTER + PRINT *,'----------------------------------------------' +!$ PRINT *,'Number of Threads = ',OMP_GET_NUM_THREADS() +!$OMP END MASTER +!$OMP END PARALLEL + + PRINT *,'----------------------------------------------' + +!$OMP PARALLEL DO + DO 10 j = 1,n + a(j) = 2.0d0 + b(j) = 0.5D0 + c(j) = 0.0D0 +10 END DO + t = mysecond() +!$OMP PARALLEL DO + DO 20 j = 1,n + a(j) = 0.5d0*a(j) +20 END DO + t = mysecond() - t + PRINT *,'----------------------------------------------------' + quantum = checktick() + WRITE (*,FMT=9000) & + 'Your clock granularity/precision appears to be ',quantum, & + ' microseconds' + PRINT *,'----------------------------------------------------' + + ! 
--- MAIN LOOP --- repeat test cases NTIMES times --- + scalar = 0.5d0*a(1) + DO 70 k = 1,ntimes + + IF (LHOOK) CALL DR_HOOK('STREAM_COPY'//TRIM(tag),0,ZHOOK_1) + t = mysecond() + a(1) = a(1) + t +!$OMP PARALLEL DO + DO 30 j = 1,n + c(j) = a(j) +30 END DO + t = mysecond() - t + IF (LHOOK) CALL DR_HOOK('STREAM_COPY'//TRIM(tag),1,ZHOOK_1) + + c(n) = c(n) + t + times(1,k) = t + + IF (LHOOK) CALL DR_HOOK('STREAM_SCALE'//TRIM(tag),0,ZHOOK_2) + t = mysecond() + c(1) = c(1) + t +!$OMP PARALLEL DO + DO 40 j = 1,n + b(j) = scalar*c(j) +40 END DO + t = mysecond() - t + IF (LHOOK) CALL DR_HOOK('STREAM_SCALE'//TRIM(tag),1,ZHOOK_2) + + b(n) = b(n) + t + times(2,k) = t + + IF (LHOOK) CALL DR_HOOK('STREAM_ADD'//TRIM(tag),0,ZHOOK_3) + t = mysecond() + a(1) = a(1) + t +!$OMP PARALLEL DO + DO 50 j = 1,n + c(j) = a(j) + b(j) +50 END DO + t = mysecond() - t + IF (LHOOK) CALL DR_HOOK('STREAM_ADD'//TRIM(tag),1,ZHOOK_3) + c(n) = c(n) + t + times(3,k) = t + + IF (LHOOK) CALL DR_HOOK('STREAM_TRIAD'//TRIM(tag),0,ZHOOK_4) + t = mysecond() + b(1) = b(1) + t +!$OMP PARALLEL DO + DO 60 j = 1,n + a(j) = b(j) + scalar*c(j) +60 END DO + t = mysecond() - t + IF (LHOOK) CALL DR_HOOK('STREAM_TRIAD'//TRIM(tag),1,ZHOOK_4) + + a(n) = a(n) + t + times(4,k) = t +70 END DO + + ! 
--- SUMMARY --- + DO 90 k = 2,ntimes + DO 80 j = 1,4 + avgtime(j) = avgtime(j) + times(j,k) + mintime(j) = min(mintime(j),times(j,k)) + maxtime(j) = max(maxtime(j),times(j,k)) +80 END DO +90 END DO + WRITE (*,FMT=9040) + DO 100 j = 1,4 + avgtime(j) = avgtime(j)/dble(ntimes-1) + WRITE (*,FMT=9050) label(j),n*bytes(j)*nbpw/mintime(j)/1.0D6, & + avgtime(j),mintime(j),maxtime(j) +100 END DO + PRINT *,'----------------------------------------------------' + CALL checksums (a,b,c,n,ntimes) + PRINT *,'----------------------------------------------------' + +9000 FORMAT (1x,a,i6,a) +9010 FORMAT (1x,a,i10) +9020 FORMAT (1x,a,i7,a) +9030 FORMAT (1x,a,i5,a,a) +9040 FORMAT ('Function',5x,'Rate (MB/s) Avg time Min time Max time' & + ) +9050 FORMAT (a,4 (f12.4,2x)) + END SUBROUTINE stream + + !------------------------------------- + ! INTEGER FUNCTION dblesize() + ! + ! A semi-portable way to determine the precision of DOUBLE PRECISION + ! in Fortran. + ! Here used to guess how many bytes of storage a DOUBLE PRECISION + ! number occupies. + ! + INTEGER FUNCTION realsize() + ! IMPLICIT NONE + + ! .. Local Scalars .. + DOUBLE PRECISION result,test + INTEGER j,ndigits + ! .. + ! .. Local Arrays .. + DOUBLE PRECISION ref(30) + ! .. + ! .. External Subroutines .. +! EXTERNAL confuse + ! .. + ! .. Intrinsic Functions .. + INTRINSIC abs,acos,log10,sqrt + ! .. + + ! 
Test #1 - compare single(1.0d0+delta) to 1.0d0 + +10 DO 20 j = 1,30 + ref(j) = 1.0d0 + 10.0d0** (-j) +20 END DO + + DO 30 j = 1,30 + test = ref(j) + ndigits = j + CALL confuse(test,result) + IF (test.EQ.1.0D0) THEN + GO TO 40 + END IF +30 END DO + GO TO 50 + +40 WRITE (*,FMT='(a)') & + '----------------------------------------------' + WRITE (*,FMT='(1x,a,i2,a)') 'Double precision appears to have ', & + ndigits,' digits of accuracy' + IF (ndigits.LE.8) THEN + realsize = 4 + ELSE + realsize = 8 + END IF + WRITE (*,FMT='(1x,a,i1,a)') 'Assuming ',realsize, & + ' bytes per DOUBLE PRECISION word' + WRITE (*,FMT='(a)') & + '----------------------------------------------' + RETURN + +50 PRINT *,'Hmmmm. I am unable to determine the size.' + PRINT *,'Please enter the number of Bytes per DOUBLE PRECISION', & + ' number : ' + READ (*,FMT=*) realsize + IF (realsize.NE.4 .AND. realsize.NE.8) THEN + PRINT *,'Your answer ',realsize,' does not make sense.' + PRINT *,'Try again.' + PRINT *,'Please enter the number of Bytes per ', & + 'DOUBLE PRECISION number : ' + READ (*,FMT=*) realsize + END IF + PRINT *,'You have manually entered a size of ',realsize, & + ' bytes per DOUBLE PRECISION number' + WRITE (*,FMT='(a)') & + '----------------------------------------------' + END FUNCTION realsize + + SUBROUTINE confuse(q,r) + ! IMPLICIT NONE + ! .. Scalar Arguments .. + DOUBLE PRECISION q,r + ! .. + ! .. Intrinsic Functions .. + INTRINSIC cos + ! .. + r = cos(q) + RETURN +END SUBROUTINE confuse + +! A semi-portable way to determine the clock granularity +! Adapted from a code by John Henning of Digital Equipment Corporation +! +INTEGER FUNCTION checktick() + ! IMPLICIT NONE + + ! .. Parameters .. + INTEGER n + PARAMETER (n=20) + ! .. + ! .. Local Scalars .. + DOUBLE PRECISION t1,t2 + INTEGER i,j,jmin + ! .. + ! .. Local Arrays .. + DOUBLE PRECISION timesfound(n) + ! .. + ! .. External Functions .. + DOUBLE PRECISION mysecond + EXTERNAL mysecond + ! .. + ! .. Intrinsic Functions .. 
+ INTRINSIC max,min,nint + ! .. + i = 0 + t1=-1 +10 t2 = mysecond() + IF (t2.EQ.t1) GO TO 10 + + t1 = t2 + i = i + 1 + timesfound(i) = t1 + IF (i.LT.n) GO TO 10 + + jmin = 1000000 + DO 20 i = 2,n + j = nint((timesfound(i)-timesfound(i-1))*1d6) + jmin = min(jmin,max(j,0)) +20 END DO + + IF (jmin.GT.0) THEN + checktick = jmin + ELSE + PRINT *,'Your clock granularity appears to be less ', & + 'than one microsecond' + checktick = 1 + END IF + RETURN + + ! PRINT 14, timesfound(1)*1d6 + ! DO 20 i=2,n + ! PRINT 14, timesfound(i)*1d6, + ! & nint((timesfound(i)-timesfound(i-1))*1d6) + ! 14 FORMAT (1X, F18.4, 1X, i8) + ! 20 END DO + +END FUNCTION checktick + + + + +SUBROUTINE checksums(a,b,c,n,ntimes) + ! IMPLICIT NONE + ! .. + ! .. Arguments .. + DOUBLE PRECISION a(*),b(*),c(*) + INTEGER*8 n,ntimes + ! .. + ! .. Local Scalars .. + DOUBLE PRECISION aa,bb,cc,scalar,suma,sumb,sumc,epsilon + INTEGER k + ! .. + + ! Repeat the main loop, but with scalars only. + ! This is done to check the sum & make sure all + ! iterations have been executed correctly. + + aa = 2.0D0 + bb = 0.5D0 + cc = 0.0D0 + aa = 0.5D0*aa + scalar = 0.5d0*aa + DO k = 1,ntimes + cc = aa + bb = scalar*cc + cc = aa + bb + aa = bb + scalar*cc + END DO + aa = aa*DBLE(n-2) + bb = bb*DBLE(n-2) + cc = cc*DBLE(n-2) + + ! Now sum up the arrays, excluding the first and last + ! elements, which are modified using the timing results + ! to confuse aggressive optimizers. + + suma = 0.0d0 + sumb = 0.0d0 + sumc = 0.0d0 + !$OMP PARALLEL DO REDUCTION(+:suma,sumb,sumc) + DO 110 j = 2,n-1 + suma = suma + a(j) + sumb = sumb + b(j) + sumc = sumc + c(j) +110 END DO + + epsilon = 1.D-6 + + IF (ABS(suma-aa)/suma .GT. epsilon) THEN + PRINT *,'Failed Validation on array a()' + PRINT *,'Target Sum of a is = ',aa + PRINT *,'Computed Sum of a is = ',suma + ELSEIF (ABS(sumb-bb)/sumb .GT. 
epsilon) THEN + PRINT *,'Failed Validation on array b()' + PRINT *,'Target Sum of b is = ',bb + PRINT *,'Computed Sum of b is = ',sumb + ELSEIF (ABS(sumc-cc)/sumc .GT. epsilon) THEN + PRINT *,'Failed Validation on array c()' + PRINT *,'Target Sum of c is = ',cc + PRINT *,'Computed Sum of c is = ',sumc + ELSE + PRINT *,'Solution Validates!' + ENDIF + +END SUBROUTINE checksums + +function itoa(i) result(res) + character(:),allocatable :: res + integer,intent(in) :: i + character(range(i)+2) :: tmp + write(tmp,'(i0)') i + res = trim(tmp) +end function itoa + +END MODULE stream_mod diff --git a/src/programs/fiat-drhook-sanity.F90 b/src/programs/fiat-drhook-sanity.F90 new file mode 100644 index 00000000..5fdf09c5 --- /dev/null +++ b/src/programs/fiat-drhook-sanity.F90 @@ -0,0 +1,74 @@ +program drhook_sanity + use parkind1, only: jpim, jprb, jprd + use oml_mod ,only : oml_max_threads + use mpl_module + use yomhook, only : LHOOK,DR_HOOK,JPHOOK,dr_hook_init,dr_hook_end + use stream_mod + use gemm_mod + implicit none + logical :: luse_mpi = .true. + integer :: myproc,nproc,nthread + integer :: verbosity = 0 + + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + luse_mpi = detect_mpirun() + + if (luse_mpi) then + call mpl_init(ldinfo=(verbosity>=1)) + nproc = mpl_nproc() + myproc = mpl_myrank() + else + nproc = 1 + myproc = 1 + mpl_comm = -1 + endif + + nthread= oml_max_threads() + if (myproc.eq.1) write(6,*)'Starting Tasks=',nproc,'threads=',nthread + + call dr_hook_init() + + IF (LHOOK) CALL DR_HOOK('MAIN',0,ZHOOK_HANDLE) + + call stream_combinations() + +#if defined(HAVE_BLAS) + call gemm_combinations() +#endif + + IF (LHOOK) CALL DR_HOOK('MAIN',1,ZHOOK_HANDLE) + + call dr_hook_end() + + if (luse_mpi) then + call mpl_end(ldmeminfo=.false.) 
+ endif + if (myproc.eq.1) write(6,*)'Completed' +contains + function detect_mpirun() result(lmpi_required) + logical :: lmpi_required + integer :: ilen + integer, parameter :: nvars = 5 + character(len=32), dimension(nvars) :: cmpirun_detect + character(len=4) :: clenv_dr_hook_assert_mpi_initialized + integer :: ivar + + ! Environment variables that are set when mpirun, srun, aprun, ... are used + cmpirun_detect(1) = 'OMPI_COMM_WORLD_SIZE' ! openmpi + cmpirun_detect(2) = 'ALPS_APP_PE' ! cray pe + cmpirun_detect(3) = 'PMI_SIZE' ! intel + cmpirun_detect(4) = 'SLURM_NTASKS' ! slurm + cmpirun_detect(5) = 'ECTRANS_USE_MPI' ! forced + + lmpi_required = .false. + do ivar = 1, nvars + call get_environment_variable(name=trim(cmpirun_detect(ivar)), length=ilen) + if (ilen > 0) then + lmpi_required = .true. + exit ! break + endif + enddo +end function + +end program drhook_sanity diff --git a/src/programs/mysecond.c b/src/programs/mysecond.c new file mode 100644 index 00000000..d206a4ae --- /dev/null +++ b/src/programs/mysecond.c @@ -0,0 +1,27 @@ +/* A gettimeofday routine to give access to the wall + clock timer on most UNIX-like systems. 
+ + This version defines two entry points -- with + and without appended underscores, so it *should* + automagically link with FORTRAN */ + +#include + +double mysecond() +{ +/* struct timeval { long tv_sec; + long tv_usec; }; + +struct timezone { int tz_minuteswest; + int tz_dsttime; }; */ + + struct timeval tp; + struct timezone tzp; + int i; + + i = gettimeofday(&tp,&tzp); + return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); +} + +double mysecond_() {return mysecond();} + From e3ff3193860f372c6419618209d77fab88c8325f Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 24 Jul 2024 14:05:38 +0000 Subject: [PATCH 02/32] Make fiat-drhook-sanity a unit-test, refactor CMake and other cosmetic cleanup --- CMakeLists.txt | 11 +- cmake/FindPAPI.cmake | 4 +- src/fiat/CMakeLists.txt | 6 +- src/fiat/drhook/drhook.c | 25 ++--- src/fiat/drhook/drhook_papi.c | 84 ++++++++------- src/fiat/drhook/drhook_papi.h | 8 +- .../internal/drhook_run_omp_parallel.F90 | 17 ++- src/programs/CMakeLists.txt | 28 ----- src/programs/fiat-drhook-sanity.F90 | 74 ------------- src/programs/mysecond.c | 27 ----- tests/CMakeLists.txt | 42 ++++++++ tests/test_drhook_counters.F90 | 100 ++++++++++++++++++ .../test_drhook_counters_gemm.F90 | 12 ++- .../test_drhook_counters_stream.F90 | 42 ++++---- 14 files changed, 248 insertions(+), 232 deletions(-) delete mode 100644 src/programs/fiat-drhook-sanity.F90 delete mode 100644 src/programs/mysecond.c create mode 100644 tests/test_drhook_counters.F90 rename src/programs/fiat-drhook-sanity-gemm.F90 => tests/test_drhook_counters_gemm.F90 (88%) rename src/programs/fiat-drhook-sanity-stream.F90 => tests/test_drhook_counters_stream.F90 (95%) diff --git a/CMakeLists.txt b/CMakeLists.txt index f175cf53..afd05b43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,9 +29,9 @@ ecbuild_add_option( FEATURE MPI DESCRIPTION "Support for MPI distributed parallelism" REQUIRED_PACKAGES "MPI COMPONENTS Fortran" ) -ecbuild_add_option( FEATURE PAPI - 
DESCRIPTION "Support for HW counters in drhook via PAPI" - REQUIRED_PACKAGES "OpenMP COMPONENTS C" "PAPI") +ecbuild_add_option( FEATURE DR_HOOK_PAPI + DESCRIPTION "Support for HW counters in DR_HOOK via PAPI" + REQUIRED_PACKAGES "PAPI") ecbuild_find_package( fckit QUIET ) ecbuild_add_option( FEATURE FCKIT @@ -54,11 +54,6 @@ ecbuild_add_option( FEATURE WARNINGS DEFAULT ON DESCRIPTION "Add warnings to compiler" ) -ecbuild_add_option( FEATURE MKL - DESCRIPTION "Use MKL for BLAS and/or FFTW" - DEFAULT ON - REQUIRED_PACKAGES "MKL" ) - ecbuild_find_package( NAME Realtime QUIET ) ### Sources diff --git a/cmake/FindPAPI.cmake b/cmake/FindPAPI.cmake index f778f51a..35c3e6f9 100644 --- a/cmake/FindPAPI.cmake +++ b/cmake/FindPAPI.cmake @@ -24,12 +24,12 @@ find_path(PAPI_ROOT find_library(PAPI_LIBRARIES # Pick the static library first for easier run-time linking. NAMES libpapi.so libpapi.a papi - HINTS ${PAPI_ROOT}/lib ${HILTIDEPS}/lib + HINTS ${PAPI_ROOT}/lib ) find_path(PAPI_INCLUDE_DIRS NAMES papi.h - HINTS ${PAPI_ROOT}/include ${HILTIDEPS}/include + HINTS ${PAPI_ROOT}/include ) include(FindPackageHandleStandardArgs) diff --git a/src/fiat/CMakeLists.txt b/src/fiat/CMakeLists.txt index ecae913f..624a195d 100644 --- a/src/fiat/CMakeLists.txt +++ b/src/fiat/CMakeLists.txt @@ -86,15 +86,13 @@ else() endif() if( HAVE_OMP ) - target_link_libraries( fiat PRIVATE OpenMP::OpenMP_Fortran ) - endif() -if ( HAVE_PAPI ) +if ( HAVE_DR_HOOK_PAPI ) target_link_libraries ( fiat PRIVATE ${PAPI_LIBRARIES} ) target_include_directories ( fiat PRIVATE ${PAPI_INCLUDE_DIRS} ) - target_compile_definitions ( fiat PRIVATE HKPAPI ) + target_compile_definitions ( fiat PRIVATE DR_HOOK_HAVE_PAPI=1 ) endif() fiat_target_ignore_missing_symbols( TARGET fiat SYMBOLS diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index 65eb8190..9836cc56 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -473,7 +473,7 @@ typedef struct drhook_key_t { long long int mem_maxhwm, mem_maxrss, 
mem_maxstk, mem_maxpagdelta; long long int paging_in; -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) long_long counters_in[NPAPICNTRS]; long_long delta_counters_all[NPAPICNTRS]; long_long delta_counters_child[NPAPICNTRS]; @@ -507,7 +507,7 @@ typedef struct drhook_prof_t { double pc; double total; double self; -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) long_long counter_tot[NPAPICNTRS]; long_long counter_self[NPAPICNTRS]; #endif @@ -1060,7 +1060,7 @@ remove_calltree(int tid, drhook_key_t *keyptr, if (treeptr->prev) { drhook_key_t *parent_keyptr = treeptr->prev->keyptr; if (parent_keyptr) { /* extra security */ -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) drhook_papi_add(NULL, parent_keyptr->delta_counters_child, delta_counters @@ -2827,7 +2827,7 @@ getkey(int tid, const char *name, int name_len, if (opt_walltime) keyptr->wall_in = walltime ? *walltime : WALLTIME(); if (opt_cputime) keyptr->cpu_in = cputime ? *cputime : CPUTIME(); if (opt_cycles) keyptr->cycles_in = cycles ? *cycles : ec_get_cycles(); -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) drhook_papi_readAll(keyptr->counters_in); #endif if (any_memstat) memstat(keyptr,&tid,1); @@ -2932,7 +2932,7 @@ putkey(int tid, drhook_key_t *keyptr, const char *name, int name_len, double delta_wall = 0; double delta_cpu = 0; long_long * delta_counters=NULL; -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) delta_counters=alloca(drhook_papi_num_counters() * sizeof(long_long) ); drhook_papi_bzero(delta_counters); #endif @@ -2965,7 +2965,7 @@ putkey(int tid, drhook_key_t *keyptr, const char *name, int name_len, if (opt_walltime) keyptr->delta_wall_all += delta_wall; if (opt_cputime) keyptr->delta_cpu_all += delta_cpu; if (opt_cycles) keyptr->delta_cycles_all += delta_cycles; -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) drhook_papi_subtract(delta_counters, NULL , keyptr->counters_in); drhook_papi_add(NULL, keyptr->delta_counters_all, delta_counters); #endif @@ -3095,7 +3095,7 @@ itself(drhook_key_t *keyptr_self, if (opt 
== 0) { if (opt_wallprof) keyptr->wall_in = walltime ? *walltime : WALLTIME(); else keyptr->cpu_in = cputime ? *cputime : CPUTIME(); -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) drhook_papi_readAll(keyptr->counters_in); #endif keyptr->calls++; @@ -3112,7 +3112,8 @@ itself(drhook_key_t *keyptr_self, } if (delta_time) *delta_time = delta; -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) + long_long cntrs_delta[NPAPICNTRS]; /* cntrs_delta = current - counters_in */ @@ -3412,7 +3413,7 @@ c_drhook_check_watch_(const char *where, } /*** PUBLIC ***/ -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) #define PAPIREAD \ long_long cntrs[NPAPICNTRS]; \ drhook_papi_readAll(cntrs) @@ -3509,7 +3510,7 @@ c_drhook_init_(const char *progname, tabort_delete_lockfile(); drhook_delete_lockfile(); } -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) drhook_papi_init(myproc -1); #endif @@ -4308,7 +4309,7 @@ c_drhook_print_(const int *ftnunitno, drhook_key_t *keyptr = &keydata[t][j]; while (keyptr) { if (keyptr->name && (keyptr->status == 0 || signal_handler_called)) { -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) drhook_papi_subtract(p->counter_self, keyptr->delta_counters_all, keyptr->delta_counters_child); @@ -4580,7 +4581,7 @@ c_drhook_print_(const int *ftnunitno, } /* for (j=0; j #include @@ -8,20 +8,22 @@ #define STD_MSG_LEN 4096 -int * drhook_papi_event_set=NULL; -enum {drhook_papi_notstarted,drhook_papi_running,drhook_papi_failed}; -int drhook_papi_state=0; +int* drhook_papi_event_set=NULL; +enum { + drhook_papi_notstarted, + drhook_papi_running, + drhook_papi_failed +}; +int drhook_papi_state=drhook_papi_notstarted; int drhook_papi_rank=0; /* C style! 
*/ -size_t drhook_max_counter_name=0; /* hardwired for now */ -const char * hookCounters[ NPAPICNTRS ][2]= - { +const char* hookCounters[NPAPICNTRS][2]= { {"PAPI_TOT_CYC","Cycles"}, {"PAPI_FP_OPS","FP Operations"}, {"PAPI_L1_DCA","L1 Access"}, {"PAPI_L2_DCM","L2 Miss"} - }; +}; /* function to use for thread id - it should be better than omp_get_thread_num! @@ -30,7 +32,7 @@ unsigned long safe_thread_num(){ return oml_my_thread()-1; } -const char * drhook_papi_counter_name(int c,int t){ +const char* drhook_papi_counter_name(int c,int t){ return hookCounters[c][t]; } @@ -46,7 +48,7 @@ void drhook_papi_bzero(long_long* a){ } } -void drhook_papi_print(char * s,long_long* a,int header){ +void drhook_papi_print(char* s, long_long* a, int header){ char msg[STD_MSG_LEN]; if (header>0){ char fmt[STD_MSG_LEN]; @@ -88,8 +90,6 @@ void drhook_papi_add(long_long* a,long_long* b, long_long* c){ } } - - // number of counters available to read int drhook_papi_num_counters(){ return NPAPICNTRS; @@ -115,7 +115,7 @@ int drhook_papi_readAll(long_long * counterArray){ printf("DRHOOK:PAPI:PAPI_read: Error reading counters, thread=%ld es=%d %s\n",safe_thread_num(),drhook_papi_event_set[safe_thread_num()],PAPI_strerror(err)); } #if defined(DEBUG) - drhook_papi_print("readAll:",counterArray); + drhook_papi_print("readAll:",counterArray,0); #endif return err; } @@ -142,7 +142,7 @@ int drhook_papi_init(int rank){ } paperr=PAPI_library_init(PAPI_VER_CURRENT); - if (paperr != PAPI_VER_CURRENT){ + if (paperr != PAPI_VER_CURRENT){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI:PAPI_library_init: ret code=%d version loaded =%d ", paperr,PAPI_VER_CURRENT); printf("%s\n",pmsg); @@ -211,7 +211,7 @@ int drhook_papi_init(int rank){ return 1; } -int dr_hook_papi_start_threads(int * events){ +int dr_hook_papi_start_threads(int* events){ int thread=safe_thread_num(); int papiErr; char pmsg[STD_MSG_LEN]; @@ -222,18 +222,23 @@ int dr_hook_papi_start_threads(int * events){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: 
create event set failed (%s) \n",PAPI_strerror(papiErr)); printf("%s\n",pmsg); return 0; - } else - printf("Event set %d created for thread %d\n",events[thread],thread); + } + + printf("DRHOOK:PAPI: Event set %d created for thread %d\n",events[thread],thread); int prof_papi_numcntrs=NPAPICNTRS; - for (int counter=0;counter < prof_papi_numcntrs ;counter ++){ + for (int counter=0; counter < prof_papi_numcntrs; counter ++){ int eventCode; snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: %s (%s)",hookCounters[counter][0],hookCounters[counter][1]); - if (drhook_papi_rank==0) if (thread==0)printf("%s\n",pmsg); + if (drhook_papi_rank==0) { + if (thread==0) { + printf("%s\n",pmsg); + } + } papiErr=PAPI_event_name_to_code(hookCounters[counter][0],&eventCode); - if (papiErr !=PAPI_OK){ + if (papiErr != PAPI_OK){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: event name to code failed (%s)",PAPI_strerror(papiErr)); printf("%s\n",pmsg); PAPI_perror("initPapi"); @@ -245,22 +250,25 @@ int dr_hook_papi_start_threads(int * events){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: add_event failed: %d (%s)",papiErr,PAPI_strerror(papiErr)); printf("%s\n",pmsg); if (papiErr == PAPI_EINVAL) - printf("Invalid argumet"); + printf("Invalid argumet"); else if (papiErr == PAPI_ENOMEM) - printf("Out of Mmemory"); + printf("Out of Mmemory"); else if (papiErr == PAPI_ENOEVST) - printf("EventSet does not exist"); + printf("EventSet does not exist"); else if (papiErr == PAPI_EISRUN) - printf("EventSet is running"); + printf("EventSet is running"); else if (papiErr == PAPI_ECNFLCT) - printf("Conflict"); + printf("Conflict"); else if (papiErr == PAPI_ENOEVNT) - printf("Preset not available"); + printf("Preset not available"); return 0; - }else { + } + else { #if defined(DEBUG) - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Added code=%d to Evnt set %d",events[thread]); - if (thread==0)printf("%s\n",pmsg); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Added code=%d to Event set %d",eventCode, events[thread]); + if (thread==0) { + 
printf("%s\n",pmsg); + } #endif } } @@ -272,15 +280,13 @@ int dr_hook_papi_start_threads(int * events){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error querying events - %d=%s",papiErr,PAPI_strerror(papiErr)); printf("%s\n",pmsg); return 0; - }else { + } #if defined(DEBUG) - for (counter=0;counter -#endif +#if defined(DR_HOOK_HAVE_PAPI) #include @@ -12,7 +8,7 @@ int drhook_papi_init(int rank); int drhook_papi_num_counters(); -const char * drhook_papi_counter_name(int c,int t); +const char* drhook_papi_counter_name(int c,int t); long_long drhook_papi_read(int counterId); int drhook_papi_readAll(long_long * counterArray); diff --git a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 index c0a1fccf..6c3c0a02 100644 --- a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 +++ b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 @@ -10,18 +10,17 @@ ! These functions are to be used within drhook C methods, to avoid having OMP pragmas there. -module hook_papi_interface -#ifdef HKPAPI - +module drhook_papi_interface +#if defined(DR_HOOK_HAVE_PAPI) interface function dr_hook_papi_start_threads ( events) bind ( c ) - use :: iso_c_binding - INTEGER(KIND=C_INT) :: dr_hook_papi_start_threads - INTEGER(KIND=C_INT), INTENT(INOUT) :: Events(*) + use, intrinsic :: iso_c_binding, only : c_int + integer(kind=c_int) :: dr_hook_papi_start_threads + integer(kind=c_int), intent(inout) :: events(*) end function dr_hook_papi_start_threads end interface #endif -end module hook_papi_interface +end module drhook_papi_interface subroutine drhook_run_omp_parallel_ipfstr(NTIDS, FUNC, CDSTR) ! 
Usage: @@ -75,11 +74,11 @@ subroutine drhook_run_omp_parallel_get_cycles(NTIDS, NCYCLES) !$OMP END PARALLEL end subroutine drhook_run_omp_parallel_get_cycles -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) subroutine drhook_run_omp_parallel_papi_startup(events,n) bind(c) use, intrinsic :: iso_c_binding, only : c_char, c_int, c_double - use hook_papi_interface + use drhook_papi_interface use OML_MOD implicit none INTEGER(KIND=C_INT), INTENT(INOUT) :: Events(n) diff --git a/src/programs/CMakeLists.txt b/src/programs/CMakeLists.txt index 1fb2787c..dd4696d3 100644 --- a/src/programs/CMakeLists.txt +++ b/src/programs/CMakeLists.txt @@ -68,31 +68,3 @@ check_c_compiler_flag( "-Wno-implicit-function-declaration" disable_warning_impl if( disable_warning_implicit_function_declaration ) target_compile_options( fiat-printbinding PRIVATE "-Wno-implicit-function-declaration" ) endif() - -### Program fiat-drhook-sanity -# Note: it only depends on MPI and OpenMP -ecbuild_add_executable(TARGET fiat-drhook-sanity - SOURCES fiat-drhook-sanity.F90 fiat-drhook-sanity-stream.F90 fiat-drhook-sanity-gemm.F90 mysecond.c - LIBS fiat parkind_dp - ) -target_compile_definitions( fiat-drhook-sanity PRIVATE OMP ) - -ecbuild_info( CMAKE_Fortran_COMPILER_ID - 1. ${CMAKE_Fortran_COMPILER_ID} 2. 
${EC_COMPILER_FAMILY} ) -if( ${CMAKE_Fortran_COMPILER_ID} MATCHES Intel ) - set_source_files_properties(fiat-drhook-sanity-stream.f90 PROPERTIES COMPILE_FLAGS " -qopt-prefetch-distance=64,12 -qopt-streaming-cache-evict=0 -qopt-streaming-stores always -qopt-zmm-usage=high ") -endif() - -find_package( OpenMP COMPONENTS Fortran ) -if( TARGET OpenMP::OpenMP_Fortran ) - target_link_libraries( fiat-drhook-sanity OpenMP::OpenMP_Fortran ) -endif() -target_link_libraries( fiat-drhook-sanity ${MKL_LIBRARIES} ) - -find_package( MPI COMPONENTS C ) -if( HAVE_MPI AND TARGET MPI::MPI_C ) - target_link_libraries( fiat-drhook-sanity MPI::MPI_C ) -else() - target_compile_definitions( fiat-drhook-sanity PRIVATE NOMPI ) -endif() - - diff --git a/src/programs/fiat-drhook-sanity.F90 b/src/programs/fiat-drhook-sanity.F90 deleted file mode 100644 index 5fdf09c5..00000000 --- a/src/programs/fiat-drhook-sanity.F90 +++ /dev/null @@ -1,74 +0,0 @@ -program drhook_sanity - use parkind1, only: jpim, jprb, jprd - use oml_mod ,only : oml_max_threads - use mpl_module - use yomhook, only : LHOOK,DR_HOOK,JPHOOK,dr_hook_init,dr_hook_end - use stream_mod - use gemm_mod - implicit none - logical :: luse_mpi = .true. - integer :: myproc,nproc,nthread - integer :: verbosity = 0 - - REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - - luse_mpi = detect_mpirun() - - if (luse_mpi) then - call mpl_init(ldinfo=(verbosity>=1)) - nproc = mpl_nproc() - myproc = mpl_myrank() - else - nproc = 1 - myproc = 1 - mpl_comm = -1 - endif - - nthread= oml_max_threads() - if (myproc.eq.1) write(6,*)'Starting Tasks=',nproc,'threads=',nthread - - call dr_hook_init() - - IF (LHOOK) CALL DR_HOOK('MAIN',0,ZHOOK_HANDLE) - - call stream_combinations() - -#if defined(HAVE_BLAS) - call gemm_combinations() -#endif - - IF (LHOOK) CALL DR_HOOK('MAIN',1,ZHOOK_HANDLE) - - call dr_hook_end() - - if (luse_mpi) then - call mpl_end(ldmeminfo=.false.) 
- endif - if (myproc.eq.1) write(6,*)'Completed' -contains - function detect_mpirun() result(lmpi_required) - logical :: lmpi_required - integer :: ilen - integer, parameter :: nvars = 5 - character(len=32), dimension(nvars) :: cmpirun_detect - character(len=4) :: clenv_dr_hook_assert_mpi_initialized - integer :: ivar - - ! Environment variables that are set when mpirun, srun, aprun, ... are used - cmpirun_detect(1) = 'OMPI_COMM_WORLD_SIZE' ! openmpi - cmpirun_detect(2) = 'ALPS_APP_PE' ! cray pe - cmpirun_detect(3) = 'PMI_SIZE' ! intel - cmpirun_detect(4) = 'SLURM_NTASKS' ! slurm - cmpirun_detect(5) = 'ECTRANS_USE_MPI' ! forced - - lmpi_required = .false. - do ivar = 1, nvars - call get_environment_variable(name=trim(cmpirun_detect(ivar)), length=ilen) - if (ilen > 0) then - lmpi_required = .true. - exit ! break - endif - enddo -end function - -end program drhook_sanity diff --git a/src/programs/mysecond.c b/src/programs/mysecond.c deleted file mode 100644 index d206a4ae..00000000 --- a/src/programs/mysecond.c +++ /dev/null @@ -1,27 +0,0 @@ -/* A gettimeofday routine to give access to the wall - clock timer on most UNIX-like systems. 
- - This version defines two entry points -- with - and without appended underscores, so it *should* - automagically link with FORTRAN */ - -#include - -double mysecond() -{ -/* struct timeval { long tv_sec; - long tv_usec; }; - -struct timezone { int tz_minuteswest; - int tz_dsttime; }; */ - - struct timeval tp; - struct timezone tzp; - int i; - - i = gettimeofday(&tp,&tzp); - return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); -} - -double mysecond_() {return mysecond();} - diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 22c73c98..56d3f479 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -8,6 +8,8 @@ if( HAVE_TESTS ) +set( CMAKE_Fortran_MODULE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) + if( HAVE_MPI AND MPIEXEC ) set( LAUNCH ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 ) else() @@ -105,6 +107,46 @@ set_tests_properties(fiat_test_drhook_fortran PROPERTIES ENVIRONMENT "MPL=0;DR_HOOK_ASSERT_MPI_INITIALIZED=0;DR_HOOK_OPT=NOPROPAGATE_SIGNALS" PASS_REGULAR_EXPRESSION "EC_DRHOOK.*\[DrHookCallTree\]" ) + +# ---------------------------------------------------------------------------------------- +# Tests: fiat_test_drhook_counters + +if( HAVE_DR_HOOK_PAPI ) + ecbuild_add_test(TARGET fiat_test_drhook_counters + SOURCES test_drhook_counters.F90 + test_drhook_counters_stream.F90 + test_drhook_counters_gemm.F90 + LIBS fiat + ENVIRONMENT "DR_HOOK_ASSERT_MPI_INITIALIZED=0;FIAT_UNIT_TEST=1" + ) + target_compile_definitions( fiat_test_drhook_counters PRIVATE OMP ) + if( NOT HAVE_MPI ) + target_compile_definitions( fiat_test_drhook_counters PRIVATE NOMPI ) + endif() + + if( CMAKE_Fortran_COMPILER_ID MATCHES Intel ) + set_source_files_properties(test_drhook_counters_stream.F90 + PROPERTIES COMPILE_OPTIONS "-qopt-prefetch-distance=64,12;-qopt-streaming-cache-evict=0;-qopt-streaming-stores always;-qopt-zmm-usage=high") + endif() + + find_package( OpenMP COMPONENTS Fortran ) + if( TARGET OpenMP::OpenMP_Fortran ) + target_link_libraries( 
fiat_test_drhook_counters OpenMP::OpenMP_Fortran ) + endif() + if( NOT BLAS_LIBRARIES ) + find_package( MKL QUIET ) + if( MKL_LIBRARIES ) + set( BLAS_LIBRARIES ${MKL_LIBRARIES} ) + else() + find_package( BLAS QUIET ) + endif() + endif() + if( BLAS_LIBRARIES ) + target_link_libraries( fiat_test_drhook_counters ${BLAS_LIBRARIES} ) + target_compile_definitions( fiat_test_drhook_counters PUBLIC HAVE_BLAS ) + endif() +endif() + # ---------------------------------------------------------------------------------------- # Tests: fiat_test_ec_args_fortran diff --git a/tests/test_drhook_counters.F90 b/tests/test_drhook_counters.F90 new file mode 100644 index 00000000..d425e284 --- /dev/null +++ b/tests/test_drhook_counters.F90 @@ -0,0 +1,100 @@ +program fiat_test_drhook_counters + use oml_mod ,only : oml_max_threads + use mpl_module, only : mpl_init, mpl_end, mpl_nproc, mpl_myrank + use yomhook, only : LHOOK,DR_HOOK,JPHOOK,dr_hook_init,dr_hook_end + use test_drhook_counters_stream_mod, only : stream_combinations + use test_drhook_counters_gemm_mod, only : gemm_combinations + use ec_env_mod, only : ec_setenv + + implicit none + logical :: luse_mpi = .true. + logical :: lsmall_problem_size = .false. + integer :: myproc,nproc + integer :: verbosity = 0 + + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + luse_mpi = detect_mpirun() + lsmall_problem_size = detect_FIAT_UNIT_TEST() + + if (luse_mpi) then + call mpl_init(ldinfo=(verbosity>=1)) + nproc = mpl_nproc() + myproc = mpl_myrank() + else + nproc = 1 + myproc = 1 + endif + + if (myproc.eq.1) write(6,*)'Starting Tasks=',nproc,'threads=',oml_max_threads() + + call ec_setenv("DR_HOOK", "1", overwrite=.true.) + call ec_setenv("DR_HOOK_OPT", "COUNTERS", overwrite=.true.) 
+ + call dr_hook_init() + + IF (LHOOK) CALL DR_HOOK('MAIN',0,ZHOOK_HANDLE) + + if (myproc.eq.1) write(6,*) "================================================= BENCHMARK STREAM START" + if (lsmall_problem_size) then + call stream_combinations(int(1024*32,kind=8)) + else + call stream_combinations() + endif + if (myproc.eq.1) write(6,*) "================================================= BENCHMARK STREAM END" + + if (myproc.eq.1) write(6,*) "================================================= BENCHMARK GEMM START" + if (lsmall_problem_size) then + call gemm_combinations(int(250,kind=8)) + else + call gemm_combinations() + endif + write(6,*) "================================================= BENCHMARK GEMM END" + + IF (LHOOK) CALL DR_HOOK('MAIN',1,ZHOOK_HANDLE) + + call dr_hook_end() + + if (luse_mpi) then + call mpl_end(ldmeminfo=.false.) + endif + if (myproc.eq.1) write(6,*)'Completed' +contains + function detect_mpirun() result(lmpi_required) + logical :: lmpi_required + integer :: ilen + integer, parameter :: nvars = 5 + character(len=32), dimension(nvars) :: cmpirun_detect + character(len=4) :: clenv_dr_hook_assert_mpi_initialized + integer :: ivar + lmpi_required = .false. +#if defined(NOMPI) + return +#endif + ! Environment variables that are set when mpirun, srun, aprun, ... are used + cmpirun_detect(1) = 'OMPI_COMM_WORLD_SIZE' ! openmpi + cmpirun_detect(2) = 'ALPS_APP_PE' ! cray pe + cmpirun_detect(3) = 'PMI_SIZE' ! intel + cmpirun_detect(4) = 'SLURM_NTASKS' ! slurm + cmpirun_detect(5) = 'FIAT_USE_MPI' ! forced + + do ivar = 1, nvars + call get_environment_variable(name=trim(cmpirun_detect(ivar)), length=ilen) + if (ilen > 0) then + lmpi_required = .true. + exit ! break + endif + enddo +end function + +function detect_FIAT_UNIT_TEST() result(lunit_test) + logical :: lunit_test + integer :: ilen + lunit_test = .false. + call get_environment_variable(name='FIAT_UNIT_TEST', length=ilen) + if (ilen > 0) then + lunit_test = .true. 
+ endif +end function + +end program diff --git a/src/programs/fiat-drhook-sanity-gemm.F90 b/tests/test_drhook_counters_gemm.F90 similarity index 88% rename from src/programs/fiat-drhook-sanity-gemm.F90 rename to tests/test_drhook_counters_gemm.F90 index b82cb87c..bb9affb3 100644 --- a/src/programs/fiat-drhook-sanity-gemm.F90 +++ b/tests/test_drhook_counters_gemm.F90 @@ -1,13 +1,17 @@ -module gemm_mod +module test_drhook_counters_gemm_mod use yomhook, only : lhook,dr_hook,jphook implicit none contains - subroutine gemm_combinations() + subroutine gemm_combinations(n_init) implicit none - integer*8 n,i + integer(kind=8), intent(in), optional :: n_init + integer(kind=8) :: n,i real(kind=jphook) :: zhook_handle n=1000 + if (present(n_init)) then + n = n_init + endif #if defined(HAVE_BLAS) if (lhook) call dr_hook('GEMM_ALL',0,zhook_handle) do i=1,4 @@ -81,4 +85,4 @@ subroutine sgemm_driver(nn) end subroutine sgemm_driver #endif -end module gemm_mod +end module diff --git a/src/programs/fiat-drhook-sanity-stream.F90 b/tests/test_drhook_counters_stream.F90 similarity index 95% rename from src/programs/fiat-drhook-sanity-stream.F90 rename to tests/test_drhook_counters_stream.F90 index 2dbd1ac5..ee575b11 100644 --- a/src/programs/fiat-drhook-sanity-stream.F90 +++ b/tests/test_drhook_counters_stream.F90 @@ -1,4 +1,4 @@ -MODULE stream_mod +MODULE test_drhook_counters_stream_mod !======================================================================= ! Program: STREAM ! Programmer: John D. 
McCalpin @@ -45,19 +45,23 @@ MODULE stream_mod use yomhook, only : lhook,dr_hook,jphook contains - subroutine stream_combinations() + subroutine stream_combinations(n_init) implicit none - integer*8 n,ntimes,i + integer(kind=8), intent(in), optional :: n_init + integer(kind=8) :: n, ntimes, i real(kind=jphook) :: zhook_handle n=1024*1024 + if (present(n_init)) then + n = n_init + endif ntimes=1024 if (lhook) call dr_hook('STREAM',0,zhook_handle) do i=1,3 + write(6,'(" =============================== CALL STREAM(",I0,",",I0,")")') n, ntimes call stream(n,ntimes) n=n*8 ntimes=ntimes/8 end do - if (lhook) call dr_hook('STREAM',1,zhook_handle) end subroutine stream_combinations @@ -79,7 +83,7 @@ SUBROUTINE stream(n,ntimes) CHARACTER label(4)*11 ! .. ! .. External Functions .. - DOUBLE PRECISION mysecond + DOUBLE PRECISION timef REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JPHOOK) :: ZHOOK_1,ZHOOK_2,ZHOOK_3,ZHOOK_4 CHARACTER(len=29) :: tag @@ -142,12 +146,12 @@ SUBROUTINE stream(n,ntimes) b(j) = 0.5D0 c(j) = 0.0D0 10 END DO - t = mysecond() + t = timef() !$OMP PARALLEL DO DO 20 j = 1,n a(j) = 0.5d0*a(j) 20 END DO - t = mysecond() - t + t = timef() - t PRINT *,'----------------------------------------------------' quantum = checktick() WRITE (*,FMT=9000) & @@ -160,51 +164,51 @@ SUBROUTINE stream(n,ntimes) DO 70 k = 1,ntimes IF (LHOOK) CALL DR_HOOK('STREAM_COPY'//TRIM(tag),0,ZHOOK_1) - t = mysecond() + t = timef() a(1) = a(1) + t !$OMP PARALLEL DO DO 30 j = 1,n c(j) = a(j) 30 END DO - t = mysecond() - t + t = timef() - t IF (LHOOK) CALL DR_HOOK('STREAM_COPY'//TRIM(tag),1,ZHOOK_1) c(n) = c(n) + t times(1,k) = t IF (LHOOK) CALL DR_HOOK('STREAM_SCALE'//TRIM(tag),0,ZHOOK_2) - t = mysecond() + t = timef() c(1) = c(1) + t !$OMP PARALLEL DO DO 40 j = 1,n b(j) = scalar*c(j) 40 END DO - t = mysecond() - t + t = timef() - t IF (LHOOK) CALL DR_HOOK('STREAM_SCALE'//TRIM(tag),1,ZHOOK_2) b(n) = b(n) + t times(2,k) = t IF (LHOOK) CALL DR_HOOK('STREAM_ADD'//TRIM(tag),0,ZHOOK_3) - t = 
mysecond() + t = timef() a(1) = a(1) + t !$OMP PARALLEL DO DO 50 j = 1,n c(j) = a(j) + b(j) 50 END DO - t = mysecond() - t + t = timef() - t IF (LHOOK) CALL DR_HOOK('STREAM_ADD'//TRIM(tag),1,ZHOOK_3) c(n) = c(n) + t times(3,k) = t IF (LHOOK) CALL DR_HOOK('STREAM_TRIAD'//TRIM(tag),0,ZHOOK_4) - t = mysecond() + t = timef() b(1) = b(1) + t !$OMP PARALLEL DO DO 60 j = 1,n a(j) = b(j) + scalar*c(j) 60 END DO - t = mysecond() - t + t = timef() - t IF (LHOOK) CALL DR_HOOK('STREAM_TRIAD'//TRIM(tag),1,ZHOOK_4) a(n) = a(n) + t @@ -341,15 +345,15 @@ INTEGER FUNCTION checktick() DOUBLE PRECISION timesfound(n) ! .. ! .. External Functions .. - DOUBLE PRECISION mysecond - EXTERNAL mysecond + DOUBLE PRECISION timef + EXTERNAL timef ! .. ! .. Intrinsic Functions .. INTRINSIC max,min,nint ! .. i = 0 t1=-1 -10 t2 = mysecond() +10 t2 = timef() IF (t2.EQ.t1) GO TO 10 t1 = t2 @@ -457,4 +461,4 @@ function itoa(i) result(res) res = trim(tmp) end function itoa -END MODULE stream_mod +END MODULE From bcd9d788918c23c016aafa07c38eca8872df7a6b Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 24 Jul 2024 15:10:52 +0000 Subject: [PATCH 03/32] Fix invalid arguments error to PAPI_list_events --- src/fiat/drhook/drhook_papi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fiat/drhook/drhook_papi.c b/src/fiat/drhook/drhook_papi.c index ce7f685d..21b44a9a 100644 --- a/src/fiat/drhook/drhook_papi.c +++ b/src/fiat/drhook/drhook_papi.c @@ -273,10 +273,10 @@ int dr_hook_papi_start_threads(int* events){ } } - int number; - int * checkEvents=malloc(prof_papi_numcntrs*sizeof(int)); - papiErr = PAPI_list_events(events[thread],checkEvents , &number); - if ( papiErr != PAPI_OK){ + int number = prof_papi_numcntrs; + int* checkEvents=malloc(prof_papi_numcntrs*sizeof(int)); + papiErr = PAPI_list_events(events[thread], checkEvents, &number); + if (papiErr != PAPI_OK){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error querying events - %d=%s",papiErr,PAPI_strerror(papiErr)); 
printf("%s\n",pmsg); return 0; From 374e33afa4386bd848c96b6d15fe967bd7576185 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 24 Jul 2024 15:13:06 +0000 Subject: [PATCH 04/32] Fix DR_HOOK_SILENT option --- src/fiat/drhook/drhook_papi.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/fiat/drhook/drhook_papi.c b/src/fiat/drhook/drhook_papi.c index 21b44a9a..ed044b10 100644 --- a/src/fiat/drhook/drhook_papi.c +++ b/src/fiat/drhook/drhook_papi.c @@ -8,6 +8,8 @@ #define STD_MSG_LEN 4096 +static int silent = 0; + int* drhook_papi_event_set=NULL; enum { drhook_papi_notstarted, @@ -129,8 +131,8 @@ int drhook_papi_init(int rank){ int lib_version; char pmsg[STD_MSG_LEN]; int paperr=-1; + char *env; - if (drhook_papi_state==drhook_papi_running) return 1; if (drhook_papi_state==drhook_papi_failed) return 0; @@ -140,6 +142,9 @@ int drhook_papi_init(int rank){ printf("DRHOOK:PAPI: Tried to initialise from a parallel region :-(\n"); return 0; } + + env = getenv("DR_HOOK_SILENT"); + silent = env ? 
atoi(env) : silent; paperr=PAPI_library_init(PAPI_VER_CURRENT); if (paperr != PAPI_VER_CURRENT){ @@ -196,7 +201,7 @@ int drhook_papi_init(int rank){ PAPI_VERSION_REVISION( lib_version ), nthreads ); - if (drhook_papi_rank==0) printf("%s\n",pmsg); + if (drhook_papi_rank==0 && !silent) printf("%s\n",pmsg); drhook_papi_event_set=malloc(nthreads*sizeof(int)); @@ -207,7 +212,7 @@ int drhook_papi_init(int rank){ /* if (failed){ drhook_papi_state=drhook_papi_failed ; return 0;} */ drhook_papi_state=drhook_papi_running; - if (drhook_papi_rank==0) printf("DRHOOK:PAPI: Initialisation sucess\n"); + if (drhook_papi_rank==0 && !silent) printf("DRHOOK:PAPI: Initialisation sucess\n"); return 1; } @@ -224,16 +229,18 @@ int dr_hook_papi_start_threads(int* events){ return 0; } - printf("DRHOOK:PAPI: Event set %d created for thread %d\n",events[thread],thread); + if (!silent) printf("DRHOOK:PAPI: Event set %d created for thread %d\n",events[thread],thread); int prof_papi_numcntrs=NPAPICNTRS; for (int counter=0; counter < prof_papi_numcntrs; counter ++){ int eventCode; - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: %s (%s)",hookCounters[counter][0],hookCounters[counter][1]); - if (drhook_papi_rank==0) { - if (thread==0) { - printf("%s\n",pmsg); + if (!silent) { + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: %s (%s)",hookCounters[counter][0],hookCounters[counter][1]); + if (drhook_papi_rank==0) { + if (thread==0) { + printf("%s\n",pmsg); + } } } From 61d3927587da1249f4b0615a8415e9747dd4ec59 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Mon, 19 Aug 2024 15:07:32 +0100 Subject: [PATCH 05/32] Change error messages to be more explicit Fix typo --- src/fiat/drhook/drhook_papi.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/fiat/drhook/drhook_papi.c b/src/fiat/drhook/drhook_papi.c index ed044b10..c9da02e9 100644 --- a/src/fiat/drhook/drhook_papi.c +++ b/src/fiat/drhook/drhook_papi.c @@ -105,11 +105,11 @@ long_long drhook_papi_read(int 
counterId){ int drhook_papi_readAll(long_long * counterArray){ if (drhook_papi_state!=drhook_papi_running){ - printf("Fault: papi not running\n"); + printf("DRHOOK:PAPI: Error reading counters, papi is not running\n"); exit (1); } if (!drhook_papi_event_set){ - printf("Fault: Eventset was null\n"); + printf("DRHOOK:PAPI: Error reading counters, eventset\n"); exit (1); } int err=PAPI_read(drhook_papi_event_set[safe_thread_num()],counterArray); @@ -139,7 +139,7 @@ int drhook_papi_init(int rank){ drhook_papi_rank=rank; if (oml_in_parallel()){ - printf("DRHOOK:PAPI: Tried to initialise from a parallel region :-(\n"); + printf("DRHOOK:PAPI: Error, tried to initialise from a parallel region :-(\n"); return 0; } @@ -152,32 +152,32 @@ int drhook_papi_init(int rank){ paperr,PAPI_VER_CURRENT); printf("%s\n",pmsg); if (paperr > 0) { - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: library version mismatch between compilation and run!\n"); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, library version mismatch between compilation and run!\n"); printf("%s\n",pmsg); return 0; } if (paperr == PAPI_EINVAL){ - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: PAPI_EINVAL\n"); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, PAPI_EINVAL\n"); printf("%s\n",pmsg); return 0; } if (paperr == PAPI_ENOMEM){ - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: PAPI_ENOMEM\n"); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, PAPI_ENOMEM\n"); printf("%s\n",pmsg); return 0; } if (paperr == PAPI_ESBSTR){ - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: PAPI_ESBSTR\n"); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, PAPI_ESBSTR\n"); printf("%s\n",pmsg); return 0; } if (paperr == PAPI_ESYS){ - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: PAPI_ESYS\n"); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, PAPI_ESYS\n"); printf("%s\n",pmsg); return 0; } else { - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Unknown error code\n"); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, unknown error code: %d\n", paperr); printf("%s\n",pmsg); return 
0; } @@ -190,7 +190,7 @@ int drhook_papi_init(int rank){ paperr=PAPI_thread_init(safe_thread_num); if( paperr != PAPI_OK ){ - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: thread init failed (%s)",PAPI_strerror(paperr)); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, thread init failed (%s)",PAPI_strerror(paperr)); printf("%s\n",pmsg); return 0; } @@ -224,7 +224,7 @@ int dr_hook_papi_start_threads(int* events){ events[thread]=PAPI_NULL; papiErr=PAPI_create_eventset(&events[thread]); if (papiErr != PAPI_OK){ - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: create event set failed (%s) \n",PAPI_strerror(papiErr)); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, create event set failed (%s) \n",PAPI_strerror(papiErr)); printf("%s\n",pmsg); return 0; } @@ -246,7 +246,7 @@ int dr_hook_papi_start_threads(int* events){ papiErr=PAPI_event_name_to_code(hookCounters[counter][0],&eventCode); if (papiErr != PAPI_OK){ - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: event name to code failed (%s)",PAPI_strerror(papiErr)); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, event name to code failed (%s)",PAPI_strerror(papiErr)); printf("%s\n",pmsg); PAPI_perror("initPapi"); return 0; @@ -254,16 +254,16 @@ int dr_hook_papi_start_threads(int* events){ papiErr=PAPI_add_event(events[thread],eventCode); if (papiErr!=PAPI_OK){ - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: add_event failed: %d (%s)",papiErr,PAPI_strerror(papiErr)); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, add_event failed: %d (%s)",papiErr,PAPI_strerror(papiErr)); printf("%s\n",pmsg); if (papiErr == PAPI_EINVAL) - printf("Invalid argumet"); + printf("Invalid argument"); else if (papiErr == PAPI_ENOMEM) - printf("Out of Mmemory"); + printf("Out of memory"); else if (papiErr == PAPI_ENOEVST) printf("EventSet does not exist"); else if (papiErr == PAPI_EISRUN) - printf("EventSet is running"); + printf("EventSet is running"); else if (papiErr == PAPI_ECNFLCT) printf("Conflict"); else if (papiErr == PAPI_ENOEVNT) @@ -303,7 +303,7 @@ int 
dr_hook_papi_start_threads(int* events){ papiErr=PAPI_start(events[thread]); if (papiErr != PAPI_OK) { - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: starting counters failed (%d=%s)",papiErr,PAPI_strerror(papiErr)); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, starting counters failed (%d=%s)",papiErr,PAPI_strerror(papiErr)); printf("%s\n",pmsg); return 0; } From d652f0ddf85a55738e07b8544c6d4b9f1f8380a8 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Mon, 19 Aug 2024 15:07:54 +0100 Subject: [PATCH 06/32] Fix missing intent --- src/fiat/drhook/internal/drhook_run_omp_parallel.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 index 6c3c0a02..958041d2 100644 --- a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 +++ b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 @@ -82,7 +82,7 @@ subroutine drhook_run_omp_parallel_papi_startup(events,n) bind(c) use OML_MOD implicit none INTEGER(KIND=C_INT), INTENT(INOUT) :: Events(n) - INTEGER(KIND=C_INT), VALUE :: n + INTEGER(KIND=C_INT), VALUE, INTENT(IN) :: n INTEGER(KIND=C_INT) :: thread INTEGER(KIND=C_INT) :: rc,rcOut INTEGER :: myThread From 8e0a9e2e7d627b71ce6dfd2df57c3ce97c255ea9 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Mon, 19 Aug 2024 15:12:49 +0100 Subject: [PATCH 07/32] Add new debug print This new print clarifies what the next set of prints are referring to. It also slightly optimises the subsequent control flow. 
--- src/fiat/drhook/drhook_papi.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/fiat/drhook/drhook_papi.c b/src/fiat/drhook/drhook_papi.c index c9da02e9..97a4e549 100644 --- a/src/fiat/drhook/drhook_papi.c +++ b/src/fiat/drhook/drhook_papi.c @@ -232,16 +232,15 @@ int dr_hook_papi_start_threads(int* events){ if (!silent) printf("DRHOOK:PAPI: Event set %d created for thread %d\n",events[thread],thread); int prof_papi_numcntrs=NPAPICNTRS; + if (!silent && drhook_papi_rank==0 && thread==0) + printf("DRHOOK:PAPI: Attempting to add events to event set:\n"); + for (int counter=0; counter < prof_papi_numcntrs; counter ++){ int eventCode; - - if (!silent) { + + if (!silent && drhook_papi_rank==0 && thread==0) { snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: %s (%s)",hookCounters[counter][0],hookCounters[counter][1]); - if (drhook_papi_rank==0) { - if (thread==0) { - printf("%s\n",pmsg); - } - } + printf("%s\n",pmsg); } papiErr=PAPI_event_name_to_code(hookCounters[counter][0],&eventCode); From 8553e73490ed51d1db63b190eb4977a1f1e1b209 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Wed, 21 Aug 2024 15:24:12 +0100 Subject: [PATCH 08/32] Add functionality for user specified PAPI events This allows users to specify their own PAPI events (up to MAXNPAPICNTRS, defaulting to 4). This is done with the DR_HOOK_PAPI_COUNTERS flag. If the flag is not specified, and PAPI is enabled, then the following defaults will be chosen: PAPI_TOT_CYC PAPI_FP_OPS PAPI_L1_DCA PAPI_L2_DCM If an invalid event is chosen, then DrHook will simply crash with an appropriate error message. 
--- src/fiat/drhook/drhook.c | 59 ++++++++++++-- src/fiat/drhook/drhook_papi.c | 81 +++++++++++-------- src/fiat/drhook/drhook_papi.h | 8 +- .../internal/drhook_run_omp_parallel.F90 | 5 +- 4 files changed, 108 insertions(+), 45 deletions(-) diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index 9836cc56..de98a627 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -2609,7 +2609,7 @@ process_options() opt_cpuprof = 0; /* Note: Switches cpuprof OFF */ opt_calls = 1; opt_cycles = 1; - opt_papi = 1; + opt_papi = 1; OPTPRINT(fp,"%s%s",comma,"COUNTERS"); comma = ","; } else if (strequ(p,"CPUPROF")) { @@ -2687,6 +2687,45 @@ process_options() OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_CALLTRACE=%d\n",pfx,TIMESTR(tid),FFL,opt_calltrace); } +#if defined(DR_HOOK_HAVE_PAPI) + if (opt_papi) { + newline = 0; + env = getenv("DR_HOOK_PAPI_COUNTERS"); + if (env) { + const char delim[] = ", \t/"; + char *comma = " DR_HOOK_PAPI_COUNTERS=\""; + char *s = strdup_drhook(env); + char *p = s; + while (*p) { + if (islower(*p)) *p = toupper(*p); + p++; + } + p = strtok(s,delim); + for (int i = 0; p && i < drhook_papi_max_num_counters(); p = strtok(NULL,delim), i++) { + drhook_papi_add_counter_name(strdup_drhook(p)); + OPTPRINT(fp,"%s%s",comma,p); comma = ","; + } + + free_drhook(s); + if (*comma == ',') { + OPTPRINT(fp,"\"\n"); + newline = 0; + } + if (newline) OPTPRINT(fp,"\n"); + } else { + const char* default_events[4] = { + "PAPI_TOT_CYC", + "PAPI_FP_OPS", + "PAPI_L1_DCA", + "PAPI_L2_DCM" + }; + for (int i = 0; i < 4; i++) { + drhook_papi_add_counter_name(strdup_drhook(default_events[i])); + } + } + } +#endif + if (opt_wallprof || opt_cpuprof || opt_memprof || opt_timeline) { atexit(do_prof); } @@ -4585,15 +4624,21 @@ c_drhook_print_(const int *ftnunitno, if (opt_papi){ p=prof; int first_counter_is_cyc=0; - if (strcmp(drhook_papi_counter_name(0,0),"PAPI_TOT_CYC")==0) + char event_name[drhook_papi_max_name_len()]; + drhook_papi_counter_name(0,event_name); + 
if (strcmp(event_name,"PAPI_TOT_CYC")==0) first_counter_is_cyc=1; { len = fprintf(fpcsv,"Routine,MPI Rank,ThreadId,SelfRank,%% Self Time,Cumul,Excl Time,Incl. Time,#Calls"); - for (int c=0;ccluster]; if (opt_cputime) - cumul += p->self; + cumul += p->self; else if (p->is_max || cluster_size == 1) cumul += p->self; diff --git a/src/fiat/drhook/drhook_papi.c b/src/fiat/drhook/drhook_papi.c index 97a4e549..d6a2ef5e 100644 --- a/src/fiat/drhook/drhook_papi.c +++ b/src/fiat/drhook/drhook_papi.c @@ -19,13 +19,9 @@ enum { int drhook_papi_state=drhook_papi_notstarted; int drhook_papi_rank=0; /* C style! */ -/* hardwired for now */ -const char* hookCounters[NPAPICNTRS][2]= { - {"PAPI_TOT_CYC","Cycles"}, - {"PAPI_FP_OPS","FP Operations"}, - {"PAPI_L1_DCA","L1 Access"}, - {"PAPI_L2_DCM","L2 Miss"} -}; +static int papi_counter_event_codes[MAXNPAPICNTRS]; +static char* papi_counter_names[MAXNPAPICNTRS]; +static int papi_counters_count; /* function to use for thread id - it should be better than omp_get_thread_num! 
@@ -34,8 +30,16 @@ unsigned long safe_thread_num(){ return oml_my_thread()-1; } -const char* drhook_papi_counter_name(int c,int t){ - return hookCounters[c][t]; +int drhook_papi_max_num_counters() { + return MAXNPAPICNTRS; +} + +int drhook_papi_max_name_len() { + return PAPI_MAX_STR_LEN; +} + +void drhook_papi_counter_name(int c, char* event_name){ + PAPI_event_code_to_name(papi_counter_event_codes[c], event_name); } void drhook_papi_cpy(long_long* a,long_long* b){ @@ -56,8 +60,12 @@ void drhook_papi_print(char* s, long_long* a, int header){ char fmt[STD_MSG_LEN]; sprintf(fmt,"%%%lds",strlen(s)); sprintf(msg,fmt," "); - for (int i=0;i -#define NPAPICNTRS 4 +#define MAXNPAPICNTRS 4 int drhook_papi_init(int rank); int drhook_papi_num_counters(); -const char* drhook_papi_counter_name(int c,int t); +int drhook_papi_max_num_counters(); +void drhook_papi_counter_name(int c, char* event_name); +void drhook_papi_add_counter_name(const char* counter_name); long_long drhook_papi_read(int counterId); int drhook_papi_readAll(long_long * counterArray); /* implemented in forrtran */ -int drhook_run_omp_parallel_papi_startup(int * drhook_papi_event_set,int nthreads); +int drhook_run_omp_parallel_papi_startup(int * drhook_papi_event_set,int nthreads, int* rcout); /* a = b - c if b or c == NULL means use current readings diff --git a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 index 958041d2..ce9780c3 100644 --- a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 +++ b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 @@ -76,7 +76,7 @@ end subroutine drhook_run_omp_parallel_get_cycles #if defined(DR_HOOK_HAVE_PAPI) -subroutine drhook_run_omp_parallel_papi_startup(events,n) bind(c) +subroutine drhook_run_omp_parallel_papi_startup(events,n, rcOut) bind(c) use, intrinsic :: iso_c_binding, only : c_char, c_int, c_double use drhook_papi_interface use OML_MOD @@ -84,7 +84,8 @@ subroutine 
drhook_run_omp_parallel_papi_startup(events,n) bind(c) INTEGER(KIND=C_INT), INTENT(INOUT) :: Events(n) INTEGER(KIND=C_INT), VALUE, INTENT(IN) :: n INTEGER(KIND=C_INT) :: thread - INTEGER(KIND=C_INT) :: rc,rcOut + INTEGER(KIND=C_INT) :: rc + INTEGER(KIND=C_INT), INTENT(OUT) :: rcOut INTEGER :: myThread INTEGER :: nThreads From 23810e1e96c98a9b8ada0b971704747e24bd8673 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Wed, 28 Aug 2024 14:22:42 +0100 Subject: [PATCH 09/32] Fix whitespacing This commit removes all tabs, in favour of two spaces, in files touched by #26 and #27. --- src/fiat/drhook/drhook.c | 250 +++++++++--------- src/fiat/drhook/drhook_papi.c | 27 +- .../internal/drhook_run_omp_parallel.F90 | 2 +- src/fiat/system/internal/opfla_perfmon.c | 10 +- 4 files changed, 148 insertions(+), 141 deletions(-) diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index de98a627..8d5cb45a 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -1051,7 +1051,8 @@ insert_calltree(int tid, drhook_key_t *keyptr) static void remove_calltree(int tid, drhook_key_t *keyptr, const double *delta_wall, const double *delta_cpu, - const long long int *delta_cycles,long_long * delta_counters) + const long long int *delta_cycles,long_long * delta_counters + ) { if (tid >= 1 && tid <= numthreads) { drhook_calltree_t *treeptr = thiscall[tid-1]; @@ -1061,10 +1062,10 @@ remove_calltree(int tid, drhook_key_t *keyptr, drhook_key_t *parent_keyptr = treeptr->prev->keyptr; if (parent_keyptr) { /* extra security */ #if defined(DR_HOOK_HAVE_PAPI) - drhook_papi_add(NULL, - parent_keyptr->delta_counters_child, - delta_counters - ); + drhook_papi_add(NULL, + parent_keyptr->delta_counters_child, + delta_counters + ); #endif if (opt_walltime) { parent_keyptr->delta_wall_child += (*delta_wall); @@ -1626,7 +1627,7 @@ signal_drhook(int sig SIG_EXTRA_ARGS) ------------------------------------------------------------*/ /* if (sig != SIGTERM) signal(SIGTERM, SIG_DFL); */ /* 
Let the default SIGTERM to occur */ - + // max_threads = drhook_oml_get_max_threads(); if (nsigs == 1) { /*---- First call to signal handler: call alarm(drhook_harakiri_timeout), tracebacks, exit ------*/ @@ -2648,7 +2649,7 @@ process_options() opt_callpath = 1; OPTPRINT(fp,"%s%s",comma,"CALLPATH"); comma = ","; } else { - printf("DrHook: Note - no match for HOOK_OPT : %s\n",p); + printf("DrHook: Note - no match for HOOK_OPT : %s\n",p); } p = strtok(NULL,delim); } @@ -2865,9 +2866,9 @@ getkey(int tid, const char *name, int name_len, (opt_trim && strncasecmp(keyptr->name, name, name_len) == 0)))) { if (opt_walltime) keyptr->wall_in = walltime ? *walltime : WALLTIME(); if (opt_cputime) keyptr->cpu_in = cputime ? *cputime : CPUTIME(); - if (opt_cycles) keyptr->cycles_in = cycles ? *cycles : ec_get_cycles(); + if (opt_cycles) keyptr->cycles_in = cycles ? *cycles : ec_get_cycles(); #if defined(DR_HOOK_HAVE_PAPI) - drhook_papi_readAll(keyptr->counters_in); + drhook_papi_readAll(keyptr->counters_in); #endif if (any_memstat) memstat(keyptr,&tid,1); if (opt_calls) { @@ -3465,7 +3466,7 @@ c_drhook_check_watch_(const char *where, double cputime = opt_cputime ? CPUTIME() : 0; \ long long int cycles = opt_cycles ? ec_get_cycles() : 0; \ long long int hwm = opt_gethwm ? gethwm_() : 0; \ - long long int stk = opt_getstk ? getstk_() : 0; \ + long long int stk = opt_getstk ? getstk_() : 0; \ PAPIREAD /*=== c_drhook_set_lhook_ ===*/ @@ -4349,11 +4350,11 @@ c_drhook_print_(const int *ftnunitno, while (keyptr) { if (keyptr->name && (keyptr->status == 0 || signal_handler_called)) { #if defined(DR_HOOK_HAVE_PAPI) - drhook_papi_subtract(p->counter_self, - keyptr->delta_counters_all, - keyptr->delta_counters_child); - drhook_papi_cpy(p->counter_tot, - keyptr->delta_counters_all); + drhook_papi_subtract(p->counter_self, + keyptr->delta_counters_all, + keyptr->delta_counters_child + ); + drhook_papi_cpy(p->counter_tot, keyptr->delta_counters_all); #endif p->self = opt_wallprof ? 
keyptr->delta_wall_all - keyptr->delta_wall_child : @@ -4406,19 +4407,20 @@ c_drhook_print_(const int *ftnunitno, pfx,TIMESTR(tid),FFL, myproc,filename); } - fp = fopen(filename,"w"); + fp = fopen(filename,"w"); if (!fp) goto finish_3; - if (opt_papi==1){ - if ((myproc == 1 && mon_out_procs == -1) || mon_out_procs == myproc) { - fprintf(stderr, - "%s %s [%s@%s:%d] Writing counter information of proc#%d into file '%s'\n", - pfx,TIMESTR(tid),FFL, - myproc,csvfilename); - } - fpcsv = fopen(csvfilename,"w"); - if (!fpcsv) goto finish_3; - } + if (opt_papi==1){ + if ((myproc == 1 && mon_out_procs == -1) || mon_out_procs == myproc) { + fprintf(stderr, + "%s %s [%s@%s:%d] Writing counter information of proc#%d into file '%s'\n", + pfx,TIMESTR(tid),FFL, + myproc,csvfilename + ); + } + fpcsv = fopen(csvfilename,"w"); + if (!fpcsv) goto finish_3; + } /* alphanumerical sorting to find out clusters of the same routine but on different threads */ /* also find out total wall clock time */ @@ -4574,105 +4576,109 @@ c_drhook_print_(const int *ftnunitno, } } - fprintf(fp,"\n"); - { - len = - fprintf(fp," # %% Time Cumul Self Total # of calls Self Total "); - } - fprintf(fp,"Routine@"); - if (opt_clusterinfo) fprintf(fp," [Cluster:(id,size)]"); - fprintf(fp,"\n"); - if (opt_sizeinfo) fprintf(fp,"%*s %s\n",len-20," ","(Size; Size/sec; Size/call; MinSize; MaxSize)"); - fprintf(fp, " (self) (sec) (sec) (sec) ms/call ms/call\n"); - fprintf(fp,"\n"); - - cumul = 0; - for (j=0; jcluster]; - if (p->pc < percent_limit) break; - if (opt_cputime) { - cumul += p->self; - } - else { - if (p->is_max || cluster_size == 1) cumul += p->self; - } - - { - fprintf(fp, fmt, - ++j, p->pc, cumul, p->self, p->total, p->calls, - p->percall_ms_self, p->percall_ms_total, - p->is_max ? 
"*" : " "); - } - print_routine_name(fp, p, len, cluster_size); - - if (opt_sizeinfo && p->sizeinfo > 0) { - char s1[DRHOOK_STRBUF], s2[DRHOOK_STRBUF], s3[DRHOOK_STRBUF]; - char s4[DRHOOK_STRBUF], s5[DRHOOK_STRBUF]; - lld_commie(p->sizeinfo,s1); - dbl_commie(p->sizespeed,s2); - dbl_commie(p->sizeavg,s3); - lld_commie(p->min_sizeinfo,s4); - lld_commie(p->max_sizeinfo,s5); - fprintf(fp,"\n%*s (%s; %s; %s; %s; %s)",len-20," ",s1,s2,s3,s4,s5); - } - fprintf(fp,"\n"); - p++; - } /* for (j=0; j"); + if (opt_clusterinfo) + fprintf(fp, " [Cluster:(id,size)]"); + fprintf(fp, "\n"); + if (opt_sizeinfo) + fprintf(fp, "%*s %s\n",len-20," ","(Size; Size/sec; Size/call; MinSize; MaxSize)"); + fprintf(fp, " (self) (sec) (sec) (sec) ms/call ms/call\n"); + fprintf(fp, "\n"); + + cumul = 0; + for (j = 0; j < nprof; ) { + int cluster_size = clusize[p->cluster]; + if (p->pc < percent_limit) + break; + if (opt_cputime) { + cumul += p->self; + } else { + if (p->is_max || cluster_size == 1) + cumul += p->self; + } + + { + fprintf(fp, fmt, + ++j, p->pc, cumul, p->self, p->total, p->calls, + p->percall_ms_self, p->percall_ms_total, + p->is_max ? "*" : " "); + } + print_routine_name(fp, p, len, cluster_size); + + if (opt_sizeinfo && p->sizeinfo > 0) { + char s1[DRHOOK_STRBUF], s2[DRHOOK_STRBUF], s3[DRHOOK_STRBUF]; + char s4[DRHOOK_STRBUF], s5[DRHOOK_STRBUF]; + lld_commie(p->sizeinfo,s1); + dbl_commie(p->sizespeed,s2); + dbl_commie(p->sizeavg,s3); + lld_commie(p->min_sizeinfo,s4); + lld_commie(p->max_sizeinfo,s5); + fprintf(fp,"\n%*s (%s; %s; %s; %s; %s)",len-20," ",s1,s2,s3,s4,s5); + } + fprintf(fp,"\n"); + p++; + } /* for (j=0; jcluster]; - if (opt_cputime) - cumul += p->self; - else - if (p->is_max || cluster_size == 1) cumul += p->self; - - { - fprintf(fpcsv, csvfmt, - p->name, - myproc-1, - p->tid-1 - ,++j, p->pc, cumul, p->self, p->total, p->calls, - p->is_max ? 
"*" : " "); - for (int c=0;ccounter_self[c]); - for (int c=0;ccounter_tot[c]); - if (first_counter_is_cyc==1) - fprintf(fpcsv,",%.3f,%.3f", - p->counter_self[0]/p->self/1000000.0, - p->counter_tot[0]/p->total/1000000.0 - ); - } - fprintf(fpcsv,"\n"); - p++; - } /* for (j=0; jcluster]; + if (opt_cputime) + cumul += p->self; + else + if (p->is_max || cluster_size == 1) cumul += p->self; + + { + fprintf(fpcsv, csvfmt, + p->name, + myproc-1, + p->tid-1, + ++j, p->pc, cumul, p->self, p->total, p->calls, + p->is_max ? "*" : " " + ); + for (int c=0;ccounter_self[c]); + for (int c=0;ccounter_tot[c]); + if (first_counter_is_cyc==1) + fprintf(fpcsv,",%.3f,%.3f", + p->counter_self[0]/p->self/1000000.0, + p->counter_tot[0]/p->total/1000000.0 + ); + } + fprintf(fpcsv, "\n"); + p++; + } /* for (j=0; j 0) { snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, library version mismatch between compilation and run!\n"); @@ -209,10 +208,10 @@ int drhook_papi_init(int rank){ } snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Version %d.%d.%d initialised with %d threads", - PAPI_VERSION_MAJOR( lib_version ), - PAPI_VERSION_MINOR( lib_version ), - PAPI_VERSION_REVISION( lib_version ), - nthreads ); + PAPI_VERSION_MAJOR( lib_version ), + PAPI_VERSION_MINOR( lib_version ), + PAPI_VERSION_REVISION( lib_version ), + nthreads); if (drhook_papi_rank==0 && !silent) printf("%s\n",pmsg); @@ -271,17 +270,17 @@ int dr_hook_papi_start_threads(int* events){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, add_event failed: %d (%s)",papiErr,PAPI_strerror(papiErr)); printf("%s\n",pmsg); if (papiErr == PAPI_EINVAL) - printf("Invalid argument\n"); + printf("Invalid argument\n"); else if (papiErr == PAPI_ENOMEM) - printf("Out of memory\n"); + printf("Out of memory\n"); else if (papiErr == PAPI_ENOEVST) - printf("EventSet does not exist\n"); + printf("EventSet does not exist\n"); else if (papiErr == PAPI_EISRUN) - printf("EventSet is running\n"); + printf("EventSet is running\n"); else if (papiErr == PAPI_ECNFLCT) - 
printf("Conflict\n"); + printf("Conflict\n"); else if (papiErr == PAPI_ENOEVNT) - printf("Preset not available\n"); + printf("Preset not available\n"); return 0; } else { @@ -304,8 +303,8 @@ int dr_hook_papi_start_threads(int* events){ } #if defined(DEBUG) for (int counter=0;counter Date: Tue, 22 Oct 2024 16:23:16 +0100 Subject: [PATCH 10/32] Fix malloc -> malloc_drhook --- src/fiat/drhook/drhook_papi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fiat/drhook/drhook_papi.c b/src/fiat/drhook/drhook_papi.c index ae3e30d2..a5a60cb3 100644 --- a/src/fiat/drhook/drhook_papi.c +++ b/src/fiat/drhook/drhook_papi.c @@ -139,6 +139,8 @@ void drhook_papi_readall_(long_long * counterArray){ drhook_papi_readAll(counterArray); } +extern static void *malloc_drhook(size_t size); + /* return 1 if papi can be used after the call */ int drhook_papi_init(int rank){ int lib_version; @@ -215,7 +217,7 @@ int drhook_papi_init(int rank){ if (drhook_papi_rank==0 && !silent) printf("%s\n",pmsg); - drhook_papi_event_set=malloc(nthreads*sizeof(int)); + drhook_papi_event_set=malloc_drhook(nthreads*sizeof(int)); int rcout; drhook_run_omp_parallel_papi_startup(drhook_papi_event_set,nthreads, &rcout); From b19fdd2c477670ee945a3a4cd7271ee7253ae960 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Tue, 22 Oct 2024 16:23:45 +0100 Subject: [PATCH 11/32] Fix safe_thread_num -> papi_safe_thread_num --- src/fiat/drhook/drhook_papi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/fiat/drhook/drhook_papi.c b/src/fiat/drhook/drhook_papi.c index a5a60cb3..27da6d00 100644 --- a/src/fiat/drhook/drhook_papi.c +++ b/src/fiat/drhook/drhook_papi.c @@ -26,7 +26,7 @@ static int papi_counters_count; /* function to use for thread id - it should be better than omp_get_thread_num! 
*/ -unsigned long safe_thread_num(){ +unsigned long papi_safe_thread_num(){ return oml_my_thread()-1; } @@ -125,9 +125,9 @@ int drhook_papi_readAll(long_long * counterArray){ printf("DRHOOK:PAPI: Error reading counters, eventset\n"); exit (1); } - int err=PAPI_read(drhook_papi_event_set[safe_thread_num()],counterArray); + int err=PAPI_read(drhook_papi_event_set[papi_safe_thread_num()],counterArray); if (err!=PAPI_OK){ - printf("DRHOOK:PAPI:PAPI_read: Error reading counters, thread=%ld es=%d %s\n",safe_thread_num(),drhook_papi_event_set[safe_thread_num()],PAPI_strerror(err)); + printf("DRHOOK:PAPI:PAPI_read: Error reading counters, thread=%ld es=%d %s\n",papi_safe_thread_num(),drhook_papi_event_set[papi_safe_thread_num()],PAPI_strerror(err)); } #if defined(DEBUG) drhook_papi_print("readAll:",counterArray,0); @@ -201,7 +201,7 @@ int drhook_papi_init(int rank){ int nthreads=oml_get_max_threads(); - paperr=PAPI_thread_init(safe_thread_num); + paperr=PAPI_thread_init(papi_safe_thread_num); if( paperr != PAPI_OK ){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error, thread init failed (%s)",PAPI_strerror(paperr)); @@ -233,7 +233,7 @@ int drhook_papi_init(int rank){ } int dr_hook_papi_start_threads(int* events){ - int thread=safe_thread_num(); + int thread=papi_safe_thread_num(); int papiErr; char pmsg[STD_MSG_LEN]; From 8049e0499406d713260e78d37487896a5818cc77 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Wed, 23 Oct 2024 15:23:44 +0100 Subject: [PATCH 12/32] Add if (opt_papi) guards While PAPI code was behind #ifdefs, opt_papi was only used for toggling the printing of counters. PAPI would still be running as part of DrHook. To be in line with other options, PAPI is now behind both #ifdefs statically and if (opt_papi) dynamically. 
--- src/fiat/drhook/drhook.c | 52 +++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index 66afc71c..94a778f2 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -1075,10 +1075,11 @@ remove_calltree(int tid, drhook_key_t *keyptr, drhook_key_t *parent_keyptr = treeptr->prev->keyptr; if (parent_keyptr) { /* extra security */ #if defined(DR_HOOK_HAVE_PAPI) - drhook_papi_add(NULL, - parent_keyptr->delta_counters_child, - delta_counters - ); + if (opt_papi) + drhook_papi_add(NULL, + parent_keyptr->delta_counters_child, + delta_counters + ); #endif if (opt_walltime) { parent_keyptr->delta_wall_child += (*delta_wall); @@ -2925,7 +2926,7 @@ getkey(int tid, const char *name, int name_len, if (opt_cputime) keyptr->cpu_in = cputime ? *cputime : CPUTIME(); if (opt_cycles) keyptr->cycles_in = cycles ? *cycles : ec_get_cycles(); #if defined(DR_HOOK_HAVE_PAPI) - drhook_papi_readAll(keyptr->counters_in); + if (opt_papi) drhook_papi_readAll(keyptr->counters_in); #endif if (any_memstat) memstat(keyptr,&tid,1); if (opt_calls) { @@ -3045,11 +3046,6 @@ putkey(int tid, drhook_key_t *keyptr, const char *name, int name_len, else if (tid >= 1 && tid <= numthreads) { double delta_wall = 0; double delta_cpu = 0; - long_long * delta_counters = NULL; -#if defined(DR_HOOK_HAVE_PAPI) - delta_counters=alloca(drhook_papi_num_counters() * sizeof(long_long) ); - drhook_papi_bzero(delta_counters); -#endif long long int delta_cycles = 0; if (any_memstat) memstat(keyptr,&tid,0); if (opt_calls) keyptr->status--; @@ -3090,9 +3086,14 @@ putkey(int tid, drhook_key_t *keyptr, const char *name, int name_len, } } #endif + long_long * delta_counters = NULL; #if defined(DR_HOOK_HAVE_PAPI) - drhook_papi_subtract(delta_counters, NULL , keyptr->counters_in); - drhook_papi_add(NULL, keyptr->delta_counters_all, delta_counters); + if (opt_papi) { + delta_counters = 
alloca(drhook_papi_num_counters() * sizeof(long_long)); + drhook_papi_bzero(delta_counters); + drhook_papi_subtract(delta_counters, NULL, keyptr->counters_in); + drhook_papi_add(NULL, keyptr->delta_counters_all, delta_counters); + } #endif remove_calltree(tid, keyptr, &delta_wall, &delta_cpu, &delta_cycles, delta_counters); } @@ -3221,7 +3222,8 @@ itself(drhook_key_t *keyptr_self, if (opt_wallprof) keyptr->wall_in = walltime ? *walltime : WALLTIME(); else keyptr->cpu_in = cputime ? *cputime : CPUTIME(); #if defined(DR_HOOK_HAVE_PAPI) - drhook_papi_readAll(keyptr->counters_in); + if (opt_papi) + drhook_papi_readAll(keyptr->counters_in); #endif keyptr->calls++; } @@ -3238,14 +3240,15 @@ itself(drhook_key_t *keyptr_self, if (delta_time) *delta_time = delta; #if defined(DR_HOOK_HAVE_PAPI) + if (opt_papi) { + long_long cntrs_delta[NPAPICNTRS]; - long_long cntrs_delta[NPAPICNTRS]; + /* cntrs_delta = current - counters_in */ + drhook_papi_subtract(cntrs_delta, NULL, keyptr->counters_in); - /* cntrs_delta = current - counters_in */ - drhook_papi_subtract(cntrs_delta, NULL, keyptr->counters_in); - - /* keyptr->delta_counters_all += cntrs_delta */ - drhook_papi_add(NULL, keyptr->delta_counters_all,cntrs_delta); + /* keyptr->delta_counters_all += cntrs_delta */ + drhook_papi_add(NULL, keyptr->delta_counters_all,cntrs_delta); + } #endif } @@ -3540,8 +3543,10 @@ c_drhook_check_watch_(const char *where, /*** PUBLIC ***/ #if defined(DR_HOOK_HAVE_PAPI) #define PAPIREAD \ - long_long cntrs[NPAPICNTRS]; \ - drhook_papi_readAll(cntrs) + if (opt_papi) { \ + long_long cntrs[NPAPICNTRS]; \ + drhook_papi_readAll(cntrs) \ + } #else #define PAPIREAD /*NOOP*/ #endif @@ -3636,7 +3641,7 @@ c_drhook_init_(const char *progname, drhook_delete_lockfile(); } #if defined(DR_HOOK_HAVE_PAPI) - drhook_papi_init(myproc -1); + if (opt_papi) drhook_papi_init(myproc -1); #endif } @@ -4435,6 +4440,9 @@ c_drhook_print_(const int *ftnunitno, while (keyptr) { if (keyptr->name && (keyptr->status == 0 || 
signal_handler_called)) { #if defined(DR_HOOK_HAVE_PAPI) + /* No point slowing down this code with an if (opt_papi) + * as it can be called by signal_drhook(). This would just be + * processing zeros anyway as we only use calloc() for keys */ drhook_papi_subtract(p->counter_self, keyptr->delta_counters_all, keyptr->delta_counters_child From 6bc73797467b6ef6abf63b55badcc6ef7a65ea07 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Wed, 23 Oct 2024 16:22:49 +0100 Subject: [PATCH 13/32] Refactor PAPI extension --- src/fiat/CMakeLists.txt | 19 +++++++++++++------ .../{ => extensions/papi}/drhook_papi.c | 3 --- .../{ => extensions/papi}/drhook_papi.h | 2 -- 3 files changed, 13 insertions(+), 11 deletions(-) rename src/fiat/drhook/{ => extensions/papi}/drhook_papi.c (99%) rename src/fiat/drhook/{ => extensions/papi}/drhook_papi.h (96%) diff --git a/src/fiat/CMakeLists.txt b/src/fiat/CMakeLists.txt index 28674ba4..08dcc332 100644 --- a/src/fiat/CMakeLists.txt +++ b/src/fiat/CMakeLists.txt @@ -83,6 +83,19 @@ if (HAVE_DR_HOOK_NVTX) endif() endif() + +if (HAVE_DR_HOOK_PAPI) + # Files from within DrHook + ecbuild_list_add_pattern( LIST fiat_papi_src GLOB *.c SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/drhook/extensions/papi) + target_sources(fiat PRIVATE ${fiat_papi_src}) + target_include_directories(fiat PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/drhook/extensions/papi) + + # Files defined externally + target_link_libraries ( fiat PRIVATE ${PAPI_LIBRARIES} ) + target_include_directories ( fiat PRIVATE ${PAPI_INCLUDE_DIRS} ) + target_compile_definitions ( fiat PRIVATE DR_HOOK_HAVE_PAPI=1 ) +endif() + if( ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" ) # Following should not be necessary; # Probably a bug in the M1 prerelease of gfortran 10.2.0.4 @@ -106,12 +119,6 @@ if( HAVE_OMP ) target_link_libraries( fiat PRIVATE OpenMP::OpenMP_Fortran ) endif() -if ( HAVE_DR_HOOK_PAPI ) - target_link_libraries ( fiat PRIVATE ${PAPI_LIBRARIES} ) - target_include_directories ( fiat PRIVATE ${PAPI_INCLUDE_DIRS} 
) - target_compile_definitions ( fiat PRIVATE DR_HOOK_HAVE_PAPI=1 ) -endif() - fiat_target_ignore_missing_symbols( TARGET fiat SYMBOLS _MallocExtension_GetNumericProperty _MPI_Get_version diff --git a/src/fiat/drhook/drhook_papi.c b/src/fiat/drhook/extensions/papi/drhook_papi.c similarity index 99% rename from src/fiat/drhook/drhook_papi.c rename to src/fiat/drhook/extensions/papi/drhook_papi.c index 27da6d00..8c4cded3 100644 --- a/src/fiat/drhook/drhook_papi.c +++ b/src/fiat/drhook/extensions/papi/drhook_papi.c @@ -1,4 +1,3 @@ -#if defined(DR_HOOK_HAVE_PAPI) #include "drhook_papi.h" #include #include @@ -329,5 +328,3 @@ int dr_hook_papi_start_threads(int* events){ return 1; } - -#endif diff --git a/src/fiat/drhook/drhook_papi.h b/src/fiat/drhook/extensions/papi/drhook_papi.h similarity index 96% rename from src/fiat/drhook/drhook_papi.h rename to src/fiat/drhook/extensions/papi/drhook_papi.h index fba74b59..95624421 100644 --- a/src/fiat/drhook/drhook_papi.h +++ b/src/fiat/drhook/extensions/papi/drhook_papi.h @@ -1,6 +1,5 @@ #ifndef DRHOOK_PAPI #define DRHOOK_PAPI -#if defined(DR_HOOK_HAVE_PAPI) #include @@ -37,4 +36,3 @@ void drhook_papi_print(char * s,long_long* a,int header); #else #define long_long long long #endif -#endif From 0e5fffeb567512e4180e3b29cc328cc4cc7f3219 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Thu, 24 Oct 2024 12:54:19 +0100 Subject: [PATCH 14/32] Fix NPAPICNTRS -> MAXNPAPICNTRS --- CMakeLists.txt | 1 - src/fiat/drhook/drhook.c | 16 ++++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f7473f35..81fd3ea2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,7 +55,6 @@ ecbuild_add_option( FEATURE WARNINGS DEFAULT ON DESCRIPTION "Add warnings to compiler" ) - ecbuild_add_option( FEATURE DR_HOOK_NVTX DEFAULT ${DEFAULT_DR_HOOK_NVTX} DESCRIPTION "Support for NVTX in DR_HOOK" diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index 94a778f2..ff1c150e 100644 --- 
a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -484,9 +484,9 @@ typedef struct drhook_key_t { long long int paging_in; #if defined(DR_HOOK_HAVE_PAPI) - long_long counters_in[NPAPICNTRS]; - long_long delta_counters_all[NPAPICNTRS]; - long_long delta_counters_child[NPAPICNTRS]; + long_long counters_in[MAXNPAPICNTRS]; + long_long delta_counters_all[MAXNPAPICNTRS]; + long_long delta_counters_child[MAXNPAPICNTRS]; #endif unsigned long long int alloc_count, free_count; @@ -521,8 +521,8 @@ typedef struct drhook_prof_t { double total; double self; #if defined(DR_HOOK_HAVE_PAPI) - long_long counter_tot[NPAPICNTRS]; - long_long counter_self[NPAPICNTRS]; + long_long counter_tot[MAXNPAPICNTRS]; + long_long counter_self[MAXNPAPICNTRS]; #endif unsigned long long int calls; double percall_ms_self; @@ -3241,7 +3241,7 @@ itself(drhook_key_t *keyptr_self, #if defined(DR_HOOK_HAVE_PAPI) if (opt_papi) { - long_long cntrs_delta[NPAPICNTRS]; + long_long cntrs_delta[MAXNPAPICNTRS]; /* cntrs_delta = current - counters_in */ drhook_papi_subtract(cntrs_delta, NULL, keyptr->counters_in); @@ -3544,8 +3544,8 @@ c_drhook_check_watch_(const char *where, #if defined(DR_HOOK_HAVE_PAPI) #define PAPIREAD \ if (opt_papi) { \ - long_long cntrs[NPAPICNTRS]; \ - drhook_papi_readAll(cntrs) \ + long_long cntrs[MAXNPAPICNTRS]; \ + drhook_papi_readAll(cntrs); \ } #else #define PAPIREAD /*NOOP*/ From b10867bd3e491ead34b8dd16475d84497387d0e5 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Thu, 24 Oct 2024 13:15:46 +0100 Subject: [PATCH 15/32] Remove static from malloc_drhook() The qualifier needed to be removed so that malloc_drhook() could be used from within drhook_papi.c. To try and retain some restricted scoping (albeit no longer enforced by the compiler), malloc_drhook() hasn't been added to a header and extern is needed to use it. 
--- src/fiat/drhook/drhook.c | 3 +-- src/fiat/drhook/extensions/papi/drhook_papi.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index ff1c150e..1dd38755 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -810,8 +810,7 @@ static int set_default_handler(int sig, int unlimited_corefile, int verbose) /*--- malloc_drhook ---*/ -static void * -malloc_drhook(size_t size) +void *malloc_drhook(size_t size) { size_t size1 = MAX(1,size); void *p = malloc(size1); diff --git a/src/fiat/drhook/extensions/papi/drhook_papi.c b/src/fiat/drhook/extensions/papi/drhook_papi.c index 8c4cded3..27e1c583 100644 --- a/src/fiat/drhook/extensions/papi/drhook_papi.c +++ b/src/fiat/drhook/extensions/papi/drhook_papi.c @@ -138,7 +138,7 @@ void drhook_papi_readall_(long_long * counterArray){ drhook_papi_readAll(counterArray); } -extern static void *malloc_drhook(size_t size); +extern void *malloc_drhook(size_t size); /* return 1 if papi can be used after the call */ int drhook_papi_init(int rank){ From 95964423d6be367747953f864837085b2b1af231 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Thu, 24 Oct 2024 14:51:30 +0100 Subject: [PATCH 16/32] Move OML_MY_THREAD() into parallel region Also fixed some related white spacing --- src/fiat/drhook/extensions/papi/drhook_papi.c | 2 +- src/fiat/drhook/extensions/papi/drhook_papi.h | 12 ++++++------ src/fiat/drhook/internal/drhook_run_omp_parallel.F90 | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/fiat/drhook/extensions/papi/drhook_papi.c b/src/fiat/drhook/extensions/papi/drhook_papi.c index 27e1c583..965120e8 100644 --- a/src/fiat/drhook/extensions/papi/drhook_papi.c +++ b/src/fiat/drhook/extensions/papi/drhook_papi.c @@ -219,7 +219,7 @@ int drhook_papi_init(int rank){ drhook_papi_event_set=malloc_drhook(nthreads*sizeof(int)); int rcout; - drhook_run_omp_parallel_papi_startup(drhook_papi_event_set,nthreads, &rcout); + 
drhook_run_omp_parallel_papi_startup(drhook_papi_event_set, nthreads, &rcout); if (rcout) return 0; diff --git a/src/fiat/drhook/extensions/papi/drhook_papi.h b/src/fiat/drhook/extensions/papi/drhook_papi.h index 95624421..d12f7bd0 100644 --- a/src/fiat/drhook/extensions/papi/drhook_papi.h +++ b/src/fiat/drhook/extensions/papi/drhook_papi.h @@ -11,10 +11,10 @@ int drhook_papi_max_num_counters(); void drhook_papi_counter_name(int c, char* event_name); void drhook_papi_add_counter_name(const char* counter_name); long_long drhook_papi_read(int counterId); -int drhook_papi_readAll(long_long * counterArray); +int drhook_papi_readAll(long_long* counterArray); -/* implemented in forrtran */ -int drhook_run_omp_parallel_papi_startup(int * drhook_papi_event_set,int nthreads, int* rcout); +/* implemented in fortran */ +int drhook_run_omp_parallel_papi_startup(int* drhook_papi_event_set, int nthreads, int* rcout); /* a = b - c if b or c == NULL means use current readings @@ -23,15 +23,15 @@ void drhook_papi_subtract(long_long* a, long_long* b, long_long* c); /* a = b + c if a==NULL, b=b+c */ -void drhook_papi_add(long_long* a,long_long* b,long_long* c); +void drhook_papi_add(long_long* a, long_long* b, long_long* c); /* a = b */ -void drhook_papi_cpy(long_long* a,long_long* b); +void drhook_papi_cpy(long_long* a, long_long* b); /* a=0 */ void drhook_papi_bzero(long_long* a); -void drhook_papi_print(char * s,long_long* a,int header); +void drhook_papi_print(char* s, long_long* a, int header); #else #define long_long long long diff --git a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 index 6fd6138e..655fb27b 100644 --- a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 +++ b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 @@ -76,7 +76,7 @@ end subroutine drhook_run_omp_parallel_get_cycles #if defined(DR_HOOK_HAVE_PAPI) -subroutine drhook_run_omp_parallel_papi_startup(events,n, rcOut) bind(c) +subroutine 
drhook_run_omp_parallel_papi_startup(events, n, rcOut) bind(c) use, intrinsic :: iso_c_binding, only : c_char, c_int, c_double use drhook_papi_interface use OML_MOD @@ -89,14 +89,14 @@ subroutine drhook_run_omp_parallel_papi_startup(events,n, rcOut) bind(c) INTEGER :: myThread INTEGER :: nThreads - myThread=OML_MY_THREAD()-1 nThreads=OML_GET_MAX_THREADS() rcOut=0 - !$OMP PARALLEL + !$OMP PARALLEL PRIVATE(myThread,rc) SHARED(rcOut) + myThread=OML_MY_THREAD()-1 DO thread=0,nThreads-1 if (thread==myThread) then rc=dr_hook_papi_start_threads(events) - if (rc==0)rcOut=1 + if (rc==0) rcOut=1 end if !$OMP BARRIER END DO From 4cd9b527ea1add4410c60b880614e2b1235cde99 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Mon, 28 Oct 2024 12:08:30 +0000 Subject: [PATCH 17/32] Add PAPI tests --- tests/drhook/CMakeLists.txt | 7 +- tests/drhook/drhook_papi/CMakeLists.txt | 109 ++++++++++++++++++ .../drhook/drhook_papi/drhook_papi_basic.F90 | 27 +++++ tests/drhook/drhook_papi/drhook_papi_mpi.F90 | 34 ++++++ .../drhook_papi/drhook_papi_user_counters.F90 | 27 +++++ .../drhook_papi/drhook_papi_user_filename.F90 | 27 +++++ 6 files changed, 230 insertions(+), 1 deletion(-) create mode 100644 tests/drhook/drhook_papi/CMakeLists.txt create mode 100644 tests/drhook/drhook_papi/drhook_papi_basic.F90 create mode 100644 tests/drhook/drhook_papi/drhook_papi_mpi.F90 create mode 100644 tests/drhook/drhook_papi/drhook_papi_user_counters.F90 create mode 100644 tests/drhook/drhook_papi/drhook_papi_user_filename.F90 diff --git a/tests/drhook/CMakeLists.txt b/tests/drhook/CMakeLists.txt index f41fa5b2..f8bbc5fa 100644 --- a/tests/drhook/CMakeLists.txt +++ b/tests/drhook/CMakeLists.txt @@ -84,4 +84,9 @@ ecbuild_add_test( TARGET fiat_test_drhook_ex5 # NVTX if (HAVE_DR_HOOK_NVTX) add_subdirectory(drhook_nvtx) -endif () \ No newline at end of file +endif () + + # PAPI + if (HAVE_DR_HOOK_PAPI) + add_subdirectory(drhook_papi) + endif () \ No newline at end of file diff --git 
a/tests/drhook/drhook_papi/CMakeLists.txt b/tests/drhook/drhook_papi/CMakeLists.txt new file mode 100644 index 00000000..cf8b7c0b --- /dev/null +++ b/tests/drhook/drhook_papi/CMakeLists.txt @@ -0,0 +1,109 @@ +# +# (C) Copyright 2024- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +# Test basic implementation + +ecbuild_add_executable( TARGET drhook_papi_basic + SOURCES drhook_papi_basic.F90 + LIBS fiat + LINKER_LANGUAGE Fortran + CONDITION HAVE_DR_HOOK_PAPI + NOINSTALL ) + +ecbuild_add_test( TARGET fiat_test_drhook_papi_basic + COMMAND drhook_papi_basic + ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS + CONDITION HAVE_DR_HOOK_PAPI ) + +set_tests_properties(fiat_test_drhook_papi_basic + PROPERTIES PASS_REGULAR_EXPRESSION "Writing counter information of proc#1 into file" ) + + +ecbuild_add_test( TARGET fiat_test_drhook_papi_basic_valid_csv + TYPE SCRIPT + # Just making sure it's not an empty file + COMMAND "find" + ARGS "." 
"-name" "drhook.prof.0.csv" "-type" "f" "-size" "+100c" + ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS + CONDITION HAVE_DR_HOOK_PAPI ) + +set_tests_properties(fiat_test_drhook_papi_basic_valid_csv + PROPERTIES DEPENDS fiat_test_drhook_papi_basic + FAIL_REGULAR_EXPRESSION "no matches found" ) + +# Test MPI implementation + +ecbuild_add_executable( TARGET drhook_papi_mpi + SOURCES drhook_papi_mpi.F90 + LIBS fiat + LINKER_LANGUAGE Fortran + CONDITION HAVE_DR_HOOK_PAPI AND HAVE_MPI + NOINSTALL ) + +ecbuild_add_test( TARGET fiat_test_drhook_papi_mpi + COMMAND drhook_papi_mpi + MPI 5 + ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS + CONDITION HAVE_DR_HOOK_PAPI AND HAVE_MPI ) + + +ecbuild_add_test( TARGET fiat_test_drhook_papi_mpi_valid_csv + TYPE SCRIPT + # Just making sure it's not an empty file + # We have to do this weird thing with bash so that we can + # use a redirect. CMake tests are really basic... + COMMAND "bash" + ARGS "-c" "find . -name 'drhook.prof.[1-5].csv' -type f -size +100c | wc -l" + ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS + CONDITION HAVE_DR_HOOK_PAPI AND HAVE_MPI ) + +set_tests_properties(fiat_test_drhook_papi_mpi_valid_csv + PROPERTIES DEPENDS fiat_test_drhook_papi_mpi + PASS_REGULAR_EXPRESSION "5" ) + +# Test user specified output file names + +ecbuild_add_executable( TARGET drhook_papi_user_filename + SOURCES drhook_papi_user_filename.F90 + LIBS fiat + LINKER_LANGUAGE Fortran + CONDITION HAVE_DR_HOOK_PAPI + NOINSTALL ) + +ecbuild_add_test( TARGET fiat_test_drhook_papi_user_filename + COMMAND drhook_papi_user_filename + ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS DR_HOOK_PROFILE=fiat_test_drhook_papi_user_filename + CONDITION HAVE_DR_HOOK_PAPI ) + +ecbuild_add_test( TARGET fiat_test_drhook_papi_user_filename_valid_csv + TYPE SCRIPT + # Just making sure it's not an empty file + COMMAND "find" + ARGS "." 
"-name" "fiat_test_drhook_papi_user_filename.1.csv" "-type" "f" + ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS + CONDITION HAVE_DR_HOOK_PAPI ) + +set_tests_properties(fiat_test_drhook_papi_user_filename_valid_csv + PROPERTIES DEPENDS fiat_test_drhook_papi_user_filename + FAIL_REGULAR_EXPRESSION "no matches found" ) + +# Test user specified counters + +ecbuild_add_executable( TARGET drhook_papi_user_counters + SOURCES drhook_papi_user_counters.F90 + LIBS fiat + LINKER_LANGUAGE Fortran + CONDITION HAVE_DR_HOOK_PAPI + NOINSTALL ) + +ecbuild_add_test( TARGET fiat_test_drhook_papi_user_counters + COMMAND drhook_papi_user_counters + ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS DR_HOOK_PAPI_COUNTERS=PAPI_TOT_INS DR_HOOK_PROFILE=fiat_test_drhook_papi_user_counters + CONDITION HAVE_DR_HOOK_PAPI ) \ No newline at end of file diff --git a/tests/drhook/drhook_papi/drhook_papi_basic.F90 b/tests/drhook/drhook_papi/drhook_papi_basic.F90 new file mode 100644 index 00000000..ed200208 --- /dev/null +++ b/tests/drhook/drhook_papi/drhook_papi_basic.F90 @@ -0,0 +1,27 @@ +! (C) Copyright 2024- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +program drhook_papi_basic + + use yomhook, only : jphook, dr_hook + + implicit none + + real(jphook) :: zhook_handle + integer :: a + + call dr_hook('drhook_papi_basic', 0, zhook_handle) + + a = 1 + a = a + a + + call dr_hook('drhook_papi_basic', 1, zhook_handle) + +end program drhook_papi_basic + diff --git a/tests/drhook/drhook_papi/drhook_papi_mpi.F90 b/tests/drhook/drhook_papi/drhook_papi_mpi.F90 new file mode 100644 index 00000000..dce412f9 --- /dev/null +++ b/tests/drhook/drhook_papi/drhook_papi_mpi.F90 @@ -0,0 +1,34 @@ +! 
(C) Copyright 2024- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +program drhook_papi_mpi + use mpl_module + use yomhook, only : jphook, dr_hook + use sdl_mod, only : sdl_traceback + implicit none + integer jpe, npes, mype, a + character(len=256) arg, env + real(jphook) :: zhook_handle + + call mpl_init(ldinfo=.false.) + call dr_hook('drhook_papi_mpi',0,zhook_handle) + + npes = mpl_nproc() + mype = mpl_myrank() + + do jpe=1,npes + if (mype == jpe) then + a = a + jpe + endif + enddo + + call mpl_barrier() + call dr_hook('drhook_papi_mpi',1,zhook_handle) + call mpl_end() +end program drhook_papi_mpi diff --git a/tests/drhook/drhook_papi/drhook_papi_user_counters.F90 b/tests/drhook/drhook_papi/drhook_papi_user_counters.F90 new file mode 100644 index 00000000..03434685 --- /dev/null +++ b/tests/drhook/drhook_papi/drhook_papi_user_counters.F90 @@ -0,0 +1,27 @@ +! (C) Copyright 2024- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. 
+ +program drhook_papi_user_counters + + use yomhook, only : jphook, dr_hook + + implicit none + + real(jphook) :: zhook_handle + integer :: a + + call dr_hook('drhook_papi_user_counters', 0, zhook_handle) + + a = 1 + a = a + a + + call dr_hook('drhook_papi_user_counters', 1, zhook_handle) + +end program drhook_papi_user_counters + diff --git a/tests/drhook/drhook_papi/drhook_papi_user_filename.F90 b/tests/drhook/drhook_papi/drhook_papi_user_filename.F90 new file mode 100644 index 00000000..7938c33d --- /dev/null +++ b/tests/drhook/drhook_papi/drhook_papi_user_filename.F90 @@ -0,0 +1,27 @@ +! (C) Copyright 2024- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +program drhook_papi_user_filename + + use yomhook, only : jphook, dr_hook + + implicit none + + real(jphook) :: zhook_handle + integer :: a + + call dr_hook('drhook_papi_user_filename', 0, zhook_handle) + + a = 1 + a = a + a + + call dr_hook('drhook_papi_user_filename', 1, zhook_handle) + +end program drhook_papi_user_filename + From 8a558aa205ee39cba6e72037eb799831eadf4dba Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Mon, 28 Oct 2024 12:23:29 +0000 Subject: [PATCH 18/32] Fix nproc being initialised to 1 Previously nproc was initialised to 0. This meant that get_mon_out() wouldn't work as expected when not using MPI due to a bad assumption in an if statement. 
--- src/fiat/drhook/internal/dr_hook_procinfo.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fiat/drhook/internal/dr_hook_procinfo.F90 b/src/fiat/drhook/internal/dr_hook_procinfo.F90 index 9f36e78b..a79c6d7d 100644 --- a/src/fiat/drhook/internal/dr_hook_procinfo.F90 +++ b/src/fiat/drhook/internal/dr_hook_procinfo.F90 @@ -17,7 +17,7 @@ SUBROUTINE DR_HOOK_PROCINFO(KMYPROC, KNPROC) INTEGER(KIND=JPIM) :: IERROR KMYPROC=-1 -KNPROC=0 +KNPROC=1 CALL MPI_INITIALIZED(LMPI_INITIALIZED,IERROR) IF( LMPI_INITIALIZED ) THEN CALL MPI_COMM_SIZE(MPI_COMM_WORLD,KNPROC,IERROR) From 84d03624d7f2e04a0bfb1f15ed0994bdbbc505d7 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Mon, 28 Oct 2024 13:24:57 +0000 Subject: [PATCH 19/32] Rename dr_hook -> drhook --- src/fiat/drhook/extensions/papi/drhook_papi.c | 2 +- src/fiat/drhook/internal/drhook_run_omp_parallel.F90 | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/fiat/drhook/extensions/papi/drhook_papi.c b/src/fiat/drhook/extensions/papi/drhook_papi.c index 965120e8..7da1ad2d 100644 --- a/src/fiat/drhook/extensions/papi/drhook_papi.c +++ b/src/fiat/drhook/extensions/papi/drhook_papi.c @@ -231,7 +231,7 @@ int drhook_papi_init(int rank){ return 1; } -int dr_hook_papi_start_threads(int* events){ +int drhook_papi_start_threads(int* events){ int thread=papi_safe_thread_num(); int papiErr; char pmsg[STD_MSG_LEN]; diff --git a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 index 655fb27b..0d1170f0 100644 --- a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 +++ b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 @@ -13,11 +13,11 @@ module drhook_papi_interface #if defined(DR_HOOK_HAVE_PAPI) interface - function dr_hook_papi_start_threads ( events) bind ( c ) + function drhook_papi_start_threads ( events) bind ( c ) use, intrinsic :: iso_c_binding, only : c_int - integer(kind=c_int) :: dr_hook_papi_start_threads + 
integer(kind=c_int) :: drhook_papi_start_threads integer(kind=c_int), intent(inout) :: events(*) - end function dr_hook_papi_start_threads + end function drhook_papi_start_threads end interface #endif end module drhook_papi_interface @@ -95,7 +95,7 @@ subroutine drhook_run_omp_parallel_papi_startup(events, n, rcOut) bind(c) myThread=OML_MY_THREAD()-1 DO thread=0,nThreads-1 if (thread==myThread) then - rc=dr_hook_papi_start_threads(events) + rc=drhook_papi_start_threads(events) if (rc==0) rcOut=1 end if !$OMP BARRIER From 997bc63c2278df7cadb13d2b406c569de87e7a33 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Mon, 28 Oct 2024 14:04:46 +0000 Subject: [PATCH 20/32] Add copyright headers --- src/fiat/drhook/extensions/papi/drhook_papi.c | 12 +++++++++++- src/fiat/drhook/extensions/papi/drhook_papi.h | 10 ++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/fiat/drhook/extensions/papi/drhook_papi.c b/src/fiat/drhook/extensions/papi/drhook_papi.c index 7da1ad2d..141e4d75 100644 --- a/src/fiat/drhook/extensions/papi/drhook_papi.c +++ b/src/fiat/drhook/extensions/papi/drhook_papi.c @@ -1,3 +1,13 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + #include "drhook_papi.h" #include #include @@ -20,7 +30,7 @@ int drhook_papi_rank=0; /* C style! */ static int papi_counter_event_codes[MAXNPAPICNTRS]; static char* papi_counter_names[MAXNPAPICNTRS]; -static int papi_counters_count; +static int papi_counters_count = 0; /* function to use for thread id - it should be better than omp_get_thread_num! 
diff --git a/src/fiat/drhook/extensions/papi/drhook_papi.h b/src/fiat/drhook/extensions/papi/drhook_papi.h index d12f7bd0..0e3b2a57 100644 --- a/src/fiat/drhook/extensions/papi/drhook_papi.h +++ b/src/fiat/drhook/extensions/papi/drhook_papi.h @@ -1,3 +1,13 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + #ifndef DRHOOK_PAPI #define DRHOOK_PAPI From e609d2381dd23e48b31160f343a69348f3d2d96b Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Wed, 30 Oct 2024 12:01:56 +0000 Subject: [PATCH 21/32] Revert "Fix nproc being initialised to 1" This reverts commit 8a558aa205ee39cba6e72037eb799831eadf4dba. --- src/fiat/drhook/internal/dr_hook_procinfo.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fiat/drhook/internal/dr_hook_procinfo.F90 b/src/fiat/drhook/internal/dr_hook_procinfo.F90 index a79c6d7d..9f36e78b 100644 --- a/src/fiat/drhook/internal/dr_hook_procinfo.F90 +++ b/src/fiat/drhook/internal/dr_hook_procinfo.F90 @@ -17,7 +17,7 @@ SUBROUTINE DR_HOOK_PROCINFO(KMYPROC, KNPROC) INTEGER(KIND=JPIM) :: IERROR KMYPROC=-1 -KNPROC=1 +KNPROC=0 CALL MPI_INITIALIZED(LMPI_INITIALIZED,IERROR) IF( LMPI_INITIALIZED ) THEN CALL MPI_COMM_SIZE(MPI_COMM_WORLD,KNPROC,IERROR) From 0bee022f767c210873228756196aaa1fcec29679 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Tue, 29 Oct 2024 13:45:26 +0000 Subject: [PATCH 22/32] Fix output formatting When outputting the value of DR_HOOK_PAPI_COUNTERS, the info prefix was missing. 
--- src/fiat/drhook/drhook.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index 1dd38755..8731b466 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -2759,6 +2759,10 @@ process_options() p++; } p = strtok(s,delim); + if (p && fp) { + fprintf(fp,"%s %s [%s@%s:%d]",pfx,TIMESTR(tid),FFL); + newline = 1; + } for (int i = 0; p && i < drhook_papi_max_num_counters(); p = strtok(NULL,delim), i++) { drhook_papi_add_counter_name(strdup_drhook(p)); OPTPRINT(fp,"%s%s",comma,p); comma = ","; From f87140b3e18e2fe448aefc196969be3ad18ec02b Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Wed, 30 Oct 2024 09:20:33 +0000 Subject: [PATCH 23/32] Change initialising process info dr_hook_procinfo() now separates the setting of myproc & nproc from the reporting of initialising MPI. It will now initialise both myproc and nproc to 1 if DrHook is not running with MPI. To report the state of MPI, the new parameter mpi_init will be used. --- src/fiat/drhook/drhook.c | 8 ++++---- src/fiat/drhook/internal/dr_hook_procinfo.F90 | 10 +++++----- src/fiat/include/fiat/drhook.h | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index 8731b466..97e412e8 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -2028,11 +2028,11 @@ signal_drhook_init(int enforce) char *env = getenv("DR_HOOK_SILENT"); int silent = env ? 
atoi(env) : 0; int j; - dr_hook_procinfo_(&myproc, &nproc); - if (myproc < 1) myproc = 1; /* Just to enable output as if myproc was == 1 */ + int mpi_init; + dr_hook_procinfo_(&myproc, &nproc, &mpi_init); /* Signals may not yet been set, since MPI not initialized - Only enforce-parameter can enforce to set these => no output on myproc=1 */ - if (!enforce && (myproc < 1 || nproc < 0)) return; + Enforce parameter for setting signals regardless of MPI state */ + if (!enforce && !mpi_init) return; if (signals_set) return; /* Extra safety */ /* To present sumpini.F90 (f.ex.) initializing DrHook-signals in case of DR_HOOK was turned off (=0), then set also export DR_HOOK_INIT_SIGNALS=0 */ diff --git a/src/fiat/drhook/internal/dr_hook_procinfo.F90 b/src/fiat/drhook/internal/dr_hook_procinfo.F90 index 9f36e78b..fed2c6e4 100644 --- a/src/fiat/drhook/internal/dr_hook_procinfo.F90 +++ b/src/fiat/drhook/internal/dr_hook_procinfo.F90 @@ -8,16 +8,16 @@ ! nor does it submit to any jurisdiction. ! -SUBROUTINE DR_HOOK_PROCINFO(KMYPROC, KNPROC) +SUBROUTINE DR_HOOK_PROCINFO(KMYPROC, KNPROC, LMPI_INITIALIZED) USE EC_PARKIND ,ONLY : JPIM USE MPL_MPIF IMPLICIT NONE -INTEGER(KIND=JPIM),INTENT(OUT) :: KMYPROC, KNPROC -LOGICAL :: LMPI_INITIALIZED +INTEGER(KIND=JPIM), INTENT(OUT) :: KMYPROC, KNPROC +LOGICAL, INTENT(OUT) :: LMPI_INITIALIZED INTEGER(KIND=JPIM) :: IERROR -KMYPROC=-1 -KNPROC=0 +KMYPROC=1 +KNPROC=1 CALL MPI_INITIALIZED(LMPI_INITIALIZED,IERROR) IF( LMPI_INITIALIZED ) THEN CALL MPI_COMM_SIZE(MPI_COMM_WORLD,KNPROC,IERROR) diff --git a/src/fiat/include/fiat/drhook.h b/src/fiat/include/fiat/drhook.h index bafb78e5..8f97bc79 100644 --- a/src/fiat/include/fiat/drhook.h +++ b/src/fiat/include/fiat/drhook.h @@ -187,7 +187,7 @@ dr_hook_prt_(const int *ftnunitno, , int s_len); extern void -dr_hook_procinfo_(int *myproc, int *nproc); +dr_hook_procinfo_(int *myproc, int *nproc, int *mpi_init); #ifdef __cplusplus } // extern "C" From aade15c56b44daccc2276657c72027cb7b4f4acf Mon Sep 17 
00:00:00 2001 From: Andrew Beggs Date: Wed, 30 Oct 2024 09:36:39 +0000 Subject: [PATCH 24/32] Fix drhook_papi_max_name_len() missing from header --- src/fiat/drhook/extensions/papi/drhook_papi.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fiat/drhook/extensions/papi/drhook_papi.h b/src/fiat/drhook/extensions/papi/drhook_papi.h index 0e3b2a57..f02b813d 100644 --- a/src/fiat/drhook/extensions/papi/drhook_papi.h +++ b/src/fiat/drhook/extensions/papi/drhook_papi.h @@ -18,6 +18,7 @@ int drhook_papi_init(int rank); int drhook_papi_num_counters(); int drhook_papi_max_num_counters(); +int drhook_papi_max_name_len(); void drhook_papi_counter_name(int c, char* event_name); void drhook_papi_add_counter_name(const char* counter_name); long_long drhook_papi_read(int counterId); From b93a1f382bdfcad9769590637ae2fbc7b5c67891 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Wed, 30 Oct 2024 09:54:26 +0000 Subject: [PATCH 25/32] Add drhook_papi_user_counters_more_than_max test Tests that any counters over MAXNPAPICNTRS are silently dropped. 
--- tests/drhook/drhook_papi/CMakeLists.txt | 22 ++++++++++++++- ...rhook_papi_user_counters_more_than_max.F90 | 27 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tests/drhook/drhook_papi/drhook_papi_user_counters_more_than_max.F90 diff --git a/tests/drhook/drhook_papi/CMakeLists.txt b/tests/drhook/drhook_papi/CMakeLists.txt index cf8b7c0b..b62f2737 100644 --- a/tests/drhook/drhook_papi/CMakeLists.txt +++ b/tests/drhook/drhook_papi/CMakeLists.txt @@ -106,4 +106,24 @@ ecbuild_add_executable( TARGET drhook_papi_user_counters ecbuild_add_test( TARGET fiat_test_drhook_papi_user_counters COMMAND drhook_papi_user_counters ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS DR_HOOK_PAPI_COUNTERS=PAPI_TOT_INS DR_HOOK_PROFILE=fiat_test_drhook_papi_user_counters - CONDITION HAVE_DR_HOOK_PAPI ) \ No newline at end of file + CONDITION HAVE_DR_HOOK_PAPI ) + +set_tests_properties(fiat_test_drhook_papi_user_counters + PROPERTIES PASS_REGULAR_EXPRESSION "PAPI_TOT_INS" ) + +# Test user specified counters going over max allowed + +ecbuild_add_executable( TARGET drhook_papi_user_counters_more_than_max + SOURCES drhook_papi_user_counters_more_than_max.F90 + LIBS fiat + LINKER_LANGUAGE Fortran + CONDITION HAVE_DR_HOOK_PAPI + NOINSTALL ) + +ecbuild_add_test( TARGET fiat_test_drhook_papi_user_counters_more_than_max + COMMAND drhook_papi_user_counters_more_than_max + ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS DR_HOOK_PAPI_COUNTERS=PAPI_TOT_CYC,PAPI_FP_OPS,PAPI_L1_DCA,PAPI_L2_DCM,PAPI_TOT_INS DR_HOOK_PROFILE=drhook_papi_user_counters_more_than_max + CONDITION HAVE_DR_HOOK_PAPI ) + +set_tests_properties(fiat_test_drhook_papi_user_counters_more_than_max + PROPERTIES FAIL_REGULAR_EXPRESSION "PAPI_TOT_INS" ) diff --git a/tests/drhook/drhook_papi/drhook_papi_user_counters_more_than_max.F90 b/tests/drhook/drhook_papi/drhook_papi_user_counters_more_than_max.F90 new file mode 100644 index 00000000..627687a1 --- /dev/null +++ 
b/tests/drhook/drhook_papi/drhook_papi_user_counters_more_than_max.F90 @@ -0,0 +1,27 @@ +! (C) Copyright 2024- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +program drhook_papi_user_counters_more_than_max + + use yomhook, only : jphook, dr_hook + + implicit none + + real(jphook) :: zhook_handle + integer :: a + + call dr_hook('drhook_papi_user_counters_more_than_max', 0, zhook_handle) + + a = 1 + a = a + a + + call dr_hook('drhook_papi_user_counters_more_than_max', 1, zhook_handle) + +end program drhook_papi_user_counters_more_than_max + From 652ca482112d3ed0cb56528240b58c392d06adc6 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Wed, 30 Oct 2024 11:41:46 +0000 Subject: [PATCH 26/32] Fix set_tests_properties() not finding MPI tests Previously set_tests_properties() would fail because it couldn't find fiat_test_drhook_papi_mpi_valid_csv to add properties to. This was because fiat_test_drhook_papi_mpi_valid_csv was behind a condition that required MPI, while set_tests_properties() was not. 
--- tests/drhook/CMakeLists.txt | 8 ++++---- tests/drhook/drhook_papi/CMakeLists.txt | 8 +++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/drhook/CMakeLists.txt b/tests/drhook/CMakeLists.txt index f8bbc5fa..ca619dfc 100644 --- a/tests/drhook/CMakeLists.txt +++ b/tests/drhook/CMakeLists.txt @@ -86,7 +86,7 @@ if (HAVE_DR_HOOK_NVTX) add_subdirectory(drhook_nvtx) endif () - # PAPI - if (HAVE_DR_HOOK_PAPI) - add_subdirectory(drhook_papi) - endif () \ No newline at end of file +# PAPI +if (HAVE_DR_HOOK_PAPI) + add_subdirectory(drhook_papi) +endif () \ No newline at end of file diff --git a/tests/drhook/drhook_papi/CMakeLists.txt b/tests/drhook/drhook_papi/CMakeLists.txt index b62f2737..a2664d49 100644 --- a/tests/drhook/drhook_papi/CMakeLists.txt +++ b/tests/drhook/drhook_papi/CMakeLists.txt @@ -64,9 +64,11 @@ ecbuild_add_test( TARGET fiat_test_drhook_papi_mpi_valid_csv ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS CONDITION HAVE_DR_HOOK_PAPI AND HAVE_MPI ) -set_tests_properties(fiat_test_drhook_papi_mpi_valid_csv - PROPERTIES DEPENDS fiat_test_drhook_papi_mpi - PASS_REGULAR_EXPRESSION "5" ) +if (HAVE_MPI) + set_tests_properties(fiat_test_drhook_papi_mpi_valid_csv + PROPERTIES DEPENDS fiat_test_drhook_papi_mpi + PASS_REGULAR_EXPRESSION "5" ) +endif() # Test user specified output file names From 19522b3c88b61937cde7d426fff0eb441f09af1b Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Wed, 30 Oct 2024 11:43:39 +0000 Subject: [PATCH 27/32] Use tid argument vs drhook_oml_get_thread_num() --- src/fiat/drhook/drhook.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index 97e412e8..e153fe03 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -2938,7 +2938,7 @@ getkey(int tid, const char *name, int name_len, } #if defined(DR_HOOK_HAVE_NVTX) // Helps filter out wrapper calls that may be noise - if (opt_nvtx && drhook_oml_get_thread_num() == 1){ + if (opt_nvtx && tid 
== 1){ if (keyptr->calls > opt_nvtx_SCC && keyptr->delta_wall_all < opt_nvtx_SWT) { if (!opt_silent) fprintf(stderr,"DRHOOK:NVTX: Skipping opening of region %s\n", keyptr->name); From ac793d2a844c032cf01d16b2038ee16fdca90b7f Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Tue, 12 Nov 2024 15:10:10 +0000 Subject: [PATCH 28/32] Add guards to set_tests_properties() in tests Prevents "test not found" errors when set_tests_properties() is called for a test that was not created. --- tests/drhook/drhook_nvtx/CMakeLists.txt | 18 ++++++---- tests/drhook/drhook_papi/CMakeLists.txt | 36 ++++++++++++------- .../drhook_papi/drhook_papi_user_counters.F90 | 1 - ...rhook_papi_user_counters_more_than_max.F90 | 1 - .../drhook_papi/drhook_papi_user_filename.F90 | 1 - 5 files changed, 35 insertions(+), 22 deletions(-) diff --git a/tests/drhook/drhook_nvtx/CMakeLists.txt b/tests/drhook/drhook_nvtx/CMakeLists.txt index 9d186b65..ef24e16d 100644 --- a/tests/drhook/drhook_nvtx/CMakeLists.txt +++ b/tests/drhook/drhook_nvtx/CMakeLists.txt @@ -57,8 +57,10 @@ ecbuild_add_test( TARGET fiat_test_drhook_nvtx_mismatched_regions ENVIRONMENT DR_HOOK=1 DR_HOOK_NVTX=1 CONDITION HAVE_DR_HOOK_NVTX ) -set_tests_properties(fiat_test_drhook_nvtx_mismatched_regions - PROPERTIES WILL_FAIL TRUE ) +if (TEST fiat_test_drhook_nvtx_mismatched_regions) + set_tests_properties(fiat_test_drhook_nvtx_mismatched_regions + PROPERTIES WILL_FAIL TRUE ) +endif() # Test skip on spammy regions @@ -76,8 +78,10 @@ ecbuild_add_test( TARGET fiat_test_drhook_nvtx_skip_spam_regions ENVIRONMENT DR_HOOK=1 DR_HOOK_NVTX=1 DR_HOOK_SILENT=0 CONDITION HAVE_DR_HOOK_NVTX ) -set_tests_properties(fiat_test_drhook_nvtx_skip_spam_regions - PROPERTIES PASS_REGULAR_EXPRESSION "DRHOOK:NVTX: Skipping closing of region foo" PASS_REGULAR_EXPRESSION "DRHOOK:NVTX: Skipping opening of region foo" ) +if (TEST fiat_test_drhook_nvtx_skip_spam_regions) + set_tests_properties(fiat_test_drhook_nvtx_skip_spam_regions + PROPERTIES PASS_REGULAR_EXPRESSION "DRHOOK:NVTX: Skipping closing of 
region foo" PASS_REGULAR_EXPRESSION "DRHOOK:NVTX: Skipping opening of region foo" ) +endif() # Test not to skip on spammy regions with long runtimes @@ -95,5 +99,7 @@ ecbuild_add_test( TARGET fiat_test_drhook_nvtx_no_skip_spam_regions ENVIRONMENT DR_HOOK=1 DR_HOOK_NVTX=1 DR_HOOK_SILENT=0 CONDITION HAVE_DR_HOOK_NVTX ) -set_tests_properties(fiat_test_drhook_nvtx_no_skip_spam_regions - PROPERTIES FAIL_REGULAR_EXPRESSION "DRHOOK:NVTX: Skipping closing of region foo" FAIL_REGULAR_EXPRESSION "DRHOOK:NVTX: Skipping opening of region foo" ) +if (TEST fiat_test_drhook_nvtx_no_skip_spam_regions) + set_tests_properties(fiat_test_drhook_nvtx_no_skip_spam_regions + PROPERTIES FAIL_REGULAR_EXPRESSION "DRHOOK:NVTX: Skipping closing of region foo" FAIL_REGULAR_EXPRESSION "DRHOOK:NVTX: Skipping opening of region foo" ) +endif() diff --git a/tests/drhook/drhook_papi/CMakeLists.txt b/tests/drhook/drhook_papi/CMakeLists.txt index a2664d49..41ae28c2 100644 --- a/tests/drhook/drhook_papi/CMakeLists.txt +++ b/tests/drhook/drhook_papi/CMakeLists.txt @@ -22,8 +22,10 @@ ecbuild_add_test( TARGET fiat_test_drhook_papi_basic ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS CONDITION HAVE_DR_HOOK_PAPI ) -set_tests_properties(fiat_test_drhook_papi_basic - PROPERTIES PASS_REGULAR_EXPRESSION "Writing counter information of proc#1 into file" ) +if (TEST fiat_test_drhook_papi_basic) + set_tests_properties(fiat_test_drhook_papi_basic + PROPERTIES PASS_REGULAR_EXPRESSION "Writing counter information of proc#1 into file" ) +endif() ecbuild_add_test( TARGET fiat_test_drhook_papi_basic_valid_csv @@ -34,9 +36,11 @@ ecbuild_add_test( TARGET fiat_test_drhook_papi_basic_valid_csv ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS CONDITION HAVE_DR_HOOK_PAPI ) -set_tests_properties(fiat_test_drhook_papi_basic_valid_csv - PROPERTIES DEPENDS fiat_test_drhook_papi_basic - FAIL_REGULAR_EXPRESSION "no matches found" ) +if (TEST fiat_test_drhook_papi_basic_valid_csv) + set_tests_properties(fiat_test_drhook_papi_basic_valid_csv + 
PROPERTIES DEPENDS fiat_test_drhook_papi_basic + FAIL_REGULAR_EXPRESSION "no matches found" ) +endif() # Test MPI implementation @@ -64,7 +68,7 @@ ecbuild_add_test( TARGET fiat_test_drhook_papi_mpi_valid_csv ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS CONDITION HAVE_DR_HOOK_PAPI AND HAVE_MPI ) -if (HAVE_MPI) +if (TEST fiat_test_drhook_papi_mpi_valid_csv) set_tests_properties(fiat_test_drhook_papi_mpi_valid_csv PROPERTIES DEPENDS fiat_test_drhook_papi_mpi PASS_REGULAR_EXPRESSION "5" ) @@ -92,9 +96,11 @@ ecbuild_add_test( TARGET fiat_test_drhook_papi_user_filename_valid_csv ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS CONDITION HAVE_DR_HOOK_PAPI ) -set_tests_properties(fiat_test_drhook_papi_user_filename_valid_csv - PROPERTIES DEPENDS fiat_test_drhook_papi_user_filename - FAIL_REGULAR_EXPRESSION "no matches found" ) +if (TEST fiat_test_drhook_papi_user_filename_valid_csv) + set_tests_properties(fiat_test_drhook_papi_user_filename_valid_csv + PROPERTIES DEPENDS fiat_test_drhook_papi_user_filename + FAIL_REGULAR_EXPRESSION "no matches found" ) +endif() # Test user specified counters @@ -110,8 +116,10 @@ ecbuild_add_test( TARGET fiat_test_drhook_papi_user_counters ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS DR_HOOK_PAPI_COUNTERS=PAPI_TOT_INS DR_HOOK_PROFILE=fiat_test_drhook_papi_user_counters CONDITION HAVE_DR_HOOK_PAPI ) -set_tests_properties(fiat_test_drhook_papi_user_counters - PROPERTIES PASS_REGULAR_EXPRESSION "PAPI_TOT_INS" ) +if (TEST fiat_test_drhook_papi_user_counters) + set_tests_properties(fiat_test_drhook_papi_user_counters + PROPERTIES PASS_REGULAR_EXPRESSION "PAPI_TOT_INS" ) +endif() # Test user specified counters going over max allowed @@ -127,5 +135,7 @@ ecbuild_add_test( TARGET fiat_test_drhook_papi_user_counters_more_than_max ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS DR_HOOK_PAPI_COUNTERS=PAPI_TOT_CYC,PAPI_FP_OPS,PAPI_L1_DCA,PAPI_L2_DCM,PAPI_TOT_INS DR_HOOK_PROFILE=drhook_papi_user_counters_more_than_max CONDITION HAVE_DR_HOOK_PAPI ) 
-set_tests_properties(fiat_test_drhook_papi_user_counters_more_than_max - PROPERTIES FAIL_REGULAR_EXPRESSION "PAPI_TOT_INS" ) +if (TEST fiat_test_drhook_papi_user_counters_more_than_max) + set_tests_properties(fiat_test_drhook_papi_user_counters_more_than_max + PROPERTIES FAIL_REGULAR_EXPRESSION "PAPI_TOT_INS" ) +endif() diff --git a/tests/drhook/drhook_papi/drhook_papi_user_counters.F90 b/tests/drhook/drhook_papi/drhook_papi_user_counters.F90 index 03434685..00796d17 100644 --- a/tests/drhook/drhook_papi/drhook_papi_user_counters.F90 +++ b/tests/drhook/drhook_papi/drhook_papi_user_counters.F90 @@ -24,4 +24,3 @@ program drhook_papi_user_counters call dr_hook('drhook_papi_user_counters', 1, zhook_handle) end program drhook_papi_user_counters - diff --git a/tests/drhook/drhook_papi/drhook_papi_user_counters_more_than_max.F90 b/tests/drhook/drhook_papi/drhook_papi_user_counters_more_than_max.F90 index 627687a1..ffc8b386 100644 --- a/tests/drhook/drhook_papi/drhook_papi_user_counters_more_than_max.F90 +++ b/tests/drhook/drhook_papi/drhook_papi_user_counters_more_than_max.F90 @@ -24,4 +24,3 @@ program drhook_papi_user_counters_more_than_max call dr_hook('drhook_papi_user_counters_more_than_max', 1, zhook_handle) end program drhook_papi_user_counters_more_than_max - diff --git a/tests/drhook/drhook_papi/drhook_papi_user_filename.F90 b/tests/drhook/drhook_papi/drhook_papi_user_filename.F90 index 7938c33d..4ca4c3e0 100644 --- a/tests/drhook/drhook_papi/drhook_papi_user_filename.F90 +++ b/tests/drhook/drhook_papi/drhook_papi_user_filename.F90 @@ -24,4 +24,3 @@ program drhook_papi_user_filename call dr_hook('drhook_papi_user_filename', 1, zhook_handle) end program drhook_papi_user_filename - From 82a5b5063bb601808f057bd7cb7390d09ca62c59 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Tue, 12 Nov 2024 15:11:27 +0000 Subject: [PATCH 29/32] Add C_DR_HOOK_PROCINFO() Adds a BIND(C) wrapper around DR_HOOK_PROCINFO() to ensure type safety when it is called from C. --- src/fiat/drhook/drhook.c | 2 +- 
src/fiat/drhook/internal/dr_hook_procinfo.F90 | 11 +++++++++++ src/fiat/include/fiat/drhook.h | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index e153fe03..db00bce4 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -2029,7 +2029,7 @@ signal_drhook_init(int enforce) int silent = env ? atoi(env) : 0; int j; int mpi_init; - dr_hook_procinfo_(&myproc, &nproc, &mpi_init); + c_dr_hook_procinfo(&myproc, &nproc, &mpi_init); /* Signals may not yet been set, since MPI not initialized Enforce parameter for setting signals regardless of MPI state */ if (!enforce && !mpi_init) return; diff --git a/src/fiat/drhook/internal/dr_hook_procinfo.F90 b/src/fiat/drhook/internal/dr_hook_procinfo.F90 index fed2c6e4..a469c0c0 100644 --- a/src/fiat/drhook/internal/dr_hook_procinfo.F90 +++ b/src/fiat/drhook/internal/dr_hook_procinfo.F90 @@ -25,3 +25,14 @@ SUBROUTINE DR_HOOK_PROCINFO(KMYPROC, KNPROC, LMPI_INITIALIZED) KMYPROC = KMYPROC+1 ! 
1-based in IFS context ENDIF END SUBROUTINE DR_HOOK_PROCINFO + +SUBROUTINE C_DR_HOOK_PROCINFO(KMYPROC, KNPROC, KMPI_INITIALIZED) BIND(C, name="c_dr_hook_procinfo") + USE, INTRINSIC :: ISO_C_BINDING, ONLY : C_INT + IMPLICIT NONE + INTEGER(KIND=C_INT), INTENT(OUT) :: KMYPROC, KNPROC, KMPI_INITIALIZED + LOGICAL :: LLMPI_INITIALIZED + + CALL DR_HOOK_PROCINFO(KMYPROC, KNPROC, LLMPI_INITIALIZED) + + KMPI_INITIALIZED = MERGE(1, 0, LLMPI_INITIALIZED) +END SUBROUTINE C_DR_HOOK_PROCINFO diff --git a/src/fiat/include/fiat/drhook.h b/src/fiat/include/fiat/drhook.h index 8f97bc79..82d292e3 100644 --- a/src/fiat/include/fiat/drhook.h +++ b/src/fiat/include/fiat/drhook.h @@ -187,7 +187,7 @@ dr_hook_prt_(const int *ftnunitno, , int s_len); extern void -dr_hook_procinfo_(int *myproc, int *nproc, int *mpi_init); +c_dr_hook_procinfo(int *myproc, int *nproc, int *mpi_init); #ifdef __cplusplus } // extern "C" From aaad9fef2368c978768da2623012888678f6a659 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Tue, 12 Nov 2024 15:43:10 +0000 Subject: [PATCH 30/32] Fix compiler warnings --- src/fiat/drhook/drhook.c | 6 ++++-- src/fiat/drhook/extensions/papi/drhook_papi.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index db00bce4..242e1c98 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -2257,7 +2257,9 @@ process_options() if(fp) fprintf(fp,"[EC_DRHOOK:hostname:myproc:omltid:pid:unixtid] [YYYYMMDD:HHMMSS:walltime] [function@file:lineno] -- Max OpenMP threads = %d\n",drhook_oml_get_max_threads()); OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_SILENT=%d\n",pfx,TIMESTR(tid),FFL,opt_silent); - OPTPRINT(fp,"%s %s [%s@%s:%d] fp = %p\n",pfx,TIMESTR(tid),FFL,(void*)fp); + // Compiler gets concerned that we may be reading and writing to fp otherwise... 
+ void *definitely_not_fp = (void*)fp; + OPTPRINT(fp,"%s %s [%s@%s:%d] fp = %p\n",pfx,TIMESTR(tid),FFL,definitely_not_fp); env = getenv("ATP_ENABLED"); atp_enabled = env ? atoi(env) : 0; @@ -2567,7 +2569,7 @@ process_options() if (opt_nvtx_SWT < 0) opt_nvtx_SWT = nvtx_SWT_default; - OPTPRINT(fp, "%s %s [%s@%s:%g] DR_HOOK_NVTX_SPAM_WT=%g\n", pfx, TIMESTR(tid), FFL, nvtx_SWT_default); + OPTPRINT(fp, "%s %s [%s@%s:%d] DR_HOOK_NVTX_SPAM_WT=%g\n", pfx, TIMESTR(tid), FFL, nvtx_SWT_default); } } diff --git a/src/fiat/drhook/extensions/papi/drhook_papi.c b/src/fiat/drhook/extensions/papi/drhook_papi.c index 141e4d75..d6e51944 100644 --- a/src/fiat/drhook/extensions/papi/drhook_papi.c +++ b/src/fiat/drhook/extensions/papi/drhook_papi.c @@ -29,7 +29,7 @@ int drhook_papi_state=drhook_papi_notstarted; int drhook_papi_rank=0; /* C style! */ static int papi_counter_event_codes[MAXNPAPICNTRS]; -static char* papi_counter_names[MAXNPAPICNTRS]; +static const char* papi_counter_names[MAXNPAPICNTRS]; static int papi_counters_count = 0; /* function to use for thread id @@ -234,7 +234,7 @@ int drhook_papi_init(int rank){ return 0; for (int i=0; i < drhook_papi_max_num_counters(); i++) - free(papi_counter_names[i]); + free((void *) papi_counter_names[i]); drhook_papi_state=drhook_papi_running; if (drhook_papi_rank==0 && !silent) printf("DRHOOK:PAPI: Initialisation sucess\n"); From 7558ac5a84855407c6ae6b9a304348140604e854 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Wed, 13 Nov 2024 13:59:40 +0000 Subject: [PATCH 31/32] Fix valid_csv tests Tests were accidentally searching for the wrong string in the regex check --- tests/drhook/drhook_papi/CMakeLists.txt | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/drhook/drhook_papi/CMakeLists.txt b/tests/drhook/drhook_papi/CMakeLists.txt index 41ae28c2..0f641e97 100644 --- a/tests/drhook/drhook_papi/CMakeLists.txt +++ b/tests/drhook/drhook_papi/CMakeLists.txt @@ -32,14 +32,13 @@ ecbuild_add_test( TARGET 
fiat_test_drhook_papi_basic_valid_csv TYPE SCRIPT # Just making sure it's not an empty file COMMAND "find" - ARGS "." "-name" "drhook.prof.0.csv" "-type" "f" "-size" "+100c" - ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS + ARGS "." "-name" "drhook.prof.1.csv" "-type" "f" "-size" "+100c" CONDITION HAVE_DR_HOOK_PAPI ) if (TEST fiat_test_drhook_papi_basic_valid_csv) set_tests_properties(fiat_test_drhook_papi_basic_valid_csv PROPERTIES DEPENDS fiat_test_drhook_papi_basic - FAIL_REGULAR_EXPRESSION "no matches found" ) + PASS_REGULAR_EXPRESSION "drhook.prof.1.csv" ) endif() # Test MPI implementation @@ -54,7 +53,7 @@ ecbuild_add_executable( TARGET drhook_papi_mpi ecbuild_add_test( TARGET fiat_test_drhook_papi_mpi COMMAND drhook_papi_mpi MPI 5 - ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS + ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS DR_HOOK_PROFILE=fiat_test_drhook_papi_mpi CONDITION HAVE_DR_HOOK_PAPI AND HAVE_MPI ) @@ -64,8 +63,7 @@ ecbuild_add_test( TARGET fiat_test_drhook_papi_mpi_valid_csv # We have to do this weird thing with bash so that we can # use a redirect. CMake tests are really basic... COMMAND "bash" - ARGS "-c" "find . -name 'drhook.prof.[1-5].csv' -type f -size +100c | wc -l" - ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS + ARGS "-c" "find . -name 'fiat_test_drhook_papi_mpi.[1-5].csv' -type f -size +100c | wc -l" CONDITION HAVE_DR_HOOK_PAPI AND HAVE_MPI ) if (TEST fiat_test_drhook_papi_mpi_valid_csv) @@ -93,13 +91,12 @@ ecbuild_add_test( TARGET fiat_test_drhook_papi_user_filename_valid_csv # Just making sure it's not an empty file COMMAND "find" ARGS "." 
"-name" "fiat_test_drhook_papi_user_filename.1.csv" "-type" "f" - ENVIRONMENT DR_HOOK=1 DR_HOOK_OPT=COUNTERS CONDITION HAVE_DR_HOOK_PAPI ) if (TEST fiat_test_drhook_papi_user_filename_valid_csv) set_tests_properties(fiat_test_drhook_papi_user_filename_valid_csv PROPERTIES DEPENDS fiat_test_drhook_papi_user_filename - FAIL_REGULAR_EXPRESSION "no matches found" ) + PASS_REGULAR_EXPRESSION "fiat_test_drhook_papi_user_filename.1.csv" ) endif() # Test user specified counters From b20736989ad92787af27bc71aefc77033cbc3643 Mon Sep 17 00:00:00 2001 From: Andrew Beggs Date: Mon, 10 Feb 2025 15:53:39 +0000 Subject: [PATCH 32/32] Remove reliance on papi.h when building without PAPI Previously papi.h was needed by drhook_papi.h, even when building without PAPI. Now it is only needed when PAPI is requested as the type is defined locally. --- src/fiat/drhook/drhook.c | 9 ++++++++- src/fiat/drhook/extensions/papi/drhook_papi.h | 2 -- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index 242e1c98..9743232b 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -82,12 +82,19 @@ static int backtrace(void **buffer, int size) { return 0; } #include #include #include + +// Extension headers #ifdef DR_HOOK_HAVE_NVTX #include "dr_hook_nvtx.h" #endif +#ifdef DR_HOOK_HAVE_PAPI +#include "drhook_papi.h" +#else +// This type is in the signature of remove_calltree() +#define long_long long long +#endif #include "ec_get_cycles.h" -#include "drhook_papi.h" static long long int *thread_cycles = NULL; int drhook_lhook = 1; // NOTE: A global variable !! 
diff --git a/src/fiat/drhook/extensions/papi/drhook_papi.h b/src/fiat/drhook/extensions/papi/drhook_papi.h index f02b813d..8f3c0306 100644 --- a/src/fiat/drhook/extensions/papi/drhook_papi.h +++ b/src/fiat/drhook/extensions/papi/drhook_papi.h @@ -44,6 +44,4 @@ void drhook_papi_bzero(long_long* a); void drhook_papi_print(char* s, long_long* a, int header); -#else -#define long_long long long #endif