From 524f94c55bb6ca87514756b978d4a470a236d6ce Mon Sep 17 00:00:00 2001 From: nprasadm Date: Tue, 18 Oct 2022 12:37:47 +0530 Subject: [PATCH 01/30] aocl-scaLAPACK : AOCL progress callback hardening Signed-off-by: Nagendra AMD-Internal: [CPUPL-2506 ] Change-Id: I63b23b5e285d38b00edeb845704b8a9f6e263152 --- SRC/aocl_scalapack_progress.c | 5 +-- SRC/aocl_scalapack_progress.h | 19 +++++---- SRC/pdgetrf0.f | 14 ++++--- TESTING/AOCL_PROGRESS_TESTS/README.txt | 41 +++++++++---------- .../test_aocl_progress_pdgetrf.c | 16 ++++---- 5 files changed, 50 insertions(+), 45 deletions(-) diff --git a/SRC/aocl_scalapack_progress.c b/SRC/aocl_scalapack_progress.c index 0ac27935..0a52212b 100644 --- a/SRC/aocl_scalapack_progress.c +++ b/SRC/aocl_scalapack_progress.c @@ -20,11 +20,10 @@ void aocl_scalapack_set_progress_( aocl_scalapack_progress_callback func ) aocl_scalapack_progress_ptr_ = func; } -void aocl_scalapack_progress_(char* api, integer *lenapi, integer* progress, - integer* current_process, integer *total_processes) +void aocl_scalapack_progress_(const char* const api, const integer *lenapi, const integer* progress, + const integer* current_process, const integer *total_processes) { integer ret; - if (aocl_scalapack_progress_ptr_ != NULL ) { ret = aocl_scalapack_progress_ptr_ ( api, lenapi, progress, current_process, total_processes); } diff --git a/SRC/aocl_scalapack_progress.h b/SRC/aocl_scalapack_progress.h index 5941d261..ac295258 100644 --- a/SRC/aocl_scalapack_progress.h +++ b/SRC/aocl_scalapack_progress.h @@ -22,16 +22,21 @@ typedef unsigned long uinteger; #endif typedef integer ( *aocl_scalapack_progress_callback )( -char *api, -integer *lenapi, -integer *progress, -integer *current_process, -integer *total_processes +const char * const api, +const integer *lenapi, +const integer *progress, +const integer *current_process, +const integer *total_processes ); -integer aocl_scalapack_progress ( char* api, integer *lenapi, integer* progress, - integer* current_process, 
integer *total_processes ); +integer aocl_scalapack_progress ( +const char * const api, +const integer *lenapi, +const integer *progress, +const integer *current_process, +const integer *total_processes +); aocl_scalapack_progress_callback aocl_scalapack_progress_ptr_; diff --git a/SRC/pdgetrf0.f b/SRC/pdgetrf0.f index 502ad19e..e69066ca 100644 --- a/SRC/pdgetrf0.f +++ b/SRC/pdgetrf0.f @@ -148,8 +148,11 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) $ MN, MYCOL, MYROW, NPCOL, NPROW * #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS + +* .. Declaring below API name string and its length as const objects + CHARACTER*8, PARAMETER :: API_NAME = 'PDGETRF ' + INTEGER, PARAMETER :: LSTAGE = 8 #endif * .. * .. Local Arrays .. @@ -241,8 +244,6 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) #ifdef AOCL_PROGRESS CURRENT_RANK = MYCOL+MYROW*NPCOL TOTAL_MPI_PROCESSES = NPROW*NPCOL - LSTAGE = 7 - API_NAME = 'PDGETRF' #endif * Factor diagonal and subdiagonal blocks and test for exact @@ -279,8 +280,11 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) JB = MIN( MN-J+JA, DESCA( NB_ ) ) I = IA + J - JA #ifdef AOCL_PROGRESS +* Capture the Loop count 'J' to a separate 'PROGRESS' variable +* to avoid the corruption at application side. 
+ PROGRESS = J CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) #endif * * Factor diagonal and subdiagonal blocks and test for exact diff --git a/TESTING/AOCL_PROGRESS_TESTS/README.txt b/TESTING/AOCL_PROGRESS_TESTS/README.txt index 3a3d83a4..e9dfd9c5 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/README.txt +++ b/TESTING/AOCL_PROGRESS_TESTS/README.txt @@ -1,26 +1,28 @@ Checking AOCL-ScaLAPACK Operation Progress +=========================================== AOCL libraries may be used to perform lengthy computations (for example, matrix multiplications, solver involving large matrices). These operations/computations may go on for hours. AOCL progress feature provides mechanism for the application to check how far the computations have progressed. Selected set of APIs of AOCL libraries periodically updates the application with progress made so far via a callback function. Usage: - +====== The Application needs to define a callback function in specific format and register this callback function with the AOCL-ScaLAPACK library. The callback function prototype must be as defined below. 
int aocl_scalapack_progress( -char* api, -integer lenapi, -integer *progress, -integer *mpi_rank, -integer *total_mpi_processes +const char * const api, +const integer *lenapi, +const integer *progress, +const integer *mpi_rank, +const integer *total_mpi_processes ) -The table below explains various parameters +The table below explains various parameters: +----------------------------------------------------------------------------- Parameters | Purpose ---------------------------------------------------------------------- +----------------------------------------------------------------------------- api | Name of the API which is currently running lenapi | Length of API name character buffer progress | Linear progress made in current thread so far @@ -28,33 +30,28 @@ mpi_rank | Current process rank total_mpi_processes | Total number of processes used to perform the operation Callback Registration: - +---------------------- The callback function must be registered with library for it to report the progress. 
aocl_scalapack_set_progress(aocl_scalapack_progress); Example: -int aocl_scalapack_progress(char* api, int *lenapi, int *progress, int *mpi_rank, int *total_mpi_processes) +-------- +int aocl_scalapack_progress(const char* const api, const int *lenapi, const int *progress, const int *mpi_rank, const int *total_mpi_processes) { printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api, *progress,*total_mpi_processes ); return 0; } -Limitations -- AOCL-ScALAPACK Progress feature is currently supported only on Linux - Procedure to build and run the sample application with aocl progress feature: ============================================================================= -1) copy below AOCL libraries to the "/EXAMPLE/aocl_progress_example/" folder: - 1) libscalapack.a - 2) libflame.a - 3) blis-mt.a +1) The scalapack build system generates aocl-progress related test binaries along with the test suite application as part of the build process. + Refer to the AOCL User Guide for the scalapack build process. -2) Run the below command to build the 'pdgetrf' test application with 'aocl-progress' feature. - mpicc -O0 -g pdgerf_example_app.c libscalapack.a libflame.a -fopenmp libblis-mt.a -lm -lgfortran -o test +2) The aocl-progress related tests are generated in the 'TESTING/AOCL_PROGRESS_TESTS' folder inside the build folder. 
-3) Run the below commands to run the application: - Ex: mpirun -np 4 ./test 32 8 2 2 - mpirun -np 8 ./test 1024 32 4 2 +3) The aocl-progress related tests can be run with the below command: + Ex: mpirun -np 4 ./xap_pdgetrf 32 8 2 2 + mpirun -np 8 ./xap_pdgetrf 1024 32 4 2 diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgetrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgetrf.c index 3d29301d..c4e11afb 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgetrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgetrf.c @@ -19,13 +19,14 @@ void pdgetrf_(Int*, Int*, double*, Int*, Int*, Int*, Int*, Int*); void blacs_gridexit_(Int*); Int numroc_(Int*, Int*, Int*, Int*, Int*); -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); +Int AOCL_progress(const char* const api, const Int *lenapi, const Int *progress, const Int *mpi_rank, const Int *total_mpi_processes); -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +Int AOCL_progress(const char* const api, const Int *lenapi, const Int *progress, const Int *mpi_rank, const Int *total_mpi_processes) { - char api_name[20]; + char api_name [30]; memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + api_name[*lenapi - 1] = '\0'; + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i \n", *mpi_rank, api_name, *progress,*total_mpi_processes ); return 0; } @@ -61,7 +62,6 @@ int main(int argc, char **argv) { } assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); // Initialize BLACS Int iam, nprocs; @@ -81,13 +81,13 @@ int main(int argc, char **argv) { // Allocate and fill the matrices A and B // A[I,J] = (I == J ? 
5*n : I+J) double *A; - Int *IPPIV; + Int *IPPIV; A = (double *)calloc(mpA*nqA,sizeof(double)) ; if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - + IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } - + Int k = 0; for (Int j = 0; j < nqA; j++) { // local col Int l_j = j / nb; // which block From 8d6ab54f25ec59e248af5f770c66b8e8eec2ccec Mon Sep 17 00:00:00 2001 From: "nprasadm@amd.com" Date: Thu, 24 Nov 2022 10:47:54 +0530 Subject: [PATCH 02/30] aocl-scaLAPACK: Version string updated to 4.0.1 Change-Id: I550bb8bcf4a81f158276ed6a405efdcf87f3b2c9 --- SRC/get_aocl_scalapack_version.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/SRC/get_aocl_scalapack_version.c b/SRC/get_aocl_scalapack_version.c index 9cd0dfe2..78e0bba7 100644 --- a/SRC/get_aocl_scalapack_version.c +++ b/SRC/get_aocl_scalapack_version.c @@ -27,24 +27,24 @@ void get_aocl_scalapack_version_( version ) #endif { #ifdef AOCL_SCALAPACK_VERSION - char slmainversion[] = "AOCL-ScaLAPACK 4.0 "; + char slmainversion[] = "AOCL-ScaLAPACK 4.0.1 "; char slversion[1000]; char scalapackversion[] = ", supports ScaLAPACK 2.2.0"; int length, i; length = 0; - for (i = 0; i Date: Mon, 28 Nov 2022 10:52:11 +0530 Subject: [PATCH 03/30] convert the files in dos format to unix Change-Id: I51f18032f34a244cd7ff5231958f7f1636c62e0c --- AOCL_DTL/README.md | 38 +- AOCL_DTL/aocldtl.c | 978 +++++++++--------- AOCL_DTL/aocldtl.h | 338 +++--- AOCL_DTL/aocldtlcf.h | 152 +-- AOCL_DTL/aoclfal.c | 530 +++++----- AOCL_DTL/aoclfal.h | 102 +- AOCL_DTL/aocltpdef.h | 84 +- .../pdgerf_example_app.c | 264 ++--- .../test_aocl_progress_pcgeqrf.c | 312 +++--- .../test_aocl_progress_pcgetrf.c | 282 ++--- .../test_aocl_progress_pcpotrf.c | 272 ++--- .../test_aocl_progress_pdgeqrf.c | 304 +++--- .../test_aocl_progress_pdgetrf.c | 278 ++--- .../test_aocl_progress_pdpotrf.c | 268 ++--- 
.../test_aocl_progress_psgeqrf.c | 306 +++--- .../test_aocl_progress_psgetrf.c | 280 ++--- .../test_aocl_progress_pspotrf.c | 266 ++--- .../test_aocl_progress_pzgeqrf.c | 314 +++--- .../test_aocl_progress_pzgetrf.c | 282 ++--- .../test_aocl_progress_pzpotrf.c | 272 ++--- 20 files changed, 2961 insertions(+), 2961 deletions(-) diff --git a/AOCL_DTL/README.md b/AOCL_DTL/README.md index 5da5150a..a3e63b21 100644 --- a/AOCL_DTL/README.md +++ b/AOCL_DTL/README.md @@ -1,19 +1,19 @@ -############################################################################### -Guidelines to enable logging and tracing in ScaLAPACK library -############################################################################### - -Following are the steps to enable Trace and Log. - -1. Open header file "aocl-scalapack/AOCL_DTL/aocldtlcf.h" - i. Enable Trace by making the following macro as 1 : - #define AOCL_DTL_TRACE_ENABLE 1 - ii. Enable Log by making the following macro as 1 : - #define AOCL_DTL_LOG_ENABLE 1 - -2. After Step 1, configure the cmake with -DENABLE_DTL=ON config option to enable AOCL DTL feature. - For Example: cmake .. -DBUILD_SHARED_LIBS=OFF -DBLAS_LIBRARIES="-fopenmp libblis-mt.a" -DLAPACK_LIBRARIES=/libflame.a -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc -DCMAKE_Fortran_COMPILER=mpif90 -DUSE_OPTIMIZED_LAPACK_BLAS=OFF -DENABLE_DTL=ON - -3. Currently the DTL is supported only for the LU factorization API 'pdgetrf'. - -4. After the ScaLAPACK test suite is built (Refer the latest AOCL-userGuide for the build steps), execute the LU test application (with command: "mpirun ./xdlu" ) to get the DTL trace, log files. - For Example: "P31243_T31243_aocldtl_trace.txt" and "P31243_T31243_aocldtl_log.txt". +############################################################################### +Guidelines to enable logging and tracing in ScaLAPACK library +############################################################################### + +Following are the steps to enable Trace and Log. + +1. 
Open header file "aocl-scalapack/AOCL_DTL/aocldtlcf.h" + i. Enable Trace by making the following macro as 1 : + #define AOCL_DTL_TRACE_ENABLE 1 + ii. Enable Log by making the following macro as 1 : + #define AOCL_DTL_LOG_ENABLE 1 + +2. After Step 1, configure the cmake with -DENABLE_DTL=ON config option to enable AOCL DTL feature. + For Example: cmake .. -DBUILD_SHARED_LIBS=OFF -DBLAS_LIBRARIES="-fopenmp libblis-mt.a" -DLAPACK_LIBRARIES=/libflame.a -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc -DCMAKE_Fortran_COMPILER=mpif90 -DUSE_OPTIMIZED_LAPACK_BLAS=OFF -DENABLE_DTL=ON + +3. Currently the DTL is supported only for the LU factorization API 'pdgetrf'. + +4. After the ScaLAPACK test suite is built (Refer the latest AOCL-userGuide for the build steps), execute the LU test application (with command: "mpirun ./xdlu" ) to get the DTL trace, log files. + For Example: "P31243_T31243_aocldtl_trace.txt" and "P31243_T31243_aocldtl_log.txt". diff --git a/AOCL_DTL/aocldtl.c b/AOCL_DTL/aocldtl.c index 4096f6a8..e53a2e6e 100644 --- a/AOCL_DTL/aocldtl.c +++ b/AOCL_DTL/aocldtl.c @@ -1,489 +1,489 @@ -/*=================================================================== - * File Name : aocldtl.c - * - * Description : This file contains main logging functions. - * These functions are invoked though macros by - * end user. 
- * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * - *==================================================================*/ - -#include "aocltpdef.h" -#include "aocldtl.h" -#include "aoclfal.h" -#include "aocldtlcf.h" -#include "aoclflist.h" -#include "aoclos.h" - -#ifdef AOCL_DTL_AUTO_TRACE_ENABLE -#if defined(__linux__) -#define __USE_GNU -#include -#endif -#endif - -/* By default the trace level will be set to ALL User can configure this - parameter at run time using command line argument */ -uint32 gui32TraceLogLevel = AOCL_DTL_TRACE_LEVEL; - -/* The user can configure the file name in which he wants to dump the data */ -#if AOCL_DTL_TRACE_ENABLE -/* The file name for storing traced log added manually in the code */ -static char *pchDTL_TRACE_FILE = AOCL_DTL_TRACE_FILE; - -/* Global file pointer for trace logging */ -AOCL_FLIST_Node *gpTraceFileList = NULL; - -#endif - -#if AOCL_DTL_LOG_ENABLE -/* The file name for storing log data */ -static char *pchDTL_LOG_FILE = AOCL_DTL_LOG_FILE; - -/* Global file pointer for logging the results */ -AOCL_FLIST_Node *gpLogFileList = NULL; -#endif - -#if AOCL_DTL_AUTO_TRACE_ENABLE - -/* The file name for storing execution trace, - These files are used by compiler assisted execution testing */ -static char *pchDTL_AUTO_TRACE_FILE = AOCL_DTL_AUTO_TRACE_FILE; - -/* Global file pointer for logging the results */ -AOCL_FLIST_Node *gpAutoTraceFileList = NULL; -#endif - -/*=================================================================== -* Function Name : DTL_Initialize -* Description : Creates/Opens log file and initializes the -* global trace log level -* Input Parameter(s) : ui32CurrentLogLevel - current log level -* which user can configure at run time -* Output Parameter(s) : None -* Return parameter(s) : None -*==================================================================*/ -#ifdef AOCL_DTL_INITIALIZE_ENABLE - -void DTL_Initialize( - uint32 ui32CurrentLogLevel) -{ - /* If user selects invalid trace log level 
then the dafault trace log level - will be AOCL_DTL_LEVEL_ALL */ - if ((ui32CurrentLogLevel < 1) || (ui32CurrentLogLevel > AOCL_DTL_LEVEL_ALL)) - { - gui32TraceLogLevel = AOCL_DTL_LEVEL_ALL; - } - else - { - /* Assign the user requested log level to the global trace log level */ - gui32TraceLogLevel = ui32CurrentLogLevel; - } - -#if AOCL_DTL_TRACE_ENABLE - /* Create/Open the file to log the traced data */ - AOCL_FLIST_AddFile(pchDTL_TRACE_FILE, &gpTraceFileList, AOCL_gettid()); - - if (NULL == gpTraceFileList) - { - /* Unable to open the specified file.*/ - AOCL_DEBUGPRINT("Unable to create the trace file %s\n", pchDTL_TRACE_FILE); - return; - } -#endif - -#if AOCL_DTL_LOG_ENABLE - /* Create/Open the file to log the log data */ - AOCL_FLIST_AddFile(pchDTL_LOG_FILE, &gpLogFileList, AOCL_gettid()); - - if (NULL == gpLogFileList) - { - /* Unable to open the specified file.*/ - AOCL_DEBUGPRINT("Unable to create the log file %s\n", pchDTL_LOG_FILE); - return; - } -#endif - -#if AOCL_DTL_AUTO_TRACE_ENABLE - /* Create/Open the file to log the log data */ - AOCL_FLIST_AddFile(pchDTL_AUTO_TRACE_FILE, &gpAutoTraceFileList, AOCL_gettid()); - - if (NULL == gpAutoTraceFileList) - { - /* Unable to open the specified file.*/ - AOCL_DEBUGPRINT("Unable to create the log file %s\n", pchDTL_AUTO_TRACE_FILE); - return; - } -#endif - -} /* DTL_Initialize */ -#endif - -/*=================================================================== -* Function Name : DTL_Uninitialize -* Description : Close all the log files -* Input Parameter(s) : void -* Output Parameter(s) : None -* Return parameter(s) : None -*==================================================================*/ -#ifdef AOCL_DTL_INITIALIZE_ENABLE -void DTL_Uninitialize(void) -{ -#if AOCL_DTL_TRACE_ENABLE - /* Close the trace file */ - AOCL_FLIST_CloseAll(gpTraceFileList); -#endif - -#if AOCL_DTL_LOG_ENABLE - /* Close the log file */ - AOCL_FLIST_CloseAll(gpLogFileList); -#endif - -#if AOCL_DTL_AUTO_TRACE_ENABLE - /* Close the 
log file */ - AOCL_FLIST_CloseAll(gpAutoTraceFileList); -#endif - return; -} /* DTL_Uninitialise */ -#endif - -/*=================================================================== -* Function Name : DTL_Trace -* Description : This is common lowest level function -* to log the event to a file, This function -* will take case of choosing correct file -* according to the current thread and -* log the event as per format requested. - -* Input Parameter(s) : ui8LogLevel - Log Level -* ui8LogType - Identify log type (entry, exit etc) -* pi8FileName.- File name -* pi8FunctionName - Function Name -* ui32LineNumber - Line number -* pi8Message - Message to be printed -* Output Parameter(s) : None -* Return parameter(s) : None -*==================================================================*/ -#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_LOG_ENABLE) -void DTL_Trace( - uint8 ui8LogLevel, - uint8 ui8LogType, - const int8 *pi8FileName, - const int8 *pi8FunctionName, - uint32 ui32LineNumber, - const int8 *pi8Message) -{ - uint8 i = 0; - AOCL_FAL_FILE *pOutFile = NULL; - - if (ui8LogType == TRACE_TYPE_LOG || ui8LogType == TRACE_TYPE_RAW) - { -#if AOCL_DTL_LOG_ENABLE - pOutFile = AOCL_FLIST_GetFile(gpLogFileList, AOCL_gettid()); - - /* If trace file pointer is equal to NULL then return with out dumping data - to the file */ - if (NULL == pOutFile) - { - /* It might be the first call from the current thread, try to create - new trace for this thread. 
*/ - pOutFile = AOCL_FLIST_AddFile(pchDTL_LOG_FILE, &gpLogFileList, AOCL_gettid()); - - if (NULL == pOutFile) - { - AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); - return; - } - } -#endif /* Logging enabled */ - } - else - { -#if AOCL_DTL_TRACE_ENABLE - pOutFile = AOCL_FLIST_GetFile(gpTraceFileList, AOCL_gettid()); - - /* If trace file pointer is equal to NULL then return with out dumping data - to file */ - if (NULL == pOutFile) - { - /* It might be the first call from the current thread, try to create - new trace for this thread. */ - pOutFile = AOCL_FLIST_AddFile(pchDTL_TRACE_FILE, &gpTraceFileList, AOCL_gettid()); - - if (NULL == pOutFile) - { - AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); - return; - } - } -#endif /* Trace Enabled */ - } - - /* Log the message only if the log level is less than or equal to global log - level set while initialization */ - if (ui8LogLevel <= gui32TraceLogLevel) - { - - /* Indent as per level if is function call trace */ - if ((ui8LogLevel >= AOCL_DTL_LEVEL_TRACE_1) && - (ui8LogLevel <= AOCL_DTL_LEVEL_TRACE_8)) - { - /* this loop is for formating the output log file */ - for (i = 0; i < (ui8LogLevel - AOCL_DTL_LEVEL_TRACE_1); i++) - { - /* print tabs in the output file */ - fprintf(pOutFile, "\t"); - } - } - - switch (ui8LogType) - { - case TRACE_TYPE_FENTRY: - fprintf(pOutFile, "In %s()...\n", pi8FunctionName); - break; - - case TRACE_TYPE_FEXIT: - if (pi8Message == NULL) - { /* Function returned successfully */ - fprintf(pOutFile, "Out of %s()\n", pi8FunctionName); - } - else - { /* Function failed to complete, use message to get error */ - fprintf(pOutFile, "Out of %s() with error %s\n", pi8FunctionName, pi8Message); - } - break; - - case TRACE_TYPE_LOG: - fprintf(pOutFile, "%s:%d:%s\n", pi8FileName, ui32LineNumber, pi8Message); - break; - - case TRACE_TYPE_RAW: - fprintf(pOutFile, "%s\n", pi8Message); - break; - } - fflush(pOutFile); - } -} /* DTL_Data_Trace_Entry */ -#endif - 
-/*=================================================================== -* Function Name : DTL_DumpData -* Description : This function is mainly used for dumping -* the data into the file -* Input Parameter(s) : pui8Buffer - the buffer to be dumped -* ui32BufferSize.- the no. of bytes to be dumped -* ui8DataType - the data type char/int32/int32 -* Output Parameter(s) : None -* Return parameter(s) : None -*==================================================================*/ -#if AOCL_DTL_DUMP_ENABLE -void DTL_DumpData( - uint8 ui8LogLevel, - void *pvBuffer, - uint32 ui32BufferSize, - uint8 ui8DataType, - int8 *pi8Message, - int8 i8OutputType) -{ - uint32 j; - - /* Pointer to store the buffer */ - uint32 *pui32Array, ui32LocalData; - uint16 *pui16Array; - uint8 *pui8CharArray; - int8 *pi8CharString; - - /* If dump (log) file pointer is equal to NULL return with out dumping data to file */ - AOCL_FAL_FILE *pDumpFile = AOCL_FLIST_GetFile(gpLogFileList, AOCL_gettid()); - /* Log the message only if the log level is less than or equal to global log - level set while initialization */ - if (ui8LogLevel > gui32TraceLogLevel) - { - return; - } - - /* The string message */ - if (pi8Message != NULL) - { - fprintf(pDumpFile, "%s :", pi8Message); - } - - /* Assuming that if the Data type for character = 1 - * the Data type for uint32 = 2 - * the data type for uint32 = 4 - * the data type for string = 3 - */ - if (ui8DataType == AOCL_STRING_DATA_TYPE) - { - /* Typecast the void buffer to character buffer */ - pi8CharString = (int8 *)pvBuffer; - fprintf(pDumpFile, "%s", pi8CharString); - fprintf(pDumpFile, "\n"); - } - - if (ui8DataType == AOCL_CHAR_DATA_TYPE) - { - /* Typecast the void buffer to character buffer */ - pui8CharArray = (uint8 *)pvBuffer; - - for (j = 0; j < ui32BufferSize; j++) - { - if (i8OutputType == AOCL_LOG_HEX_VALUE) - { - fprintf(pDumpFile, "\n\t%5d:0x%x", j, pui8CharArray[j]); - } - else - { - fprintf(pDumpFile, "\n\t%5d:%u", j, pui8CharArray[j]); - } - } - 
fprintf(pDumpFile, "\n"); - } - - if (ui8DataType == AOCL_UINT16_DATA_TYPE) - { - /* Typecast the void buffer to uint32 bit buffer */ - pui16Array = (uint16 *)pvBuffer; - - /* dump the data in the file line by line */ - for (j = 0; j < ui32BufferSize; j++) - { - if (i8OutputType == AOCL_LOG_HEX_VALUE) - { - fprintf(pDumpFile, "\n\t%5d:0x%x", j, pui16Array[j]); - } - else - { - fprintf(pDumpFile, "\n\t%5d:%u", j, pui16Array[j]); - } - } - fprintf(pDumpFile, "\n"); - - } /* End of if */ - - if (ui8DataType == AOCL_UINT32_DATA_TYPE) - { - /* Typecast the void buffer to uint32 buffer */ - pui32Array = (uint32 *)pvBuffer; - - /* dump the data in the file line by line */ - for (j = 0; j < ui32BufferSize; j++) - { - ui32LocalData = pui32Array[j]; - - if (i8OutputType == AOCL_LOG_HEX_VALUE) - { - fprintf(pDumpFile, "\n\t%5d:0x%x", j, ui32LocalData); - } - else - { - fprintf(pDumpFile, "\n\t%5d:%u", j, ui32LocalData); - } - } - fprintf(pDumpFile, "\n"); - } /* End of if */ - fflush(pDumpFile); -} /* DTL_DumpData */ -#endif - -/* This is enabled by passing ETRACE_ENABLE=1 to make */ -#ifdef AOCL_DTL_AUTO_TRACE_ENABLE - -/* - Disable intrumentation for these functions as they will also be - called from compiler generated instumation code to trace - function execution. - - It needs to be part of declration in the C file so can't be - moved to header file. - - WARNING: These functions are automatically invoked. however any function - called from this should have instumtation disable to avoid recursive - calls which results in hang/crash. 
- */ -void __cyg_profile_func_enter(void *this_fn, void *call_site) __attribute__((no_instrument_function)); -void __cyg_profile_func_exit(void *this_fn, void *call_site) __attribute__((no_instrument_function)); - -/*=================================================================== -* Function Name : __cyg_profile_func_enter -* Description : This function is automatically invoked -* by compiler instrumntation when the flow -* enters a function. -* Input Parameter(s) : pvThisFunc - Address of function entered. -* call_site.- Address of the caller -* Output Parameter(s) : None -* Return parameter(s) : None -*==================================================================*/ -void __cyg_profile_func_enter(void *pvThisFunc, void *pvCaller) -{ - Dl_info info; - dladdr(pvThisFunc, &info); - - AOCL_FAL_FILE *pOutFile = NULL; - - pOutFile = AOCL_FLIST_GetFile(gpAutoTraceFileList, AOCL_gettid()); - - /* If trace file pointer is equal to NULL then return with out dumping data - to the file */ - if (NULL == pOutFile) - { - /* It might be the first call from the current thread, try to create - new trace for this thread. */ - pOutFile = AOCL_FLIST_AddFile(pchDTL_AUTO_TRACE_FILE, &gpAutoTraceFileList, AOCL_gettid()); - - if (NULL == pOutFile) - { - AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); - return; - } - } - - fprintf(pOutFile, "\n%lu:+:%p", - AOCL_getTimestamp(), - (void *)(pvThisFunc - info.dli_fbase)); - fflush(pOutFile); -} - -/*=================================================================== -* Function Name : __cyg_profile_func_exit -* Description : This function is automatically invoked -* by compiler before returing from a -* function. -* Input Parameter(s) : pvThisFunc - Address of function to be existed. 
-* call_site.- Address of the caller -* Output Parameter(s) : None -* Return parameter(s) : None -*==================================================================*/ -void __cyg_profile_func_exit(void *pvThisFunc, void *pvCaller) -{ - Dl_info info; - dladdr(pvThisFunc, &info); - AOCL_FAL_FILE *pOutFile = NULL; - - pOutFile = AOCL_FLIST_GetFile(gpAutoTraceFileList, AOCL_gettid()); - - /* If trace file pointer is equal to NULL then return with out dumping data - to the file */ - if (NULL == pOutFile) - { - /* It might be the first call from the current thread, try to create - new trace for this thread. */ - pOutFile = AOCL_FLIST_AddFile(pchDTL_AUTO_TRACE_FILE, &gpAutoTraceFileList, AOCL_gettid()); - - if (NULL == pOutFile) - { - AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); - return; - } - } - - fprintf(pOutFile, "\n%lu:-:%p", - AOCL_getTimestamp(), - (void *)(pvThisFunc - info.dli_fbase)); - fflush(pOutFile); -} - -#endif /* AOCL_AUTO_TRACE_ENABLE */ - -/* ------------------ End of aocldtl.c ---------------------- */ +/*=================================================================== + * File Name : aocldtl.c + * + * Description : This file contains main logging functions. + * These functions are invoked though macros by + * end user. 
+ * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ + +#include "aocltpdef.h" +#include "aocldtl.h" +#include "aoclfal.h" +#include "aocldtlcf.h" +#include "aoclflist.h" +#include "aoclos.h" + +#ifdef AOCL_DTL_AUTO_TRACE_ENABLE +#if defined(__linux__) +#define __USE_GNU +#include +#endif +#endif + +/* By default the trace level will be set to ALL User can configure this + parameter at run time using command line argument */ +uint32 gui32TraceLogLevel = AOCL_DTL_TRACE_LEVEL; + +/* The user can configure the file name in which he wants to dump the data */ +#if AOCL_DTL_TRACE_ENABLE +/* The file name for storing traced log added manually in the code */ +static char *pchDTL_TRACE_FILE = AOCL_DTL_TRACE_FILE; + +/* Global file pointer for trace logging */ +AOCL_FLIST_Node *gpTraceFileList = NULL; + +#endif + +#if AOCL_DTL_LOG_ENABLE +/* The file name for storing log data */ +static char *pchDTL_LOG_FILE = AOCL_DTL_LOG_FILE; + +/* Global file pointer for logging the results */ +AOCL_FLIST_Node *gpLogFileList = NULL; +#endif + +#if AOCL_DTL_AUTO_TRACE_ENABLE + +/* The file name for storing execution trace, + These files are used by compiler assisted execution testing */ +static char *pchDTL_AUTO_TRACE_FILE = AOCL_DTL_AUTO_TRACE_FILE; + +/* Global file pointer for logging the results */ +AOCL_FLIST_Node *gpAutoTraceFileList = NULL; +#endif + +/*=================================================================== +* Function Name : DTL_Initialize +* Description : Creates/Opens log file and initializes the +* global trace log level +* Input Parameter(s) : ui32CurrentLogLevel - current log level +* which user can configure at run time +* Output Parameter(s) : None +* Return parameter(s) : None +*==================================================================*/ +#ifdef AOCL_DTL_INITIALIZE_ENABLE + +void DTL_Initialize( + uint32 ui32CurrentLogLevel) +{ + /* If user selects invalid trace log level 
then the dafault trace log level + will be AOCL_DTL_LEVEL_ALL */ + if ((ui32CurrentLogLevel < 1) || (ui32CurrentLogLevel > AOCL_DTL_LEVEL_ALL)) + { + gui32TraceLogLevel = AOCL_DTL_LEVEL_ALL; + } + else + { + /* Assign the user requested log level to the global trace log level */ + gui32TraceLogLevel = ui32CurrentLogLevel; + } + +#if AOCL_DTL_TRACE_ENABLE + /* Create/Open the file to log the traced data */ + AOCL_FLIST_AddFile(pchDTL_TRACE_FILE, &gpTraceFileList, AOCL_gettid()); + + if (NULL == gpTraceFileList) + { + /* Unable to open the specified file.*/ + AOCL_DEBUGPRINT("Unable to create the trace file %s\n", pchDTL_TRACE_FILE); + return; + } +#endif + +#if AOCL_DTL_LOG_ENABLE + /* Create/Open the file to log the log data */ + AOCL_FLIST_AddFile(pchDTL_LOG_FILE, &gpLogFileList, AOCL_gettid()); + + if (NULL == gpLogFileList) + { + /* Unable to open the specified file.*/ + AOCL_DEBUGPRINT("Unable to create the log file %s\n", pchDTL_LOG_FILE); + return; + } +#endif + +#if AOCL_DTL_AUTO_TRACE_ENABLE + /* Create/Open the file to log the log data */ + AOCL_FLIST_AddFile(pchDTL_AUTO_TRACE_FILE, &gpAutoTraceFileList, AOCL_gettid()); + + if (NULL == gpAutoTraceFileList) + { + /* Unable to open the specified file.*/ + AOCL_DEBUGPRINT("Unable to create the log file %s\n", pchDTL_AUTO_TRACE_FILE); + return; + } +#endif + +} /* DTL_Initialize */ +#endif + +/*=================================================================== +* Function Name : DTL_Uninitialize +* Description : Close all the log files +* Input Parameter(s) : void +* Output Parameter(s) : None +* Return parameter(s) : None +*==================================================================*/ +#ifdef AOCL_DTL_INITIALIZE_ENABLE +void DTL_Uninitialize(void) +{ +#if AOCL_DTL_TRACE_ENABLE + /* Close the trace file */ + AOCL_FLIST_CloseAll(gpTraceFileList); +#endif + +#if AOCL_DTL_LOG_ENABLE + /* Close the log file */ + AOCL_FLIST_CloseAll(gpLogFileList); +#endif + +#if AOCL_DTL_AUTO_TRACE_ENABLE + /* Close the 
log file */ + AOCL_FLIST_CloseAll(gpAutoTraceFileList); +#endif + return; +} /* DTL_Uninitialise */ +#endif + +/*=================================================================== +* Function Name : DTL_Trace +* Description : This is common lowest level function +* to log the event to a file, This function +* will take case of choosing correct file +* according to the current thread and +* log the event as per format requested. + +* Input Parameter(s) : ui8LogLevel - Log Level +* ui8LogType - Identify log type (entry, exit etc) +* pi8FileName.- File name +* pi8FunctionName - Function Name +* ui32LineNumber - Line number +* pi8Message - Message to be printed +* Output Parameter(s) : None +* Return parameter(s) : None +*==================================================================*/ +#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_LOG_ENABLE) +void DTL_Trace( + uint8 ui8LogLevel, + uint8 ui8LogType, + const int8 *pi8FileName, + const int8 *pi8FunctionName, + uint32 ui32LineNumber, + const int8 *pi8Message) +{ + uint8 i = 0; + AOCL_FAL_FILE *pOutFile = NULL; + + if (ui8LogType == TRACE_TYPE_LOG || ui8LogType == TRACE_TYPE_RAW) + { +#if AOCL_DTL_LOG_ENABLE + pOutFile = AOCL_FLIST_GetFile(gpLogFileList, AOCL_gettid()); + + /* If trace file pointer is equal to NULL then return with out dumping data + to the file */ + if (NULL == pOutFile) + { + /* It might be the first call from the current thread, try to create + new trace for this thread. 
*/ + pOutFile = AOCL_FLIST_AddFile(pchDTL_LOG_FILE, &gpLogFileList, AOCL_gettid()); + + if (NULL == pOutFile) + { + AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); + return; + } + } +#endif /* Logging enabled */ + } + else + { +#if AOCL_DTL_TRACE_ENABLE + pOutFile = AOCL_FLIST_GetFile(gpTraceFileList, AOCL_gettid()); + + /* If trace file pointer is equal to NULL then return with out dumping data + to file */ + if (NULL == pOutFile) + { + /* It might be the first call from the current thread, try to create + new trace for this thread. */ + pOutFile = AOCL_FLIST_AddFile(pchDTL_TRACE_FILE, &gpTraceFileList, AOCL_gettid()); + + if (NULL == pOutFile) + { + AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); + return; + } + } +#endif /* Trace Enabled */ + } + + /* Log the message only if the log level is less than or equal to global log + level set while initialization */ + if (ui8LogLevel <= gui32TraceLogLevel) + { + + /* Indent as per level if is function call trace */ + if ((ui8LogLevel >= AOCL_DTL_LEVEL_TRACE_1) && + (ui8LogLevel <= AOCL_DTL_LEVEL_TRACE_8)) + { + /* this loop is for formating the output log file */ + for (i = 0; i < (ui8LogLevel - AOCL_DTL_LEVEL_TRACE_1); i++) + { + /* print tabs in the output file */ + fprintf(pOutFile, "\t"); + } + } + + switch (ui8LogType) + { + case TRACE_TYPE_FENTRY: + fprintf(pOutFile, "In %s()...\n", pi8FunctionName); + break; + + case TRACE_TYPE_FEXIT: + if (pi8Message == NULL) + { /* Function returned successfully */ + fprintf(pOutFile, "Out of %s()\n", pi8FunctionName); + } + else + { /* Function failed to complete, use message to get error */ + fprintf(pOutFile, "Out of %s() with error %s\n", pi8FunctionName, pi8Message); + } + break; + + case TRACE_TYPE_LOG: + fprintf(pOutFile, "%s:%d:%s\n", pi8FileName, ui32LineNumber, pi8Message); + break; + + case TRACE_TYPE_RAW: + fprintf(pOutFile, "%s\n", pi8Message); + break; + } + fflush(pOutFile); + } +} /* DTL_Data_Trace_Entry */ +#endif + 
+/*=================================================================== +* Function Name : DTL_DumpData +* Description : This function is mainly used for dumping +* the data into the file +* Input Parameter(s) : pui8Buffer - the buffer to be dumped +* ui32BufferSize.- the no. of bytes to be dumped +* ui8DataType - the data type char/int32/int32 +* Output Parameter(s) : None +* Return parameter(s) : None +*==================================================================*/ +#if AOCL_DTL_DUMP_ENABLE +void DTL_DumpData( + uint8 ui8LogLevel, + void *pvBuffer, + uint32 ui32BufferSize, + uint8 ui8DataType, + int8 *pi8Message, + int8 i8OutputType) +{ + uint32 j; + + /* Pointer to store the buffer */ + uint32 *pui32Array, ui32LocalData; + uint16 *pui16Array; + uint8 *pui8CharArray; + int8 *pi8CharString; + + /* If dump (log) file pointer is equal to NULL return with out dumping data to file */ + AOCL_FAL_FILE *pDumpFile = AOCL_FLIST_GetFile(gpLogFileList, AOCL_gettid()); + /* Log the message only if the log level is less than or equal to global log + level set while initialization */ + if (ui8LogLevel > gui32TraceLogLevel) + { + return; + } + + /* The string message */ + if (pi8Message != NULL) + { + fprintf(pDumpFile, "%s :", pi8Message); + } + + /* Assuming that if the Data type for character = 1 + * the Data type for uint32 = 2 + * the data type for uint32 = 4 + * the data type for string = 3 + */ + if (ui8DataType == AOCL_STRING_DATA_TYPE) + { + /* Typecast the void buffer to character buffer */ + pi8CharString = (int8 *)pvBuffer; + fprintf(pDumpFile, "%s", pi8CharString); + fprintf(pDumpFile, "\n"); + } + + if (ui8DataType == AOCL_CHAR_DATA_TYPE) + { + /* Typecast the void buffer to character buffer */ + pui8CharArray = (uint8 *)pvBuffer; + + for (j = 0; j < ui32BufferSize; j++) + { + if (i8OutputType == AOCL_LOG_HEX_VALUE) + { + fprintf(pDumpFile, "\n\t%5d:0x%x", j, pui8CharArray[j]); + } + else + { + fprintf(pDumpFile, "\n\t%5d:%u", j, pui8CharArray[j]); + } + } + 
fprintf(pDumpFile, "\n"); + } + + if (ui8DataType == AOCL_UINT16_DATA_TYPE) + { + /* Typecast the void buffer to uint32 bit buffer */ + pui16Array = (uint16 *)pvBuffer; + + /* dump the data in the file line by line */ + for (j = 0; j < ui32BufferSize; j++) + { + if (i8OutputType == AOCL_LOG_HEX_VALUE) + { + fprintf(pDumpFile, "\n\t%5d:0x%x", j, pui16Array[j]); + } + else + { + fprintf(pDumpFile, "\n\t%5d:%u", j, pui16Array[j]); + } + } + fprintf(pDumpFile, "\n"); + + } /* End of if */ + + if (ui8DataType == AOCL_UINT32_DATA_TYPE) + { + /* Typecast the void buffer to uint32 buffer */ + pui32Array = (uint32 *)pvBuffer; + + /* dump the data in the file line by line */ + for (j = 0; j < ui32BufferSize; j++) + { + ui32LocalData = pui32Array[j]; + + if (i8OutputType == AOCL_LOG_HEX_VALUE) + { + fprintf(pDumpFile, "\n\t%5d:0x%x", j, ui32LocalData); + } + else + { + fprintf(pDumpFile, "\n\t%5d:%u", j, ui32LocalData); + } + } + fprintf(pDumpFile, "\n"); + } /* End of if */ + fflush(pDumpFile); +} /* DTL_DumpData */ +#endif + +/* This is enabled by passing ETRACE_ENABLE=1 to make */ +#ifdef AOCL_DTL_AUTO_TRACE_ENABLE + +/* + Disable intrumentation for these functions as they will also be + called from compiler generated instumation code to trace + function execution. + + It needs to be part of declration in the C file so can't be + moved to header file. + + WARNING: These functions are automatically invoked. however any function + called from this should have instumtation disable to avoid recursive + calls which results in hang/crash. 
+ */ +void __cyg_profile_func_enter(void *this_fn, void *call_site) __attribute__((no_instrument_function)); +void __cyg_profile_func_exit(void *this_fn, void *call_site) __attribute__((no_instrument_function)); + +/*=================================================================== +* Function Name : __cyg_profile_func_enter +* Description : This function is automatically invoked +* by compiler instrumntation when the flow +* enters a function. +* Input Parameter(s) : pvThisFunc - Address of function entered. +* call_site.- Address of the caller +* Output Parameter(s) : None +* Return parameter(s) : None +*==================================================================*/ +void __cyg_profile_func_enter(void *pvThisFunc, void *pvCaller) +{ + Dl_info info; + dladdr(pvThisFunc, &info); + + AOCL_FAL_FILE *pOutFile = NULL; + + pOutFile = AOCL_FLIST_GetFile(gpAutoTraceFileList, AOCL_gettid()); + + /* If trace file pointer is equal to NULL then return with out dumping data + to the file */ + if (NULL == pOutFile) + { + /* It might be the first call from the current thread, try to create + new trace for this thread. */ + pOutFile = AOCL_FLIST_AddFile(pchDTL_AUTO_TRACE_FILE, &gpAutoTraceFileList, AOCL_gettid()); + + if (NULL == pOutFile) + { + AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); + return; + } + } + + fprintf(pOutFile, "\n%lu:+:%p", + AOCL_getTimestamp(), + (void *)(pvThisFunc - info.dli_fbase)); + fflush(pOutFile); +} + +/*=================================================================== +* Function Name : __cyg_profile_func_exit +* Description : This function is automatically invoked +* by compiler before returing from a +* function. +* Input Parameter(s) : pvThisFunc - Address of function to be existed. 
+* call_site.- Address of the caller +* Output Parameter(s) : None +* Return parameter(s) : None +*==================================================================*/ +void __cyg_profile_func_exit(void *pvThisFunc, void *pvCaller) +{ + Dl_info info; + dladdr(pvThisFunc, &info); + AOCL_FAL_FILE *pOutFile = NULL; + + pOutFile = AOCL_FLIST_GetFile(gpAutoTraceFileList, AOCL_gettid()); + + /* If trace file pointer is equal to NULL then return with out dumping data + to the file */ + if (NULL == pOutFile) + { + /* It might be the first call from the current thread, try to create + new trace for this thread. */ + pOutFile = AOCL_FLIST_AddFile(pchDTL_AUTO_TRACE_FILE, &gpAutoTraceFileList, AOCL_gettid()); + + if (NULL == pOutFile) + { + AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); + return; + } + } + + fprintf(pOutFile, "\n%lu:-:%p", + AOCL_getTimestamp(), + (void *)(pvThisFunc - info.dli_fbase)); + fflush(pOutFile); +} + +#endif /* AOCL_AUTO_TRACE_ENABLE */ + +/* ------------------ End of aocldtl.c ---------------------- */ diff --git a/AOCL_DTL/aocldtl.h b/AOCL_DTL/aocldtl.h index bbd610e3..9e8af18b 100644 --- a/AOCL_DTL/aocldtl.h +++ b/AOCL_DTL/aocldtl.h @@ -1,169 +1,169 @@ -/*=================================================================== - * File Name : aocldtl.h - * - * Description : This is main interface file for the end user - * It provides defination for all macros to be - * used by user to add debug/trace information. 
- * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * - *==================================================================*/ - -#ifndef _AOCLDTL_H_ -#define _AOCLDTL_H_ - -#include "aocldtlcf.h" -#include "aocltpdef.h" -#include "aoclflist.h" - -#define TRACE_TYPE_FENTRY (1) -#define TRACE_TYPE_FEXIT (2) -#define TRACE_TYPE_LOG (3) -#define TRACE_TYPE_RAW (4) - -/* Type definition for printf */ -#define AOCL_DEBUGPRINT printf - -/* Customization for scalapack */ -#if AOCL_DTL_LOG_ENABLE - #define BUFF_SIZE 256 - #define BUFFER buffer - /*Variable Argument macro for snprintf*/ - #define AOCL_DTL_SNPRINTF(...) snprintf(BUFFER,BUFF_SIZE,__VA_ARGS__) - -#else - #define AOCL_DTL_SNPRINTF(...) - -#endif - - -/* Define the AOCL_DTL_INITIALIZE_ENABLE if any of the debug macro - * are defined */ -#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_DUMP_ENABLE || AOCL_DTL_LOG_ENABLE) -#define AOCL_DTL_INITIALIZE_ENABLE -#endif - -#if AOCL_DTL_TRACE_ENABLE -/* Entry macro to trace the flow of control The parameter LogLevel specifies - the log level String will preferably contains the function name in which - this macro is invoked */ -#define AOCL_DTL_TRACE_ENTRY(LogLevel) \ - DTL_Trace(LogLevel, \ - TRACE_TYPE_FENTRY, \ - __FILE__, \ - __FUNCTION__, \ - __LINE__, \ - NULL); -#else -/* Dummy macro definition if the AOCL_DTL_TRACE_ENABLE macro is not enabled */ -#define AOCL_DTL_TRACE_ENTRY(LogLevel) -#endif - -#if AOCL_DTL_TRACE_ENABLE -/* Exit macro to trace the flow of control The parameter LogLevel specifies - log level String will preferably contains the function name in which this - macro is invoked */ -#define AOCL_DTL_TRACE_EXIT(LogLevel) \ - DTL_Trace(LogLevel, \ - TRACE_TYPE_FEXIT, \ - __FILE__, \ - __FUNCTION__, \ - __LINE__, \ - NULL); - -#define AOCL_DTL_TRACE_EXIT_ERR(LogLevel, Message) \ - DTL_Trace(LogLevel, \ - TRACE_TYPE_FEXIT, \ - __FILE__, \ - __FUNCTION__, \ - __LINE__, \ - Message); -#else -/* Dummy macro definition if the AOCL_DTL_TRACE_ENABLE macro is not 
enabled */ -#define AOCL_DTL_TRACE_EXIT(LogLevel) -#define AOCL_DTL_TRACE_EXIT_ERR(LogLevel, Message) -#endif - -#if AOCL_DTL_DUMP_ENABLE -/* Macro to Dump the DATA The parameters Buffer contains the data to be - dumped BufferSize specifies the no. of bytes to be dumped DataType - specifies the data type of Buffer */ -#define AOCL_DTL_DUMP(LogLevel, Buffer, BufferSize, DataType, String, OutputType) \ - /* Call the Dump function to Dump the DATA */ \ - DTL_DumpData(LogLevel, \ - Buffer, \ - BufferSize, \ - DataType, \ - String, \ - OutputType); -#else -/* Dummy macro definition if the AOCL_DTL_DUMP_ENABLE macro is not enabled */ -#define AOCL_DTL_DUMP(Buffer, BufferSize, DataType, String, OutputType) - -#endif - -#if AOCL_DTL_LOG_ENABLE -/* Macro to log the Data */ -#define AOCL_DTL_LOG(LogLevel, Message) \ - DTL_Trace(LogLevel, \ - TRACE_TYPE_LOG, \ - __FILE__, \ - __FUNCTION__, \ - __LINE__, \ - Message); -#else -/* Dummy macro definition if the AOCL_DTL_LOG_ENABLE macro is not enabled */ -#define AOCL_DTL_LOG(LogLevel, Message) -#endif - -/* Macro to initialize the prerequisite for debuging */ -#ifdef AOCL_DTL_INITIALIZE_ENABLE -#define AOCL_DTL_INITIALIZE(CURRENT_LOG_LEVEL) \ - DTL_Initialize(CURRENT_LOG_LEVEL); -#else -/* Dummy macro definition if the AOCL_DTL_INITIALIZE macro is not enabled */ -#define AOCL_DTL_INITIALIZE(CURRENT_LOG_LEVEL) -#endif - -/* Macro for uninitializing the prerequisite */ -#ifdef AOCL_DTL_INITIALIZE_ENABLE -#define AOCL_DTL_UNINITIALIZE() \ - DTL_Uninitialize(); -#else -/* Dummy macro definition if the AOCL_DTL_INITIALIZE macro is not enabled */ -#define AOCL_DTL_UNINITIALIZE() -#endif - -#ifdef AOCL_DTL_INITIALIZE_ENABLE -/* Prototypes for initializing and uninitializing the debug functions */ -void DTL_Initialize( - uint32 ui32CurrentLogLevel); -void DTL_Uninitialize(void); -#endif - -#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_LOG_ENABLE) -/* Debug trace Function protoypes */ -void DTL_Trace( - uint8 ui8LogLevel, - uint8 ui8LogType, - 
const int8 *pi8FileName, - const int8 *pi8FunctionName, - uint32 ui32LineNumber, - const int8 *pi8Message); - -#endif - -#if AOCL_DTL_DUMP_ENABLE -/* Function Prototype for dumping the data */ -void DTL_DumpData( - uint8 ui8LogLevel, - void *pvBuffer, - uint32 ui32BufferSize, - uint8 ui8DataType, - int8 *pi8Message, - int8 i8OutputType); -#endif - -#endif /* _AOCLDTL_H_ */ - -/* --------------- End of aocldtl.h ----------------- */ +/*=================================================================== + * File Name : aocldtl.h + * + * Description : This is main interface file for the end user + * It provides defination for all macros to be + * used by user to add debug/trace information. + * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ + +#ifndef _AOCLDTL_H_ +#define _AOCLDTL_H_ + +#include "aocldtlcf.h" +#include "aocltpdef.h" +#include "aoclflist.h" + +#define TRACE_TYPE_FENTRY (1) +#define TRACE_TYPE_FEXIT (2) +#define TRACE_TYPE_LOG (3) +#define TRACE_TYPE_RAW (4) + +/* Type definition for printf */ +#define AOCL_DEBUGPRINT printf + +/* Customization for scalapack */ +#if AOCL_DTL_LOG_ENABLE + #define BUFF_SIZE 256 + #define BUFFER buffer + /*Variable Argument macro for snprintf*/ + #define AOCL_DTL_SNPRINTF(...) snprintf(BUFFER,BUFF_SIZE,__VA_ARGS__) + +#else + #define AOCL_DTL_SNPRINTF(...) 
+ +#endif + + +/* Define the AOCL_DTL_INITIALIZE_ENABLE if any of the debug macro + * are defined */ +#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_DUMP_ENABLE || AOCL_DTL_LOG_ENABLE) +#define AOCL_DTL_INITIALIZE_ENABLE +#endif + +#if AOCL_DTL_TRACE_ENABLE +/* Entry macro to trace the flow of control The parameter LogLevel specifies + the log level String will preferably contains the function name in which + this macro is invoked */ +#define AOCL_DTL_TRACE_ENTRY(LogLevel) \ + DTL_Trace(LogLevel, \ + TRACE_TYPE_FENTRY, \ + __FILE__, \ + __FUNCTION__, \ + __LINE__, \ + NULL); +#else +/* Dummy macro definition if the AOCL_DTL_TRACE_ENABLE macro is not enabled */ +#define AOCL_DTL_TRACE_ENTRY(LogLevel) +#endif + +#if AOCL_DTL_TRACE_ENABLE +/* Exit macro to trace the flow of control The parameter LogLevel specifies + log level String will preferably contains the function name in which this + macro is invoked */ +#define AOCL_DTL_TRACE_EXIT(LogLevel) \ + DTL_Trace(LogLevel, \ + TRACE_TYPE_FEXIT, \ + __FILE__, \ + __FUNCTION__, \ + __LINE__, \ + NULL); + +#define AOCL_DTL_TRACE_EXIT_ERR(LogLevel, Message) \ + DTL_Trace(LogLevel, \ + TRACE_TYPE_FEXIT, \ + __FILE__, \ + __FUNCTION__, \ + __LINE__, \ + Message); +#else +/* Dummy macro definition if the AOCL_DTL_TRACE_ENABLE macro is not enabled */ +#define AOCL_DTL_TRACE_EXIT(LogLevel) +#define AOCL_DTL_TRACE_EXIT_ERR(LogLevel, Message) +#endif + +#if AOCL_DTL_DUMP_ENABLE +/* Macro to Dump the DATA The parameters Buffer contains the data to be + dumped BufferSize specifies the no. 
of bytes to be dumped DataType + specifies the data type of Buffer */ +#define AOCL_DTL_DUMP(LogLevel, Buffer, BufferSize, DataType, String, OutputType) \ + /* Call the Dump function to Dump the DATA */ \ + DTL_DumpData(LogLevel, \ + Buffer, \ + BufferSize, \ + DataType, \ + String, \ + OutputType); +#else +/* Dummy macro definition if the AOCL_DTL_DUMP_ENABLE macro is not enabled */ +#define AOCL_DTL_DUMP(Buffer, BufferSize, DataType, String, OutputType) + +#endif + +#if AOCL_DTL_LOG_ENABLE +/* Macro to log the Data */ +#define AOCL_DTL_LOG(LogLevel, Message) \ + DTL_Trace(LogLevel, \ + TRACE_TYPE_LOG, \ + __FILE__, \ + __FUNCTION__, \ + __LINE__, \ + Message); +#else +/* Dummy macro definition if the AOCL_DTL_LOG_ENABLE macro is not enabled */ +#define AOCL_DTL_LOG(LogLevel, Message) +#endif + +/* Macro to initialize the prerequisite for debuging */ +#ifdef AOCL_DTL_INITIALIZE_ENABLE +#define AOCL_DTL_INITIALIZE(CURRENT_LOG_LEVEL) \ + DTL_Initialize(CURRENT_LOG_LEVEL); +#else +/* Dummy macro definition if the AOCL_DTL_INITIALIZE macro is not enabled */ +#define AOCL_DTL_INITIALIZE(CURRENT_LOG_LEVEL) +#endif + +/* Macro for uninitializing the prerequisite */ +#ifdef AOCL_DTL_INITIALIZE_ENABLE +#define AOCL_DTL_UNINITIALIZE() \ + DTL_Uninitialize(); +#else +/* Dummy macro definition if the AOCL_DTL_INITIALIZE macro is not enabled */ +#define AOCL_DTL_UNINITIALIZE() +#endif + +#ifdef AOCL_DTL_INITIALIZE_ENABLE +/* Prototypes for initializing and uninitializing the debug functions */ +void DTL_Initialize( + uint32 ui32CurrentLogLevel); +void DTL_Uninitialize(void); +#endif + +#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_LOG_ENABLE) +/* Debug trace Function protoypes */ +void DTL_Trace( + uint8 ui8LogLevel, + uint8 ui8LogType, + const int8 *pi8FileName, + const int8 *pi8FunctionName, + uint32 ui32LineNumber, + const int8 *pi8Message); + +#endif + +#if AOCL_DTL_DUMP_ENABLE +/* Function Prototype for dumping the data */ +void DTL_DumpData( + uint8 ui8LogLevel, + void 
*pvBuffer, + uint32 ui32BufferSize, + uint8 ui8DataType, + int8 *pi8Message, + int8 i8OutputType); +#endif + +#endif /* _AOCLDTL_H_ */ + +/* --------------- End of aocldtl.h ----------------- */ diff --git a/AOCL_DTL/aocldtlcf.h b/AOCL_DTL/aocldtlcf.h index 6f9cd945..a2198dbf 100644 --- a/AOCL_DTL/aocldtlcf.h +++ b/AOCL_DTL/aocldtlcf.h @@ -1,76 +1,76 @@ -/*=================================================================== - * File Name : aocldtlcf.h - * - * Description : This is configuration file for debug and trace - * libaray, all debug features (except auto trace) - * can be enabled/disabled in this file. - * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * - *==================================================================*/ - -#ifndef _AOCLDTLCF_H_ -#define _AOCLDTLCF_H_ - -/* Macro for tracing the log If the user wants to enable tracing he has to - enable this macro by making it to 1 else 0 */ -#define AOCL_DTL_TRACE_ENABLE 0 - -/* Macro for dumping the log If the user wants to enable dumping he has to - enable this macro by making it to 1 else 0 */ -#define AOCL_DTL_DUMP_ENABLE 0 - -/* Macro for logging the logs If the user wants to enable loging information he - has to enable this macro by making it to 1 else 0 */ -#define AOCL_DTL_LOG_ENABLE 0 - -/* Select the trace level till which you want to log the data */ -/* By default it will log for all levels */ -#define AOCL_DTL_TRACE_LEVEL AOCL_DTL_LEVEL_TRACE_5 - -/* user has to explicitly use the below macros to identify - ciriticality of the logged message */ -#define AOCL_DTL_LEVEL_ALL (14) -#define AOCL_DTL_LEVEL_TRACE_8 (13) -#define AOCL_DTL_LEVEL_TRACE_7 (12) /* Kernels */ -#define AOCL_DTL_LEVEL_TRACE_6 (11) -#define AOCL_DTL_LEVEL_TRACE_5 (10) -#define AOCL_DTL_LEVEL_TRACE_4 (9) -#define AOCL_DTL_LEVEL_TRACE_3 (8) -#define AOCL_DTL_LEVEL_TRACE_2 (7) -#define AOCL_DTL_LEVEL_TRACE_1 (6) /* BLIS/BLAS API */ -#define AOCL_DTL_LEVEL_VERBOSE (5) -#define AOCL_DTL_LEVEL_INFO (4) -#define 
AOCL_DTL_LEVEL_MINOR (3) -#define AOCL_DTL_LEVEL_MAJOR (2) -#define AOCL_DTL_LEVEL_CRITICAL (1) - - -#define AOCL_DTL_TRACE_FILE "aocldtl_trace.txt" -#define AOCL_DTL_AUTO_TRACE_FILE "aocldtl_auto_trace.rawfile" -#define AOCL_DTL_LOG_FILE "aocldtl_log.txt" - -/* The use can use below three macros for different data type while dumping data - * or specify the size of data type in bytes macro for character data type */ -#define AOCL_CHAR_DATA_TYPE (1) - -/* macro for short data type */ -#define AOCL_UINT16_DATA_TYPE (2) - -/* macro for String data type */ -#define AOCL_STRING_DATA_TYPE (3) - -/* macro for uint32 data type */ -#define AOCL_UINT32_DATA_TYPE (4) - -/* macro for printing Hex values */ -#define AOCL_LOG_HEX_VALUE ('x') - -/* macro for printing Decimal values */ -#define AOCL_LOG_DECIMAL_VALUE ('d') - - - -#endif /* _AOCLDTLCF_H_ */ - -/* --------------- End of aocldtlcf.h ----------------- */ +/*=================================================================== + * File Name : aocldtlcf.h + * + * Description : This is configuration file for debug and trace + * libaray, all debug features (except auto trace) + * can be enabled/disabled in this file. 
+ * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ + +#ifndef _AOCLDTLCF_H_ +#define _AOCLDTLCF_H_ + +/* Macro for tracing the log If the user wants to enable tracing he has to + enable this macro by making it to 1 else 0 */ +#define AOCL_DTL_TRACE_ENABLE 0 + +/* Macro for dumping the log If the user wants to enable dumping he has to + enable this macro by making it to 1 else 0 */ +#define AOCL_DTL_DUMP_ENABLE 0 + +/* Macro for logging the logs If the user wants to enable loging information he + has to enable this macro by making it to 1 else 0 */ +#define AOCL_DTL_LOG_ENABLE 0 + +/* Select the trace level till which you want to log the data */ +/* By default it will log for all levels */ +#define AOCL_DTL_TRACE_LEVEL AOCL_DTL_LEVEL_TRACE_5 + +/* user has to explicitly use the below macros to identify + ciriticality of the logged message */ +#define AOCL_DTL_LEVEL_ALL (14) +#define AOCL_DTL_LEVEL_TRACE_8 (13) +#define AOCL_DTL_LEVEL_TRACE_7 (12) /* Kernels */ +#define AOCL_DTL_LEVEL_TRACE_6 (11) +#define AOCL_DTL_LEVEL_TRACE_5 (10) +#define AOCL_DTL_LEVEL_TRACE_4 (9) +#define AOCL_DTL_LEVEL_TRACE_3 (8) +#define AOCL_DTL_LEVEL_TRACE_2 (7) +#define AOCL_DTL_LEVEL_TRACE_1 (6) /* BLIS/BLAS API */ +#define AOCL_DTL_LEVEL_VERBOSE (5) +#define AOCL_DTL_LEVEL_INFO (4) +#define AOCL_DTL_LEVEL_MINOR (3) +#define AOCL_DTL_LEVEL_MAJOR (2) +#define AOCL_DTL_LEVEL_CRITICAL (1) + + +#define AOCL_DTL_TRACE_FILE "aocldtl_trace.txt" +#define AOCL_DTL_AUTO_TRACE_FILE "aocldtl_auto_trace.rawfile" +#define AOCL_DTL_LOG_FILE "aocldtl_log.txt" + +/* The use can use below three macros for different data type while dumping data + * or specify the size of data type in bytes macro for character data type */ +#define AOCL_CHAR_DATA_TYPE (1) + +/* macro for short data type */ +#define AOCL_UINT16_DATA_TYPE (2) + +/* macro for String data type */ +#define AOCL_STRING_DATA_TYPE (3) + +/* macro for uint32 data type 
*/ +#define AOCL_UINT32_DATA_TYPE (4) + +/* macro for printing Hex values */ +#define AOCL_LOG_HEX_VALUE ('x') + +/* macro for printing Decimal values */ +#define AOCL_LOG_DECIMAL_VALUE ('d') + + + +#endif /* _AOCLDTLCF_H_ */ + +/* --------------- End of aocldtlcf.h ----------------- */ diff --git a/AOCL_DTL/aoclfal.c b/AOCL_DTL/aoclfal.c index a317e69c..1eadf99b 100644 --- a/AOCL_DTL/aoclfal.c +++ b/AOCL_DTL/aoclfal.c @@ -1,265 +1,265 @@ -/*=================================================================== - * File Name : aoclfal.c - * - * Description : Platform/os independed file handling API's - * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * - *==================================================================*/ - -#include "aocltpdef.h" -#include "aocldtl.h" -#include "aoclfal.h" - - - -/* Disable instrumentation for following function, since they are called from - * Auto Generated execution trace handlers. */ - -/* The FAL function declaration */ -int32 AOCL_FAL_Close( - AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); - -int32 AOCL_FAL_Error( - AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); - -AOCL_FAL_FILE *AOCL_FAL_Open( - const int8 *pchFileName, - const int8 *pchMode) __attribute__((no_instrument_function)); - -int32 AOCL_FAL_Read( - void *pvBuffer, - int32 i32Size, - int32 i32Count, - AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); - -int32 AOCL_FAL_Write( - const void *pvBuffer, - int32 i32Size, - int32 iCount, - AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); - -/*============================================================================= -* Function Name : AOCL_FAL_Open -* Description : Used for opening a file specified by name -* Input Parameter(s) : int8 *pchFileName - Stores the file name (path) -* int8 *pchMode - Specify the mode for opening file -* Output Parameter(s) : None -* Return parameter(s) : AOCL_FAL_FILE - If the file is opened 
successfully -* NULL - If there is any error while opening file -*============================================================================*/ -AOCL_FAL_FILE *AOCL_FAL_Open( - const int8 *pchFileName, - const int8 *pchMode) -{ - AOCL_FAL_FILE *fpFileOpen = NULL; - /* Open the file with provided by specified path and mode in which it should - be opened. Refer to FILE I/O operation help for getting mode types */ - fpFileOpen = fopen(pchFileName, pchMode); - /* If the file is not opened then NULL value should be returned */ - if (NULL == fpFileOpen) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Cannot open file: AOCL_FAL_Open()"); - } - return fpFileOpen; -} /* end of AOCL_FAL_Open */ - -/*============================================================================= -* Function Name : AOCL_FAL_Close -* Description : Used for closing a file specified by file pointer -* Input Parameter(s) : AOCL_FAL_FILE *fpFilePointer - File pointer -* Output Parameter(s) : None -* Return parameter(s) : 0 - If the file is closed successfully -* AOCL_FAL_CLOSE_ERROR - For any error while closing file -* -*============================================================================*/ -int32 AOCL_FAL_Close( - AOCL_FAL_FILE *fpFilePointer) -{ - /* Return value for the file close */ - int32 i32RetVal; - i32RetVal = AOCL_FAL_CLOSE_ERROR; - - /* Check whether the file pointer passed is valid or not */ - if (NULL == fpFilePointer) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Can not close file: AOCL_FAL_Close()"); - return i32RetVal; - } - - /* Close the file using the FILE pointer passed */ - i32RetVal = fclose(fpFilePointer); - - /* If the return value is non zero then it indicates an error */ - if (i32RetVal) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, - "Can't close file, Invalid file pointer passed"); - return i32RetVal; - } - - /* On successful closing of the file, function should return 0 */ - return i32RetVal; - -} /* End of AOCL_FAL_Close */ - 
-/*============================================================================= -* Function Name : AOCL_FAL_Read -* Description : Used for reading a file specified by file pointer. -* This function reads the specified number of bytes -* from the file into the buffer specified. The bytes -* read are returned by this function. -* Input Parameter(s) : int32 i32Size - Item size in bytes -* int32 i32Count - Maximum number of items to be read -* AOCL_FAL_FILE *fpFilePointer - File ptr to read from -* Output Parameter(s) : void *pvBuffer - Storage location of data -* Return parameter(s) : i32RetVal - Number of bytes read if successful -* AOCL_FAL_READ_ERROR - In case of error while reading -*============================================================================*/ -int32 AOCL_FAL_Read( - void *pvBuffer, - int32 i32Size, - int32 i32Count, - AOCL_FAL_FILE *fpFilePointer) -{ - /* Return value for the file read */ - int32 i32RetVal; - i32RetVal = AOCL_FAL_READ_ERROR; - - /* Check pointer used for pointing the storage location data is valid */ - if (NULL == pvBuffer) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, - "Can not read the file, Buffer pointer is NULL"); - return i32RetVal; - } - - /* Check whether file pointer passed is valid */ - if (NULL == fpFilePointer) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, - "Can not read the file, Buffer pointer is NULL"); - return i32RetVal; - } - - /* Read the file using file pointer */ - i32RetVal = fread(pvBuffer, i32Size, i32Count, fpFilePointer); - - if (i32RetVal != i32Count) - { - /* Check whether this is an end of file The AOCL_FAL_Error() will return - non-zero value to indicate an error */ - if (AOCL_FAL_Error(fpFilePointer)) /* AOCL_FAL_EndOfFile (fpFilePointer) */ - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, - "There is an error condition while file read"); - i32RetVal = AOCL_FAL_READ_ERROR; - } - /* This is condition where file read has encountered an end of file */ - else - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "End of 
file..."); - } - } - - /* The number of bytes read by the file read operation. - * This value may be less than the actual count, due to end of file - * or an error while reading the file */ - return i32RetVal; - -} /* End of AOCL_FAL_Read */ - -/*============================================================================= -* Function Name : AOCL_FAL_Write -* Description : Used for writing data to a file specified by file -* pointer. The number of bytes written to file are -* written by this function. -* Input Parameter(s) : const void *pvBuffer - Pointer to data location from -* where the data to be copied - int32 i32Size - Item size in bytes -* int32 i32Count - Maximum number of items to be -* written -* AOCL_FAL_FILE *fpFilePointer - File pointer to write to -* Output Parameter(s) : None -* Return parameter(s) : i32RetVal - Number of bytes written if successful -* AOCL_FAL_WRITE_ERROR - In case of error while writing -*============================================================================*/ -int32 AOCL_FAL_Write( - const void *pvBuffer, - int32 i32Size, - int32 iCount, - AOCL_FAL_FILE *fpFilePointer) -{ - /* Return value for write operation */ - int32 i32RetVal; - i32RetVal = AOCL_FAL_WRITE_ERROR; - /* Check pointer used for pointing the storage location data is valid */ - if (NULL == pvBuffer) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Can not perform file write"); - return i32RetVal; - } - - /* Check whether the file pointer passed is valid or not */ - if (NULL == fpFilePointer) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Can not perform file write"); - return i32RetVal; - } - - /* Write into the file specified by the file pointer */ - i32RetVal = fwrite(pvBuffer, i32Size, iCount, fpFilePointer); - - /* If the number of bytes written into the file are less than specified - * bytes then it is an error while file writing */ - if (i32RetVal != iCount) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "File write operation error"); - i32RetVal = AOCL_FAL_WRITE_ERROR; - 
} - - /* The return value of the file write operation */ - return i32RetVal; - -} /* End of AOCL_FAL_Write */ - -/*============================================================================= -* Function Name : AOCL_FAL_Error -* Description : Used for testing an error on the file specified -* Input Parameter(s) : AOCL_FAL_FILE *fpFilePointer - File pointer -* Output Parameter(s) : None -* Return parameter(s) : non-zero - Indicates an end of file -* 0 - Indicates that function is successful -* non-zero - Indicates that there is some error -* AOCL_FAL_ERROR - Indicates error during the operation -*============================================================================*/ -int32 AOCL_FAL_Error( - AOCL_FAL_FILE *fpFilePointer) -{ - /* Used for storing the return value for ferror function */ - int32 i32RetVal; - i32RetVal = AOCL_FAL_FERROR; - - /* Check whether the file pointer is NULL */ - if (NULL == fpFilePointer) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Invalid file pointer is passed"); - return i32RetVal; - } - - /* Call the ferror function to get an error on the file */ - i32RetVal = ferror(fpFilePointer); - - /* Check for the return value, it non-zero there is an error */ - if (i32RetVal) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "The file has some error"); - i32RetVal = AOCL_FAL_FERROR; - } - - /* In case of success, this function should return 0 */ - return i32RetVal; - -} /* End of AOCL_FAL_Error */ - -/* ------------------- End of aoclfal.c ----------------------- */ +/*=================================================================== + * File Name : aoclfal.c + * + * Description : Platform/os independed file handling API's + * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ + +#include "aocltpdef.h" +#include "aocldtl.h" +#include "aoclfal.h" + + + +/* Disable instrumentation for following function, since they are called from + * Auto Generated execution trace handlers. 
*/ + +/* The FAL function declaration */ +int32 AOCL_FAL_Close( + AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); + +int32 AOCL_FAL_Error( + AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); + +AOCL_FAL_FILE *AOCL_FAL_Open( + const int8 *pchFileName, + const int8 *pchMode) __attribute__((no_instrument_function)); + +int32 AOCL_FAL_Read( + void *pvBuffer, + int32 i32Size, + int32 i32Count, + AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); + +int32 AOCL_FAL_Write( + const void *pvBuffer, + int32 i32Size, + int32 iCount, + AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); + +/*============================================================================= +* Function Name : AOCL_FAL_Open +* Description : Used for opening a file specified by name +* Input Parameter(s) : int8 *pchFileName - Stores the file name (path) +* int8 *pchMode - Specify the mode for opening file +* Output Parameter(s) : None +* Return parameter(s) : AOCL_FAL_FILE - If the file is opened successfully +* NULL - If there is any error while opening file +*============================================================================*/ +AOCL_FAL_FILE *AOCL_FAL_Open( + const int8 *pchFileName, + const int8 *pchMode) +{ + AOCL_FAL_FILE *fpFileOpen = NULL; + /* Open the file with provided by specified path and mode in which it should + be opened. 
Refer to FILE I/O operation help for getting mode types */ + fpFileOpen = fopen(pchFileName, pchMode); + /* If the file is not opened then NULL value should be returned */ + if (NULL == fpFileOpen) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Cannot open file: AOCL_FAL_Open()"); + } + return fpFileOpen; +} /* end of AOCL_FAL_Open */ + +/*============================================================================= +* Function Name : AOCL_FAL_Close +* Description : Used for closing a file specified by file pointer +* Input Parameter(s) : AOCL_FAL_FILE *fpFilePointer - File pointer +* Output Parameter(s) : None +* Return parameter(s) : 0 - If the file is closed successfully +* AOCL_FAL_CLOSE_ERROR - For any error while closing file +* +*============================================================================*/ +int32 AOCL_FAL_Close( + AOCL_FAL_FILE *fpFilePointer) +{ + /* Return value for the file close */ + int32 i32RetVal; + i32RetVal = AOCL_FAL_CLOSE_ERROR; + + /* Check whether the file pointer passed is valid or not */ + if (NULL == fpFilePointer) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Can not close file: AOCL_FAL_Close()"); + return i32RetVal; + } + + /* Close the file using the FILE pointer passed */ + i32RetVal = fclose(fpFilePointer); + + /* If the return value is non zero then it indicates an error */ + if (i32RetVal) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, + "Can't close file, Invalid file pointer passed"); + return i32RetVal; + } + + /* On successful closing of the file, function should return 0 */ + return i32RetVal; + +} /* End of AOCL_FAL_Close */ + +/*============================================================================= +* Function Name : AOCL_FAL_Read +* Description : Used for reading a file specified by file pointer. +* This function reads the specified number of bytes +* from the file into the buffer specified. The bytes +* read are returned by this function. 
+* Input Parameter(s) : int32 i32Size - Item size in bytes +* int32 i32Count - Maximum number of items to be read +* AOCL_FAL_FILE *fpFilePointer - File ptr to read from +* Output Parameter(s) : void *pvBuffer - Storage location of data +* Return parameter(s) : i32RetVal - Number of bytes read if successful +* AOCL_FAL_READ_ERROR - In case of error while reading +*============================================================================*/ +int32 AOCL_FAL_Read( + void *pvBuffer, + int32 i32Size, + int32 i32Count, + AOCL_FAL_FILE *fpFilePointer) +{ + /* Return value for the file read */ + int32 i32RetVal; + i32RetVal = AOCL_FAL_READ_ERROR; + + /* Check pointer used for pointing the storage location data is valid */ + if (NULL == pvBuffer) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, + "Can not read the file, Buffer pointer is NULL"); + return i32RetVal; + } + + /* Check whether file pointer passed is valid */ + if (NULL == fpFilePointer) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, + "Can not read the file, Buffer pointer is NULL"); + return i32RetVal; + } + + /* Read the file using file pointer */ + i32RetVal = fread(pvBuffer, i32Size, i32Count, fpFilePointer); + + if (i32RetVal != i32Count) + { + /* Check whether this is an end of file The AOCL_FAL_Error() will return + non-zero value to indicate an error */ + if (AOCL_FAL_Error(fpFilePointer)) /* AOCL_FAL_EndOfFile (fpFilePointer) */ + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, + "There is an error condition while file read"); + i32RetVal = AOCL_FAL_READ_ERROR; + } + /* This is condition where file read has encountered an end of file */ + else + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "End of file..."); + } + } + + /* The number of bytes read by the file read operation. 
+ * This value may be less than the actual count, due to end of file + * or an error while reading the file */ + return i32RetVal; + +} /* End of AOCL_FAL_Read */ + +/*============================================================================= +* Function Name : AOCL_FAL_Write +* Description : Used for writing data to a file specified by file +* pointer. The number of bytes written to file are +* written by this function. +* Input Parameter(s) : const void *pvBuffer - Pointer to data location from +* where the data to be copied + int32 i32Size - Item size in bytes +* int32 i32Count - Maximum number of items to be +* written +* AOCL_FAL_FILE *fpFilePointer - File pointer to write to +* Output Parameter(s) : None +* Return parameter(s) : i32RetVal - Number of bytes written if successful +* AOCL_FAL_WRITE_ERROR - In case of error while writing +*============================================================================*/ +int32 AOCL_FAL_Write( + const void *pvBuffer, + int32 i32Size, + int32 iCount, + AOCL_FAL_FILE *fpFilePointer) +{ + /* Return value for write operation */ + int32 i32RetVal; + i32RetVal = AOCL_FAL_WRITE_ERROR; + /* Check pointer used for pointing the storage location data is valid */ + if (NULL == pvBuffer) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Can not perform file write"); + return i32RetVal; + } + + /* Check whether the file pointer passed is valid or not */ + if (NULL == fpFilePointer) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Can not perform file write"); + return i32RetVal; + } + + /* Write into the file specified by the file pointer */ + i32RetVal = fwrite(pvBuffer, i32Size, iCount, fpFilePointer); + + /* If the number of bytes written into the file are less than specified + * bytes then it is an error while file writing */ + if (i32RetVal != iCount) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "File write operation error"); + i32RetVal = AOCL_FAL_WRITE_ERROR; + } + + /* The return value of the file write operation */ + return i32RetVal; + 
+} /* End of AOCL_FAL_Write */ + +/*============================================================================= +* Function Name : AOCL_FAL_Error +* Description : Used for testing an error on the file specified +* Input Parameter(s) : AOCL_FAL_FILE *fpFilePointer - File pointer +* Output Parameter(s) : None +* Return parameter(s) : non-zero - Indicates an end of file +* 0 - Indicates that function is successful +* non-zero - Indicates that there is some error +* AOCL_FAL_ERROR - Indicates error during the operation +*============================================================================*/ +int32 AOCL_FAL_Error( + AOCL_FAL_FILE *fpFilePointer) +{ + /* Used for storing the return value for ferror function */ + int32 i32RetVal; + i32RetVal = AOCL_FAL_FERROR; + + /* Check whether the file pointer is NULL */ + if (NULL == fpFilePointer) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Invalid file pointer is passed"); + return i32RetVal; + } + + /* Call the ferror function to get an error on the file */ + i32RetVal = ferror(fpFilePointer); + + /* Check for the return value, it non-zero there is an error */ + if (i32RetVal) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "The file has some error"); + i32RetVal = AOCL_FAL_FERROR; + } + + /* In case of success, this function should return 0 */ + return i32RetVal; + +} /* End of AOCL_FAL_Error */ + +/* ------------------- End of aoclfal.c ----------------------- */ diff --git a/AOCL_DTL/aoclfal.h b/AOCL_DTL/aoclfal.h index 1e392733..56931d2d 100644 --- a/AOCL_DTL/aoclfal.h +++ b/AOCL_DTL/aoclfal.h @@ -1,51 +1,51 @@ -/*=================================================================== - * File Name : aoclfal.h - * - * Description : Interfaces for platform/os independed file - * handling API's - * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * - *==================================================================*/ - -#ifndef _AOCL_FAL_H_ -#define _AOCL_FAL_H_ - -/* The possible error values of FAL */ -#define 
AOCL_FAL_SUCCESS 0 -#define AOCL_FAL_CLOSE_ERROR -1 -#define AOCL_FAL_READ_ERROR -2 -#define AOCL_FAL_WRITE_ERROR -3 -#define AOCL_FAL_EOF_ERROR -6 -#define AOCL_FAL_FERROR -7 -#include "aocltpdef.h" - -/* The type definition for FILE */ -#define AOCL_FAL_FILE FILE - -/* The FAL function declaration */ -int32 AOCL_FAL_Close( - AOCL_FAL_FILE *fpFilePointer); - -int32 AOCL_FAL_Error( - AOCL_FAL_FILE *fpFilePointer); - -AOCL_FAL_FILE *AOCL_FAL_Open( - const int8 *pchFileName, - const int8 *pchMode); - -int32 AOCL_FAL_Read( - void *pvBuffer, - int32 i32Size, - int32 i32Count, - AOCL_FAL_FILE *fpFilePointer); - -int32 AOCL_FAL_Write( - const void *pvBuffer, - int32 i32Size, - int32 iCount, - AOCL_FAL_FILE *fpFilePointer); - -#endif /* _AOCL_FAL_H_ */ - -/* --------------- End of aoclfal.h ----------------- */ +/*=================================================================== + * File Name : aoclfal.h + * + * Description : Interfaces for platform/os independed file + * handling API's + * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ + +#ifndef _AOCL_FAL_H_ +#define _AOCL_FAL_H_ + +/* The possible error values of FAL */ +#define AOCL_FAL_SUCCESS 0 +#define AOCL_FAL_CLOSE_ERROR -1 +#define AOCL_FAL_READ_ERROR -2 +#define AOCL_FAL_WRITE_ERROR -3 +#define AOCL_FAL_EOF_ERROR -6 +#define AOCL_FAL_FERROR -7 +#include "aocltpdef.h" + +/* The type definition for FILE */ +#define AOCL_FAL_FILE FILE + +/* The FAL function declaration */ +int32 AOCL_FAL_Close( + AOCL_FAL_FILE *fpFilePointer); + +int32 AOCL_FAL_Error( + AOCL_FAL_FILE *fpFilePointer); + +AOCL_FAL_FILE *AOCL_FAL_Open( + const int8 *pchFileName, + const int8 *pchMode); + +int32 AOCL_FAL_Read( + void *pvBuffer, + int32 i32Size, + int32 i32Count, + AOCL_FAL_FILE *fpFilePointer); + +int32 AOCL_FAL_Write( + const void *pvBuffer, + int32 i32Size, + int32 iCount, + AOCL_FAL_FILE *fpFilePointer); + +#endif /* _AOCL_FAL_H_ */ + +/* 
--------------- End of aoclfal.h ----------------- */ diff --git a/AOCL_DTL/aocltpdef.h b/AOCL_DTL/aocltpdef.h index 896731c5..3098912c 100644 --- a/AOCL_DTL/aocltpdef.h +++ b/AOCL_DTL/aocltpdef.h @@ -1,42 +1,42 @@ - -/*=================================================================== - * File Name : aocltpdef.h - * - * Description : Abstraction for various datatypes used by DTL. - * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * - *==================================================================*/ -#ifndef AOCL_TYPEDEF_H_ -#define AOCL_TYPEDEF_H_ - -#include -#include -#include -#include -#include -#ifndef _WIN32 -#include -#else -typedef int pid_t; -#endif - -typedef double Double; -typedef float Float; -typedef void Void; -typedef unsigned char uint8; -typedef unsigned short int uint16; -typedef unsigned int uint32; -typedef unsigned long uint64; -typedef uint8 *STRING; -typedef unsigned char Bool; -typedef char int8; -typedef signed long int int32; -typedef short int int16; - -typedef Void *AOCL_HANDLE; -typedef pid_t AOCL_TID; - -#endif /*AOCL_TYPEDEF_H_ */ - -/* --------------- End of aocltpdef.h ----------------- */ + +/*=================================================================== + * File Name : aocltpdef.h + * + * Description : Abstraction for various datatypes used by DTL. 
+ * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ +#ifndef AOCL_TYPEDEF_H_ +#define AOCL_TYPEDEF_H_ + +#include +#include +#include +#include +#include +#ifndef _WIN32 +#include +#else +typedef int pid_t; +#endif + +typedef double Double; +typedef float Float; +typedef void Void; +typedef unsigned char uint8; +typedef unsigned short int uint16; +typedef unsigned int uint32; +typedef unsigned long uint64; +typedef uint8 *STRING; +typedef unsigned char Bool; +typedef char int8; +typedef signed long int int32; +typedef short int int16; + +typedef Void *AOCL_HANDLE; +typedef pid_t AOCL_TID; + +#endif /*AOCL_TYPEDEF_H_ */ + +/* --------------- End of aocltpdef.h ----------------- */ diff --git a/EXAMPLE/aocl_progress_example/pdgerf_example_app.c b/EXAMPLE/aocl_progress_example/pdgerf_example_app.c index 63c6db1d..b6ac9a10 100644 --- a/EXAMPLE/aocl_progress_example/pdgerf_example_app.c +++ b/EXAMPLE/aocl_progress_example/pdgerf_example_app.c @@ -1,132 +1,132 @@ -#include -#include -#include -#include -#include -#include -#include "mpi.h" - -void blacs_get_(int*, int*, int*); -void blacs_pinfo_(int*, int*); -void blacs_gridinit_(int*, char*, int*, int*); -void blacs_gridinfo_(int*, int*, int*, int*, int*); -void descinit_(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*); -void pdgetrf_(int*, int*, double*, int*, int*, int*, int*, int*); -void blacs_gridexit_(int*); -int numroc_(int*, int*, int*, int*, int*); - -int AOCL_progress(char* api, int *lenapi, int *progress, int *mpi_rank, int *total_mpi_processes); - -int AOCL_progress(char* api, int *lenapi, int *progress, int *mpi_rank, int *total_mpi_processes) -{ - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - int izero=0; - int ione=1; - int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - 
MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - int n = 1000; // (Global) Matrix size - int nprow = 2; // Number of row procs - int npcol = 2; // Number of column procs - int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - int iam, nprocs; - int zero = 0; - int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - double *A; - int *IPPIV; - A = (double *)calloc(mpA*nqA,sizeof(double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - IPPIV = (int *)calloc(2*n,sizeof(int)) ; - if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } - - int k = 0; - for (int j = 0; j < nqA; j++) { // local col - int l_j = j / nb; // which block - int x_j = j % nb; // where within that block - int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (int i = 0; i < mpA; i++) { // local row - int l_i = i / nb; // which block - int x_i = i % nb; // where within that block - int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - int descA[9]; - int info; - int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %d\n", info); - } - - // Run pdgetrf and time - double MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pdgetrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pdgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); - - if (info != 0) { - printf("Error in pdgetrf, info = %d\n", info); - } - - double MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#include +#include "mpi.h" + +void blacs_get_(int*, int*, int*); +void blacs_pinfo_(int*, int*); +void blacs_gridinit_(int*, char*, int*, int*); +void blacs_gridinfo_(int*, int*, int*, int*, int*); +void descinit_(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*); +void pdgetrf_(int*, int*, double*, int*, int*, int*, int*, int*); +void 
blacs_gridexit_(int*); +int numroc_(int*, int*, int*, int*, int*); + +int AOCL_progress(char* api, int *lenapi, int *progress, int *mpi_rank, int *total_mpi_processes); + +int AOCL_progress(char* api, int *lenapi, int *progress, int *mpi_rank, int *total_mpi_processes) +{ + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + int izero=0; + int ione=1; + int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + int n = 1000; // (Global) Matrix size + int nprow = 2; // Number of row procs + int npcol = 2; // Number of column procs + int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + int iam, nprocs; + int zero = 0; + int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size 
%d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + double *A; + int *IPPIV; + A = (double *)calloc(mpA*nqA,sizeof(double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + IPPIV = (int *)calloc(2*n,sizeof(int)) ; + if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } + + int k = 0; + for (int j = 0; j < nqA; j++) { // local col + int l_j = j / nb; // which block + int x_j = j % nb; // where within that block + int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (int i = 0; i < mpA; i++) { // local row + int l_i = i / nb; // which block + int x_i = i % nb; // where within that block + int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + int descA[9]; + int info; + int lddA = mpA > 1 ? 
mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %d\n", info); + } + + // Run pdgetrf and time + double MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pdgetrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pdgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); + + if (info != 0) { + printf("Error in pdgetrf, info = %d\n", info); + } + + double MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgeqrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgeqrf.c index 1de4f3dd..cd3e8052 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgeqrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgeqrf.c @@ -1,156 +1,156 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -#define SL_complex_float float _Complex - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); - -/* Target API Prototype */ -void pcgerqf_(Int*, Int*, SL_complex_float*, Int*, Int*, Int*, SL_complex_float*, SL_complex_float*, Int*, Int*); - -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} 
- - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int jone=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int m = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - Int mb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - m = atoi(argv[1]); - } - if(argc > 2) { - n = atoi(argv[2]); - } - if(argc > 3) { - mb = atoi(argv[3]); - } - if(argc > 4) { - nb = atoi(argv[4]); - } - if(argc > 5) { - nprow = atoi(argv[5]); - } - if(argc > 6) { - npcol = atoi(argv[6]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - SL_complex_float *A; - A = (SL_complex_float *)calloc(mpA*nqA,sizeof(SL_complex_float)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - SL_complex_float work_buffer_size; - SL_complex_float *work, *tau; - Int lwork = -1; - tau = (SL_complex_float *)calloc((mpA+nqA),sizeof(SL_complex_float)) ; - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - pcgeqrf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); - - work = (SL_complex_float *)calloc(work_buffer_size, sizeof(SL_complex_float)) ; - lwork = work_buffer_size; - - // Run pdpotrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pcgeqrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pcgeqrf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); - if (info != 0) { - printf("Error in pcgeqrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +#define SL_complex_float float _Complex + 
+void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); + +/* Target API Prototype */ +void pcgerqf_(Int*, Int*, SL_complex_float*, Int*, Int*, Int*, SL_complex_float*, SL_complex_float*, Int*, Int*); + +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int jone=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int m = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + Int mb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + m = atoi(argv[1]); + } + if(argc > 2) { + n = atoi(argv[2]); + } + if(argc > 3) { + mb = atoi(argv[3]); + } + if(argc > 4) { + nb = atoi(argv[4]); + } + if(argc > 5) { + nprow = atoi(argv[5]); + } + if(argc > 6) { + npcol = atoi(argv[6]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and 
world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + SL_complex_float *A; + A = (SL_complex_float *)calloc(mpA*nqA,sizeof(SL_complex_float)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + SL_complex_float work_buffer_size; + SL_complex_float *work, *tau; + Int lwork = -1; + tau = (SL_complex_float *)calloc((mpA+nqA),sizeof(SL_complex_float)) ; + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? 
mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + pcgeqrf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); + + work = (SL_complex_float *)calloc(work_buffer_size, sizeof(SL_complex_float)) ; + lwork = work_buffer_size; + + // Run pdpotrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pcgeqrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pcgeqrf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); + if (info != 0) { + printf("Error in pcgeqrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgetrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgetrf.c index b17c83bd..127a1c5e 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgetrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgetrf.c @@ -1,141 +1,141 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -#define SL_complex_float float _Complex - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pcgetrf_(Int*, Int*, SL_complex_float*, Int*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL 
Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - SL_complex_float *A; - Int *IPPIV; - A = (SL_complex_float *)calloc(mpA*nqA,sizeof(SL_complex_float)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; - if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pcgetrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pcgetrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pcgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); - - if (info != 0) { - printf("Error in pcgeqrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +#define SL_complex_float float _Complex + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, 
Int*, Int*, Int*, Int*); +void pcgetrf_(Int*, Int*, SL_complex_float*, Int*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int 
nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + SL_complex_float *A; + Int *IPPIV; + A = (SL_complex_float *)calloc(mpA*nqA,sizeof(SL_complex_float)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; + if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? 
mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pcgetrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pcgetrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pcgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); + + if (info != 0) { + printf("Error in pcgeqrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcpotrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcpotrf.c index 084f6387..b444c2b3 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcpotrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcpotrf.c @@ -1,136 +1,136 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -#define SL_complex_float float _Complex - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pcpotrf_(char*, Int*, SL_complex_float*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int 
myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - //assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - SL_complex_float *A; - A = (SL_complex_float *)calloc(mpA*nqA,sizeof(SL_complex_float)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pcpotrf and time - double MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pcpotrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pcpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); - if (info != 0) { - printf("Error in pcpotrf, info = %d\n", info); - } - - double MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +#define SL_complex_float float _Complex + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pcpotrf_(char*, Int*, SL_complex_float*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, 
Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + //assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position 
(%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + SL_complex_float *A; + A = (SL_complex_float *)calloc(mpA*nqA,sizeof(SL_complex_float)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? 
mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pcpotrf and time + double MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pcpotrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pcpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); + if (info != 0) { + printf("Error in pcpotrf, info = %d\n", info); + } + + double MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgeqrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgeqrf.c index 06b3d2f0..230c2294 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgeqrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgeqrf.c @@ -1,152 +1,152 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pdgerqf_(Int*, Int*, double*, Int*, Int*, Int*, double*, double*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int jone=1; - Int myrank_mpi, nprocs_mpi; - 
MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int m = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - Int mb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - m = atoi(argv[1]); - } - if(argc > 2) { - n = atoi(argv[2]); - } - if(argc > 3) { - mb = atoi(argv[3]); - } - if(argc > 4) { - nb = atoi(argv[4]); - } - if(argc > 5) { - nprow = atoi(argv[5]); - } - if(argc > 6) { - npcol = atoi(argv[6]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - double *A; - A = (double *)calloc(mpA*nqA,sizeof(double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - double work_buffer_size; - double *work, *tau; - Int lwork = -1; - tau = (double *)calloc((mpA+nqA),sizeof(double)) ; - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - pdgeqrf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); - - work = (double *)calloc(work_buffer_size, sizeof(double)) ; - lwork = work_buffer_size; - - // Run pdpotrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pdgeqrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pdgeqrf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); - if (info != 0) { - printf("Error in pdgeqrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void 
blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pdgerqf_(Int*, Int*, double*, Int*, Int*, Int*, double*, double*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int jone=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int m = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + Int mb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + m = atoi(argv[1]); + } + if(argc > 2) { + n = atoi(argv[2]); + } + if(argc > 3) { + mb = atoi(argv[3]); + } + if(argc > 4) { + nb = atoi(argv[4]); + } + if(argc > 5) { + nprow = atoi(argv[5]); + } + if(argc > 6) { + npcol = atoi(argv[6]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> 
Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + double *A; + A = (double *)calloc(mpA*nqA,sizeof(double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + double work_buffer_size; + double *work, *tau; + Int lwork = -1; + tau = (double *)calloc((mpA+nqA),sizeof(double)) ; + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? 
mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + pdgeqrf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); + + work = (double *)calloc(work_buffer_size, sizeof(double)) ; + lwork = work_buffer_size; + + // Run pdpotrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pdgeqrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pdgeqrf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); + if (info != 0) { + printf("Error in pdgeqrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgetrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgetrf.c index c4e11afb..71f2c2a1 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgetrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgetrf.c @@ -1,139 +1,139 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pdgetrf_(Int*, Int*, double*, Int*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(const char* const api, const Int *lenapi, const Int *progress, const Int *mpi_rank, const Int *total_mpi_processes); - -Int AOCL_progress(const char* const api, const Int *lenapi, const Int *progress, const Int *mpi_rank, const Int *total_mpi_processes) -{ - char api_name [30]; - memcpy(api_name, api, *lenapi); - api_name[*lenapi - 
1] = '\0'; - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i \n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - double *A; - Int *IPPIV; - A = (double *)calloc(mpA*nqA,sizeof(double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; - if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pdgetrf and time - double MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pdgetrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pdgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); - - if (info != 0) { - printf("Error in pdgetrf, info = %i\n", info); - } - - double MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pdgetrf_(Int*, Int*, double*, Int*, 
Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(const char* const api, const Int *lenapi, const Int *progress, const Int *mpi_rank, const Int *total_mpi_processes); + +Int AOCL_progress(const char* const api, const Int *lenapi, const Int *progress, const Int *mpi_rank, const Int *total_mpi_processes) +{ + char api_name [30]; + memcpy(api_name, api, *lenapi); + api_name[*lenapi - 1] = '\0'; + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i \n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( 
&n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + double *A; + Int *IPPIV; + A = (double *)calloc(mpA*nqA,sizeof(double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; + if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? 
mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pdgetrf and time + double MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pdgetrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pdgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); + + if (info != 0) { + printf("Error in pdgetrf, info = %i\n", info); + } + + double MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdpotrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdpotrf.c index 5da96566..307f916a 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdpotrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdpotrf.c @@ -1,134 +1,134 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pdpotrf_(char*, Int*, double*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - 
MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - double *A; - A = (double *)calloc(mpA*nqA,sizeof(double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pdpotrf and time - double MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pdpotrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pdpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); - if (info != 0) { - printf("Error in pdpotrf, info = %i\n", info); - } - - double MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pdpotrf_(char*, Int*, double*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, 
Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size 
%d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + double *A; + A = (double *)calloc(mpA*nqA,sizeof(double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pdpotrf and time + double MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pdpotrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pdpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); + if (info != 0) { + printf("Error in pdpotrf, info = %i\n", info); + } + + double MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgeqrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgeqrf.c index 98053ce5..2d4493fc 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgeqrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgeqrf.c @@ -1,153 +1,153 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif 
-#include "mpi.h" - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -//void pdpotrf_(char*, Int*, float*, Int*, Int*, Int*, Int*); -void pdgerqf_(Int*, Int*, float*, Int*, Int*, Int*, float*, float*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int jone=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int m = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - Int mb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - m = atoi(argv[1]); - } - if(argc > 2) { - n = atoi(argv[2]); - } - if(argc > 3) { - mb = atoi(argv[3]); - } - if(argc > 4) { - nb = atoi(argv[4]); - } - if(argc > 5) { - nprow = atoi(argv[5]); - } - if(argc > 6) { - npcol = atoi(argv[6]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, 
myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - float *A; - A = (float *)calloc(mpA*nqA,sizeof(float)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - float work_buffer_size; - float *work, *tau; - Int lwork = -1; - tau = (float *)calloc((mpA+nqA),sizeof(float)) ; - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - psgeqrf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); - - work = (float *)calloc(work_buffer_size, sizeof(float)) ; - lwork = work_buffer_size; - - // Run pdpotrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting psgeqrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - psgeqrf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); - if (info != 0) { - printf("Error in psgeqrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +//void pdpotrf_(char*, Int*, float*, Int*, Int*, Int*, Int*); +void pdgerqf_(Int*, Int*, float*, Int*, Int*, Int*, float*, float*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int jone=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, 
&myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int m = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + Int mb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + m = atoi(argv[1]); + } + if(argc > 2) { + n = atoi(argv[2]); + } + if(argc > 3) { + mb = atoi(argv[3]); + } + if(argc > 4) { + nb = atoi(argv[4]); + } + if(argc > 5) { + nprow = atoi(argv[5]); + } + if(argc > 6) { + npcol = atoi(argv[6]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + float *A; + A = (float *)calloc(mpA*nqA,sizeof(float)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + float work_buffer_size; + float *work, *tau; + Int lwork = -1; + tau = (float *)calloc((mpA+nqA),sizeof(float)) ; + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + psgeqrf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); + + work = (float *)calloc(work_buffer_size, sizeof(float)) ; + lwork = work_buffer_size; + + // Run pdpotrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting psgeqrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + psgeqrf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); + if (info != 0) { + printf("Error in psgeqrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgetrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgetrf.c index 6cf8e1dc..e574565c 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgetrf.c +++ 
b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgetrf.c @@ -1,140 +1,140 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void psgetrf_(Int*, Int*, float*, Int*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - 
blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - float *A; - Int *IPPIV; - A = (float *)calloc(mpA*nqA,sizeof(float)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; - if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run psgetrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting psgetrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - psgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); - - if (info != 0) { - printf("Error in psgetrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void psgetrf_(Int*, Int*, float*, Int*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + 
char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + float *A; + Int *IPPIV; + A = (float *)calloc(mpA*nqA,sizeof(float)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; + if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run psgetrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting psgetrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + psgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); + + if (info != 0) { + printf("Error in psgetrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pspotrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pspotrf.c index 2eaf1b13..d91ccaa3 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pspotrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pspotrf.c @@ -1,133 +1,133 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include 
"mpi.h" - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pspotrf_(char*, Int*, float*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - //assert(nprow * npcol == nprocs_mpi); - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); 
// Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - float *A; - A = (float *)calloc(mpA*nqA,sizeof(float)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pspotrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pspotrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pspotrf_(&uplo, &n, A, &ione, &ione, descA, &info); - if (info != 0) { - printf("Error in pspotrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pspotrf_(char*, Int*, float*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; 
// Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + //assert(nprow * npcol == nprocs_mpi); + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + float *A; + A = (float *)calloc(mpA*nqA,sizeof(float)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pspotrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pspotrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pspotrf_(&uplo, &n, A, &ione, &ione, descA, &info); + if (info != 0) { + printf("Error in pspotrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgeqrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgeqrf.c index d1ae33c1..90e6880c 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgeqrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgeqrf.c @@ -1,157 +1,157 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -#define SL_complex_double double _Complex - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, 
char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); - -/* Target API Prototype */ -void pzgerqf_(Int*, Int*, SL_complex_double*, Int*, Int*, Int*, SL_complex_double*, SL_complex_double*, Int*, Int*); - -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int jone=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int m = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - Int mb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size_rows matrix_size_columns block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - m = atoi(argv[1]); - } - if(argc > 2) { - n = (Int)atoi(argv[2]); - } - if(argc > 3) { - mb = (Int)atoi(argv[3]); - } - if(argc > 4) { - nb = (Int)atoi(argv[4]); - } - if(argc > 5) { - nprow = (Int)atoi(argv[5]); - } - if(argc > 6) { - npcol = (Int)atoi(argv[6]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and 
world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - SL_complex_double *A; - A = (SL_complex_double *)calloc(mpA*nqA,sizeof(SL_complex_double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - SL_complex_double work_buffer_size; - SL_complex_double *work, *tau; - Int lwork = -1; - tau = (SL_complex_double *)calloc((mpA+nqA),sizeof(SL_complex_double)) ; - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - pzgerqf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); - - work = (SL_complex_double *)calloc(work_buffer_size, sizeof(SL_complex_double)) ; - lwork = work_buffer_size; - - // Run pdpotrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pzgeqrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pzgerqf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); - if (info != 0) { - printf("Error in pzgeqrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +#define SL_complex_double double _Complex + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); + +/* Target API Prototype */ +void pzgerqf_(Int*, Int*, SL_complex_double*, Int*, Int*, Int*, SL_complex_double*, SL_complex_double*, Int*, Int*); + +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int jone=1; + Int 
myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int m = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + Int mb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size_rows matrix_size_columns block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + m = atoi(argv[1]); + } + if(argc > 2) { + n = (Int)atoi(argv[2]); + } + if(argc > 3) { + mb = (Int)atoi(argv[3]); + } + if(argc > 4) { + nb = (Int)atoi(argv[4]); + } + if(argc > 5) { + nprow = (Int)atoi(argv[5]); + } + if(argc > 6) { + npcol = (Int)atoi(argv[6]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + SL_complex_double *A; + A = (SL_complex_double *)calloc(mpA*nqA,sizeof(SL_complex_double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + SL_complex_double work_buffer_size; + SL_complex_double *work, *tau; + Int lwork = -1; + tau = (SL_complex_double *)calloc((mpA+nqA),sizeof(SL_complex_double)) ; + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + pzgerqf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); + + work = (SL_complex_double *)calloc(work_buffer_size, sizeof(SL_complex_double)) ; + lwork = work_buffer_size; + + // Run pdpotrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pzgeqrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pzgerqf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); + if (info != 0) { + printf("Error in pzgeqrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgetrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgetrf.c index 8dd6171f..d91bd1df 
100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgetrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgetrf.c @@ -1,141 +1,141 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -#define SL_complex_double double _Complex - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pzgetrf_(Int*, Int*, SL_complex_double*, Int*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS 
- Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - SL_complex_double *A; - Int *IPPIV; - A = (SL_complex_double *)calloc(mpA*nqA,sizeof(SL_complex_double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; - if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pzgetrf and time - double MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pzgetrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pzgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); - - if (info != 0) { - printf("Error in pzgetrf, info = %i\n", info); - } - - double MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +#define SL_complex_double double _Complex + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pzgetrf_(Int*, Int*, SL_complex_double*, Int*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) 
Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + SL_complex_double *A; + Int *IPPIV; + A = (SL_complex_double *)calloc(mpA*nqA,sizeof(SL_complex_double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; + if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pzgetrf and time + double MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pzgetrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pzgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); + + if (info != 0) { + printf("Error in pzgetrf, info = %i\n", info); + } + + double MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzpotrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzpotrf.c index 09803fb1..2d1aacda 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzpotrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzpotrf.c @@ -1,136 +1,136 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 
-#include -#else -#include -#endif -#include "mpi.h" - -#define SL_complex_double double _Complex - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pzpotrf_(char*, Int*, SL_complex_double*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, 
&npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - SL_complex_double *A; - A = (SL_complex_double *)calloc(mpA*nqA,sizeof(SL_complex_double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pzpotrf and time - double MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pzpotrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pzpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); - if (info != 0) { - printf("Error in pzpotrf, info = %i\n", info); - } - - double MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +#define SL_complex_double double _Complex + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pzpotrf_(char*, Int*, SL_complex_double*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char 
uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + SL_complex_double *A; + A = (SL_complex_double *)calloc(mpA*nqA,sizeof(SL_complex_double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pzpotrf and time + double MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pzpotrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pzpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); + if (info != 0) { + printf("Error in pzpotrf, info = %i\n", info); + } + + double MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} From ba30d9d43d62a7e75a5b9f51960dc1149559b5fc Mon Sep 17 00:00:00 2001 From: arunchan Date: Fri, 25 Nov 2022 11:34:55 +0530 Subject: [PATCH 04/30] if BLACS_LIBRARY is not set use the default without printing error Change-Id: Ifbfa43272c2d0bdaf87c95cfe44d895de1690b1b --- CMakeLists.txt | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f65f0689..84c735a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -236,18 +236,22 @@ IF(BLACS_LIBRARY) 
CHECK_FUNCTION_EXISTS("Cblacs_get" BLACS_FOUND) unset( CMAKE_REQUIRED_LIBRARIES ) message(STATUS "--> BLACS routine blacs_pinfo is found: ${BLACS_FOUND}.") -ENDIF() -if(BLACS_FOUND) - message(STATUS "--> BLACS supplied by user is WORKING, will use ${BLACS_LIBRARY}.") -else( BLACS_FOUND ) -# -# BLACS -# - add_subdirectory(BLACS) - append_subdir_files(blacs "BLACS/SRC") - message(STATUS "--> BLACS supplied by user is NOT WORKING, will use BLACS source code for building aocl-scalapack") -endif( BLACS_FOUND ) + if(BLACS_FOUND) + message(STATUS "--> BLACS supplied by user is WORKING, will use ${BLACS_LIBRARY}.") + else( BLACS_FOUND ) + # + # BLACS + # + add_subdirectory(BLACS) + append_subdir_files(blacs "BLACS/SRC") + message(STATUS "--> BLACS supplied by user is NOT WORKING, will use BLACS source code for building aocl-scalapack") + endif( BLACS_FOUND ) +ELSE(BLACS_LIBRARY) + add_subdirectory(BLACS) + append_subdir_files(blacs "BLACS/SRC") + message(STATUS "--> Using default BLACS source code for building aocl-scalapack") +ENDIF() message(STATUS "CHECKING BLAS AND LAPACK LIBRARIES") IF(LAPACK_LIBRARIES) From e29f39e1918eaee4a44b937cb4c67c7e93d38244 Mon Sep 17 00:00:00 2001 From: arunchan Date: Fri, 25 Nov 2022 11:48:26 +0530 Subject: [PATCH 05/30] Rename BLACS_FOUND to CUSTOM_BLACS_FOUND for clarity Change-Id: Icea30702de6ff55f705213d86355471605eb7639 --- CMakeLists.txt | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 84c735a2..a85896a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -233,20 +233,20 @@ IF(BLACS_LIBRARY) include(CheckFunctionExists) set(CMAKE_REQUIRED_LIBRARIES ${BLACS_LIBRARY}) message(STATUS "--> BLACS supplied by user is ${BLACS_LIBRARY}.") - CHECK_FUNCTION_EXISTS("Cblacs_get" BLACS_FOUND) + CHECK_FUNCTION_EXISTS("Cblacs_get" CUSTOM_BLACS_FOUND) unset( CMAKE_REQUIRED_LIBRARIES ) - message(STATUS "--> BLACS routine blacs_pinfo is found: ${BLACS_FOUND}.") + 
message(STATUS "--> BLACS routine blacs_pinfo is found: ${CUSTOM_BLACS_FOUND}.") - if(BLACS_FOUND) + if(CUSTOM_BLACS_FOUND) message(STATUS "--> BLACS supplied by user is WORKING, will use ${BLACS_LIBRARY}.") - else( BLACS_FOUND ) + else( CUSTOM_BLACS_FOUND ) # # BLACS # add_subdirectory(BLACS) append_subdir_files(blacs "BLACS/SRC") message(STATUS "--> BLACS supplied by user is NOT WORKING, will use BLACS source code for building aocl-scalapack") - endif( BLACS_FOUND ) + endif( CUSTOM_BLACS_FOUND ) ELSE(BLACS_LIBRARY) add_subdirectory(BLACS) append_subdir_files(blacs "BLACS/SRC") @@ -345,34 +345,34 @@ if(WIN32 AND BUILD_SHARED_LIBS) endif () if (UNIX) - if(BLACS_FOUND) + if(CUSTOM_BLACS_FOUND) add_library(scalapack ${dtl} ${tools} ${tools-C} ${extra_lapack} ${pblas} ${pblas-F} ${ptzblas} ${ptools} ${pbblas} ${redist} ${src} ${src-C}) target_link_libraries( scalapack ${BLACS_LIBRARY} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) scalapack_install_library(scalapack) - else(BLACS_FOUND) + else(CUSTOM_BLACS_FOUND) add_library(scalapack ${dtl} ${blacs} ${tools} ${tools-C} ${extra_lapack} ${pblas} ${pblas-F} ${ptzblas} ${ptools} ${pbblas} ${redist} ${src} ${src-C}) target_link_libraries( scalapack ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) scalapack_install_library(scalapack) - endif(BLACS_FOUND) + endif(CUSTOM_BLACS_FOUND) else (UNIX) # Need to separate Fortran and C Code if (CMAKE_C_COMPILER_ID MATCHES MSVC) # create Fortran objects and add to scalapack library first - if(BLACS_FOUND) + if(CUSTOM_BLACS_FOUND) add_library(scalapack-F OBJECT ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${src} ${extra_lapack} ) add_library(scalapack $ ${tools-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack ${BLACS_LIBRARY} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) - else(BLACS_FOUND) + else(CUSTOM_BLACS_FOUND) add_library(scalapack-F OBJECT ${pblas-F} ${pbblas} 
${ptzblas} ${tools} ${src} ${extra_lapack} ) add_library(scalapack $ ${blacs} ${tools-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) - endif(BLACS_FOUND) + endif(CUSTOM_BLACS_FOUND) else (CMAKE_C_COMPILER_ID MATCHES Clang) # create C objects and add to scalapack library first - if(BLACS_FOUND) + if(CUSTOM_BLACS_FOUND) add_library(scalapack-C OBJECT ${tools-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack-C ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack-C PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) @@ -380,7 +380,7 @@ else (UNIX) # Need to separate Fortran and C Code target_link_libraries( scalapack ${BLACS_LIBRARY} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) - else(BLACS_FOUND) + else(CUSTOM_BLACS_FOUND) add_library(scalapack-C OBJECT ${blacs} ${tools-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack-C ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack-C PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) @@ -388,7 +388,7 @@ else (UNIX) # Need to separate Fortran and C Code target_link_libraries( scalapack ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) - endif(BLACS_FOUND) + endif(CUSTOM_BLACS_FOUND) endif () endif (UNIX) From 32c2b529d9a893e84974e978b69b8587ecf80da4 Mon Sep 17 00:00:00 2001 From: arunchan Date: Fri, 25 Nov 2022 17:23:02 +0530 Subject: [PATCH 06/30] Allow the usage of same build directory if the user changes BLAS and LAPACK if cmake variable LAPACK_FOUND is set during the previous run, the new run ignores the current supplied 
-DLAPACK_LIBRARIES and -DBLAS_LIBRARIES. Unset it from cache to invoke the __dgesv__check__ Change-Id: Id58ee367a2191ec9aa1ad9227eea8a2c7f8d82ff --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index a85896a6..37368a3b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -253,6 +253,7 @@ ELSE(BLACS_LIBRARY) message(STATUS "--> Using default BLACS source code for building aocl-scalapack") ENDIF() +unset(LAPACK_FOUND CACHE) message(STATUS "CHECKING BLAS AND LAPACK LIBRARIES") IF(LAPACK_LIBRARIES) include(CheckFortranFunctionExists) From 82496ef180d6ef4a9d8e444477e70300ab95c3c8 Mon Sep 17 00:00:00 2001 From: arunchan Date: Mon, 28 Nov 2022 18:58:38 +0530 Subject: [PATCH 07/30] Do not input -cpp option to the C compiler Change-Id: I94dfb4631e5560e8fd422d2fb845546d9fb71b09 --- CMakeLists.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 37368a3b..80e705c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ if (WIN32 AND CMAKE_Fortran_COMPILER_ID MATCHES "Intel") endif() set(CMAKE_ICC_FLAGS " ") +set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp" ) if (UNIX) if ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Intel") @@ -162,10 +163,8 @@ MESSAGE(STATUS "=========") # Compiler Flags option(USE_F2C "Use FORTRAN To C compatible interface for certain Complex type functions" OFF) -set(f2cflag "-cpp ") - if(USE_F2C) - set(f2cflag "-cpp -DF2C " ) + set(f2cflag "-DF2C " ) ENDIF(USE_F2C) if(ENABLE_DTL) @@ -173,7 +172,7 @@ if(ENABLE_DTL) if(UNIX) if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") add_definitions("-DAOCL_DTL ") - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp -ffixed-line-length-none") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffixed-line-length-none") endif () endif() ENDIF(ENABLE_DTL) From 66eec24b6c0ebc04d928d76941c661467e1a825d Mon Sep 17 00:00:00 2001 From: arunchan Date: Mon, 28 Nov 2022 19:02:55 +0530 Subject: [PATCH 08/30] 
Add support for icx compiler Change-Id: I080fb56aaa53bdb53dbcdd42234198b32ab00bd8 --- CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 80e705c8..078185ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,8 +44,14 @@ if (UNIX) set( CMAKE_ICC_FLAGS "${CMAKE_ICC_FLAGS} -no-vec ") message(STATUS "Found Intel icc compiler : ${CMAKE_ICC_FLAGS} ") endif () + if (("${CMAKE_C_COMPILER_ID}" STREQUAL "IntelLLVM") AND ( "${CMAKE_C_COMPILER}" MATCHES "icx" ) ) + set(CMAKE_ICC_FLAGS "${CMAKE_ICC_FLAGS} -Wno-implicit-function-declaration") + set(CMAKE_ICC_FLAGS "${CMAKE_ICC_FLAGS} -no-vec ") + message(STATUS "Found Intel icx compiler : ${CMAKE_ICC_FLAGS} ") + endif () endif () + # # MPI # From af5ab61ee322cf997fa47b962d0041af09ed6674 Mon Sep 17 00:00:00 2001 From: arunchan Date: Mon, 28 Nov 2022 19:03:53 +0530 Subject: [PATCH 09/30] Don't add CMAKE_ICC_FLAGS to CDEFS Adding CMAKE_ICC_FLAGS to CDEFS results in fortran compiler getting the same flags.
So add CMAKE_ICC_FLAGS to CMAKE_C_FLAGS so that only C compiler gets it Change-Id: I274e32ed8fbf2e97a8e744012730b21f8ab9fb5f --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 078185ce..5eb4fc1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -210,7 +210,8 @@ if(ENABLE_SET_LIB_VERSION) add_definitions("-DAOCL_SCALAPACK_VERSION=${LIBRARY_VERSION}") endif() -ADD_DEFINITIONS( "-D${CDEFS}" "${CMAKE_ICC_FLAGS}" "${f2cflag}") +ADD_DEFINITIONS( "-D${CDEFS}" "${f2cflag}") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_ICC_FLAGS}") # -------------------------------------------------- # By default static library From 1b02531651c44e1759189cdaa3c0be7d71065451 Mon Sep 17 00:00:00 2001 From: arunchan Date: Wed, 15 Feb 2023 12:03:33 +0530 Subject: [PATCH 10/30] ICC and ICX need '-no-vec' for fortran compiler While we are at it remove duplicate addition of "-fltconsistency -fp_port" to CMAKE_Fortran_FLAGS Change-Id: I49f0fbbd5b367792b70fdc19daacc7888b6b4071 --- CMakeLists.txt | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5eb4fc1a..2bac5b9f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,15 +35,16 @@ set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp" ) if (UNIX) if ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Intel") - set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port" ) - endif () - if ( "${CMAKE_Fortran_COMPILER}" MATCHES "ifort" ) - set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port" ) + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port -no-vec") + elseif ("${CMAKE_Fortran_COMPILER}" MATCHES "ifort" ) + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port -no-vec") endif () + if (("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel") AND ( "${CMAKE_C_COMPILER}" MATCHES "icc" ) ) - set( CMAKE_ICC_FLAGS "${CMAKE_ICC_FLAGS} -no-vec ") +
set(CMAKE_ICC_FLAGS "${CMAKE_ICC_FLAGS} -no-vec ") message(STATUS "Found Intel icc compiler : ${CMAKE_ICC_FLAGS} ") endif () + if (("${CMAKE_C_COMPILER_ID}" STREQUAL "IntelLLVM") AND ( "${CMAKE_C_COMPILER}" MATCHES "icx" ) ) set(CMAKE_ICC_FLAGS "${CMAKE_ICC_FLAGS} -Wno-implicit-function-declaration") set(CMAKE_ICC_FLAGS "${CMAKE_ICC_FLAGS} -no-vec ") @@ -110,16 +111,6 @@ else() message(FATAL_ERROR "--> MPI Library NOT FOUND -- please set MPI_BASE_DIR accordingly --") endif() - -if (UNIX) - if ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Intel") - set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port" ) - endif () - if ( "${CMAKE_Fortran_COMPILER}" MATCHES "ifort" ) - set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port" ) - endif () -endif () - macro(SCALAPACK_install_library lib) install(TARGETS ${lib} EXPORT scalapack-targets ARCHIVE DESTINATION lib${LIB_SUFFIX} From 777da593d097f55754427386c6445dea11a6a6c8 Mon Sep 17 00:00:00 2001 From: "nprasadm@amd.com" Date: Tue, 27 Dec 2022 12:47:20 +0530 Subject: [PATCH 11/30] aocl-scaLAPACK framework updates: Following features added in scaLAPACK framework - Functions to detect target CPU and ISA support are included in new file cpu_features.c - New function: aocl_scalapack_init(). Initialization function that sets up context information to enable DTL, Progress feature at runtime.
This function uses pthread_once and hence gets invoked only once per application thread AMD Internal:[CPUPL-2717] Change-Id: I69b1136e41707044d37dfc6b228d69fc020a6082 --- CMakeLists.txt | 79 +++++--- FRAMEWORK/CMakeLists.txt | 14 ++ FRAMEWORK/SL_Context.c | 207 +++++++++++++++++++ FRAMEWORK/SL_Context.h | 106 ++++++++++ FRAMEWORK/SL_Context_fortran_include.h | 37 ++++ FRAMEWORK/SL_Context_module.f | 74 +++++++ FRAMEWORK/cpu_features.c | 268 +++++++++++++++++++++++++ FRAMEWORK/cpu_features.h | 175 ++++++++++++++++ LICENSE | 2 +- SRC/CMakeLists.txt | 8 +- SRC/aocl_dtl_trace_entry.c | 4 +- SRC/aocl_dtl_trace_exit.c | 2 +- SRC/aocl_scalapack_progress.c | 16 +- SRC/aocl_scalapack_progress.h | 19 +- SRC/pdgetf2.f | 8 +- SRC/pdgetrf.f | 43 ++-- SRC/pdgetrf0.f | 51 ++--- SRC/pdlaswp.f | 6 +- SRC/pxsyevx.h | 31 +-- 19 files changed, 1029 insertions(+), 121 deletions(-) create mode 100644 FRAMEWORK/CMakeLists.txt create mode 100644 FRAMEWORK/SL_Context.c create mode 100644 FRAMEWORK/SL_Context.h create mode 100644 FRAMEWORK/SL_Context_fortran_include.h create mode 100644 FRAMEWORK/SL_Context_module.f create mode 100644 FRAMEWORK/cpu_features.c create mode 100644 FRAMEWORK/cpu_features.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 2bac5b9f..4161fa95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,9 @@ -##Copyright (C) 2021-2022, Advanced Micro Devices, Inc. All rights reserved.## +##Copyright (C) 2021-2023, Advanced Micro Devices, Inc. 
All rights reserved.## cmake_minimum_required(VERSION 3.2) project(SCALAPACK C Fortran) # Configure the warning and code coverage suppression file -configure_file( +configure_file( "${SCALAPACK_SOURCE_DIR}/CMAKE/CTestCustom.cmake.in" "${SCALAPACK_BINARY_DIR}/CTestCustom.cmake" COPYONLY @@ -16,16 +16,17 @@ endif () # Add the CMake directory for custon CMake modules set(CMAKE_MODULE_PATH "${SCALAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH}) +set(SL_FRAMEWORK_INCLUDE_PATH "${SCALAPACK_SOURCE_DIR}/FRAMEWORK") + #Build Options # ILP64 build option option(ENABLE_ILP64 "Enable ILP64 " OFF) -option(ENABLE_AOCL_PROGRESS "Enable ILP64 " OFF) -option(ENABLE_DTL "Enable ILP64 " OFF) +option(ENABLE_AOCL_PROGRESS "Enable progress feature " OFF) +option(ENABLE_DTL "Enable DTL feature " OFF) # Option: Include build number in the version string. option (ENABLE_SET_LIB_VERSION "Set library version" OFF) - if (WIN32 AND CMAKE_Fortran_COMPILER_ID MATCHES "Intel") set (CMAKE_IFORT_LIBDEPS_DIR "C:/Program Files (x86)/IntelSWTools/compilers_and_libraries/windows/compiler/lib/intel64_win" CACHE STRING "") endif() @@ -34,10 +35,16 @@ set(CMAKE_ICC_FLAGS " ") set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp" ) if (UNIX) + if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp -ffixed-line-length-none -I ${SL_FRAMEWORK_INCLUDE_PATH}/" ) + endif () + if ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Flang") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp -ffixed-line-length-132 -I ${SL_FRAMEWORK_INCLUDE_PATH}/" ) + endif () if ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Intel") - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port -no-vec") + set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port -no-vec -cpp -I ${SL_FRAMEWORK_INCLUDE_PATH}/" ) elseif ("${CMAKE_Fortran_COMPILER}" MATCHES "ifort" ) - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port -no-vec") + set( 
CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port -no-vec -cpp -I ${SL_FRAMEWORK_INCLUDE_PATH}/" ) endif () if (("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel") AND ( "${CMAKE_C_COMPILER}" MATCHES "icc" ) ) @@ -50,6 +57,8 @@ if (UNIX) set(CMAKE_ICC_FLAGS "${CMAKE_ICC_FLAGS} -no-vec ") message(STATUS "Found Intel icx compiler : ${CMAKE_ICC_FLAGS} ") endif () +else () + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp -I ${SL_FRAMEWORK_INCLUDE_PATH}/ " ) endif () @@ -85,8 +94,8 @@ if (MPI_FOUND) PATH_SUFFIXES bin DOC "MPI Fortran compiler.") MARK_AS_ADVANCED(MPI_Fortran_COMPILER) - - + + if ("${MPI_Fortran_COMPILER}" STREQUAL "MPI_Fortran_COMPILER-NOTFOUND") message(ERROR "--> MPI Fortran Compiler NOT FOUND (please set MPI_BASE_DIR accordingly") @@ -96,7 +105,7 @@ if (MPI_FOUND) SET(CMAKE_Fortran_COMPILER "${MPI_Fortran_COMPILER}") message(STATUS "--> Fortran Compiler : ${CMAKE_Fortran_COMPILER}") endif() - + else() message(STATUS "Found MPI_LIBRARY : ${MPI_FOUND} ") set(MPI_BASE_DIR ${MPI_BASE_DIR} CACHE PATH "MPI Path") @@ -163,14 +172,13 @@ option(USE_F2C "Use FORTRAN To C compatible interface for certain Complex type f if(USE_F2C) set(f2cflag "-DF2C " ) ENDIF(USE_F2C) +MESSAGE(STATUS "CMAKE_C_COMPILER_ID = ${CMAKE_C_COMPILER_ID}") +MESSAGE(STATUS "CMAKE_Fortran_COMPILER_ID = ${CMAKE_Fortran_COMPILER_ID}") if(ENABLE_DTL) -#Enable DTL for GNU tool chain in UNIX +#Enable DTL for UNIX if(UNIX) - if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") add_definitions("-DAOCL_DTL ") - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffixed-line-length-none") - endif () endif() ENDIF(ENABLE_DTL) @@ -185,7 +193,7 @@ if(ENABLE_ILP64) add_definitions("-DInt=__int64" "-DENABLE_ILP64" "-DUInt=unsigned __int64" "-i8") endif() else(ENABLE_ILP64) - add_definitions("-DUInt=unsigned int " "-DInt=int") + add_definitions("-DUInt=unsigned int " "-DInt=int" ) ENDIF(ENABLE_ILP64) if(ENABLE_AOCL_PROGRESS) @@ -193,7 +201,7 @@ if(ENABLE_AOCL_PROGRESS) 
add_definitions("-DAOCL_PROGRESS") endif() ENDIF(ENABLE_AOCL_PROGRESS) - message(STATUS "ENABLE_SET_LIB_VERSION : ${ENABLE_SET_LIB_VERSION} ") + message(STATUS "ENABLE_SET_LIB_VERSION : ${ENABLE_SET_LIB_VERSION} ") if(ENABLE_SET_LIB_VERSION) string(TIMESTAMP TODAY "%Y%m%d") @@ -202,7 +210,7 @@ if(ENABLE_SET_LIB_VERSION) endif() ADD_DEFINITIONS( "-D${CDEFS}" "${f2cflag}") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_ICC_FLAGS}") +set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} ${CMAKE_ICC_FLAGS} -I ${SL_FRAMEWORK_INCLUDE_PATH}/") # -------------------------------------------------- # By default static library @@ -211,7 +219,7 @@ OPTION(BUILD_STATIC_LIBS "Build static libraries" ON ) # -------------------------------------------------- # Subdirectories that need to be processed - + macro(append_subdir_files variable dirname) get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable}) foreach(depfile ${holder}) @@ -299,8 +307,17 @@ MESSAGE(STATUS "=========") # # AOCL_DTL # -add_subdirectory(AOCL_DTL) -append_subdir_files(dtl "AOCL_DTL") +if (UNIX) + add_subdirectory(AOCL_DTL) + append_subdir_files(dtl "AOCL_DTL") +endif() + +# +# FRAMEWORK +# +add_subdirectory(FRAMEWORK) +append_subdir_files(framework "FRAMEWORK") +append_subdir_files(framework-C "FRAMEWORK") # # TOOLS @@ -344,11 +361,11 @@ endif () if (UNIX) if(CUSTOM_BLACS_FOUND) - add_library(scalapack ${dtl} ${tools} ${tools-C} ${extra_lapack} ${pblas} ${pblas-F} ${ptzblas} ${ptools} ${pbblas} ${redist} ${src} ${src-C}) + add_library(scalapack ${dtl} ${framework} ${framework-C} ${tools} ${tools-C} ${extra_lapack} ${pblas} ${pblas-F} ${ptzblas} ${ptools} ${pbblas} ${redist} ${src} ${src-C}) target_link_libraries( scalapack ${BLACS_LIBRARY} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) scalapack_install_library(scalapack) else(CUSTOM_BLACS_FOUND) - add_library(scalapack ${dtl} ${blacs} ${tools} ${tools-C} ${extra_lapack} ${pblas} ${pblas-F} ${ptzblas} ${ptools} ${pbblas} ${redist} ${src} ${src-C}) + 
add_library(scalapack ${dtl} ${framework} ${framework-C} ${blacs} ${tools} ${tools-C} ${extra_lapack} ${pblas} ${pblas-F} ${ptzblas} ${ptools} ${pbblas} ${redist} ${src} ${src-C}) target_link_libraries( scalapack ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) scalapack_install_library(scalapack) endif(CUSTOM_BLACS_FOUND) @@ -356,14 +373,14 @@ else (UNIX) # Need to separate Fortran and C Code if (CMAKE_C_COMPILER_ID MATCHES MSVC) # create Fortran objects and add to scalapack library first if(CUSTOM_BLACS_FOUND) - add_library(scalapack-F OBJECT ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${src} ${extra_lapack} ) - add_library(scalapack $ ${tools-C} ${pblas} ${ptools} ${redist} ${src-C}) + add_library(scalapack-F OBJECT ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${framework} ${src} ${extra_lapack} ) + add_library(scalapack $ ${tools-C} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack ${BLACS_LIBRARY} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) else(CUSTOM_BLACS_FOUND) - add_library(scalapack-F OBJECT ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${src} ${extra_lapack} ) - add_library(scalapack $ ${blacs} ${tools-C} ${pblas} ${ptools} ${redist} ${src-C}) + add_library(scalapack-F OBJECT ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${framework} ${src} ${extra_lapack} ) + add_library(scalapack $ ${blacs} ${tools-C} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) @@ -371,18 +388,18 @@ else (UNIX) # Need to separate Fortran and C Code else (CMAKE_C_COMPILER_ID MATCHES Clang) # create C objects and add to scalapack library first if(CUSTOM_BLACS_FOUND) - add_library(scalapack-C OBJECT ${tools-C} ${pblas} ${ptools} ${redist} ${src-C}) + 
add_library(scalapack-C OBJECT ${tools-C} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack-C ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack-C PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) - add_library(scalapack $ ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${src} ${extra_lapack} ) + add_library(scalapack $ ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${framework} ${src} ${extra_lapack} ) target_link_libraries( scalapack ${BLACS_LIBRARY} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) else(CUSTOM_BLACS_FOUND) - add_library(scalapack-C OBJECT ${blacs} ${tools-C} ${pblas} ${ptools} ${redist} ${src-C}) + add_library(scalapack-C OBJECT ${blacs} ${tools-C} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack-C ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack-C PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) - add_library(scalapack $ ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${src} ${extra_lapack} ) + add_library(scalapack $ ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${framework} ${src} ${extra_lapack} ) target_link_libraries( scalapack ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) @@ -395,7 +412,7 @@ if(${SCALAPACK_BUILD_TESTS}) add_subdirectory(TESTING) endif() # -------------------------------------------------- -# CPACK Packaging +# CPACK Packaging SET(CPACK_PACKAGE_NAME "ScaLAPACK") SET(CPACK_PACKAGE_VENDOR "University of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd") diff --git a/FRAMEWORK/CMakeLists.txt b/FRAMEWORK/CMakeLists.txt new file mode 100644 index 00000000..f26a375f --- /dev/null +++ b/FRAMEWORK/CMakeLists.txt @@ -0,0 +1,14 @@ +##Copyright (C) 2023, Advanced Micro Devices, Inc. 
All rights reserved.## + +# ---------------------------------- +# aocl-scaLAPACK framework routines +# ---------------------------------- +set (framework-C + SL_Context.c cpu_features.c) + +set (framework + SL_Context_module.f) + +set(src ${framework-C} ${framework}) + +#set(framework ${framework-C} ${framework}) diff --git a/FRAMEWORK/SL_Context.c b/FRAMEWORK/SL_Context.c new file mode 100644 index 00000000..5e1f6319 --- /dev/null +++ b/FRAMEWORK/SL_Context.c @@ -0,0 +1,207 @@ + +/* ************************************************************************ + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * ************************************************************************ */ +#include "SL_Context.h" +#include +#include +#if defined(SCALAPACK_NO_CONTEXT) +// This branch defines a pthread-like API, scalapack_pthread_*(), and implements it +// in terms of "dummy" code that doesn't depend on POSIX threads or any other +// threading mechanism. +// NOTE: THIS CODE DOES NOT IMPLEMENT THREADING AND IS NOT THREAD-SAFE! +int scalapack_pthread_mutex_lock(scalapack_pthread_mutex_t *mutex) +{ + //return pthread_mutex_lock( mutex ); + return 0; +} +int scalapack_pthread_mutex_unlock(scalapack_pthread_mutex_t *mutex) +{ + //return pthread_mutex_unlock( mutex ); + return 0; +} +// -- pthread_once() -- +void scalapack_pthread_once(scalapack_pthread_once_t *once, void (*init)(void)) +{ + //pthread_once( once, init ); + return; +} +#elif defined(_WIN32) // !defined(SCALAPACK_NO_CONTEXT); same guard SL_Context.h uses for the SRWLOCK/INIT_ONCE typedefs +#include +// This branch defines a pthread-like API, scalapack_pthread_*(), and implements it +// in terms of Windows API calls. +// -- pthread_mutex_*() -- +int scalapack_pthread_mutex_lock(scalapack_pthread_mutex_t *mutex) +{ + AcquireSRWLockExclusive(mutex); + return 0; +} +int scalapack_pthread_mutex_unlock(scalapack_pthread_mutex_t *mutex) +{ + ReleaseSRWLockExclusive(mutex); + return 0; +} +// -- pthread_once() -- the callback handed to InitOnceExecuteOnce() must have the BOOL CALLBACK (PINIT_ONCE, PVOID, PVOID *) shape +static BOOL CALLBACK + scalapack_init_once_wrapper(PINIT_ONCE once, PVOID param, PVOID *context) +{ + (void)once; + (void)context; + typedef void (*callback)(void); + ((callback)param)(); + return TRUE; +} +void scalapack_pthread_once(scalapack_pthread_once_t *once, void (*init)(void)) +{ + InitOnceExecuteOnce(once, scalapack_init_once_wrapper, init, NULL); +} +#else // !defined(SCALAPACK_NO_CONTEXT) && !defined(_WIN32) +// This branch defines a pthreads-like API, scalapack_pthreads_*(), and implements it +// in terms of the corresponding pthreads_*() types, macros, and function calls.
+// This branch is compiled for Linux and other non-Windows environments where +// we assume that *some* implementation of pthreads is provided (although it +// may lack barriers--see below). +// -- pthread_mutex_*() -- +int scalapack_pthread_mutex_lock(scalapack_pthread_mutex_t *mutex) +{ + return pthread_mutex_lock(mutex); +} +int scalapack_pthread_mutex_unlock(scalapack_pthread_mutex_t *mutex) +{ + return pthread_mutex_unlock(mutex); +} +// -- pthread_once() -- +void scalapack_pthread_once(scalapack_pthread_once_t *once, void (*init)(void)) +{ + pthread_once(once, init); +} +#endif // !defined(SCALAPACK_NO_CONTEXT) && !defined(_MSC_VER) +/* The global scalapack_context structure, which holds the global thread,ISA settings + Initialize with 0. +**/ +aocl_scalapack_global_context scalapack_context = {0,0,0}; +/* A mutex to allow synchronous access to global_thread. */ +scalapack_pthread_mutex_t global_thread_mutex = SL_PTHREAD_MUTEX_INITIALIZER; +/******************************************************************************** + * \brief scalapack_env_get_var is a function used to query the environment + * variable and convert the string into integer and return the same + ********************************************************************************/ +int scalapack_env_get_var(const char *env, int fallback) +{ + int r_val; + char *str; + // Query the environment variable and store the result in str. + str = getenv(env); + // Set the return value based on the string obtained from getenv(). + if(str != NULL) + { + // If there was no error, convert the string to an integer and + // prepare to return that integer. + r_val = (int)strtol(str, NULL, 10); + } + else + { + // If there was an error, use the "fallback" as the return value. 
+ r_val = fallback; + } + return r_val; +} +void scalapack_thread_init_rntm_from_env(aocl_scalapack_global_context *context) +{ + int status; + /* Check whether DTL is set in the run-time environment */ + status = scalapack_env_get_var("AOCL_SL_DTL", -1); + + if (status == -1) + { + context->is_dtl_enabled = 0; + } + else + { + context->is_dtl_enabled = 1; + } + + /* Check whether AOCL-progress requirement is set in the run-time environment */ + status = scalapack_env_get_var("AOCL_SL_PROGRESS", -1); + + if (status == -1) + { + context->is_progress_enabled = 0; + } + else + { + context->is_progress_enabled = 1; + } + + /* Since multithreading support is not present in the aocl-scaLAPACK, + we set the context number of threads to 1. + NOTE: If multithread support is enabled, then we have to set the + desired num_threads from the environment. + */ + context->num_threads = 1; +} +// ----------------------------------------------------------------------------- +void scalapack_context_init(void) +{ + // Read the environment variables and use them to initialize the + // global runtime object. + scalapack_thread_init_rntm_from_env(&scalapack_context); +} +// ----------------------------------------------------------------------------- +void scalapack_context_finalize(void) {} +// ----------------------------------------------------------------------------- +// A pthread_once_t variable is a pthread structure used in pthread_once(). +// pthread_once() is guaranteed to execute exactly once among all threads that +// pass in this control object. Thus, we need one for initialization and a +// separate one for finalization. 
+static scalapack_pthread_once_t once_init = SL_PTHREAD_ONCE_INIT; +static scalapack_pthread_once_t once_finalize = SL_PTHREAD_ONCE_INIT; + +void aocl_scalapack_init_() +{ + scalapack_pthread_once(&once_init, scalapack_context_init); +} +void AOCL_SCALAPACK_INIT() +{ + scalapack_pthread_once(&once_init, scalapack_context_init); +} + +void aocl_scalapack_finalize(void) +{ + scalapack_pthread_once(&once_finalize, scalapack_context_finalize); +} +int scalapack_thread_get_num_threads(void) +{ + // We must ensure that global_rntm has been initialized. + aocl_scalapack_init_(); + return scalapack_context.num_threads; +} +void scalapack_thread_set_num_threads(int n_threads) +{ + // We must ensure that global_thread has been initialized. + aocl_scalapack_init_(); + // Acquire the mutex protecting global_thread. + scalapack_pthread_mutex_lock(&global_thread_mutex); + scalapack_context.num_threads = n_threads; + // Release the mutex protecting global_thread. + scalapack_pthread_mutex_unlock(&global_thread_mutex); +} diff --git a/FRAMEWORK/SL_Context.h b/FRAMEWORK/SL_Context.h new file mode 100644 index 00000000..4338d180 --- /dev/null +++ b/FRAMEWORK/SL_Context.h @@ -0,0 +1,106 @@ + +/* ************************************************************************ + * Copyright (c) 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * ************************************************************************ */ +#ifndef SL_CONTEXT_H +#define SL_CONTEXT_H +#include +/* -- Type and macro definitions ----------------------------------------------- */ +#if defined( SCALAPACK_NO_CONTEXT) +/* This branch defines a pthread-like API, scalapack_pthread_*(), and implements it +// in terms of "dummy" code that doesn't depend on POSIX threads or any other +// threading mechanism. +// NOTE: THIS CODE DOES NOT IMPLEMENT THREADING AND IS NOT THREAD-SAFE! +// -- pthread types -- */ +typedef int scalapack_pthread_mutex_t; +typedef int scalapack_pthread_once_t; +/* -- pthreads macros -- */ +#define SL_PTHREAD_MUTEX_INITIALIZER 0 +#define SL_PTHREAD_ONCE_INIT 0 +#elif defined(_WIN32) +/* #ifdef _MSC_VER */ /* !defined(SCALAPACK_NO_CONTEXT) */ +#include +// This branch defines a pthread-like API, scalapack_pthread_*(), and implements it +// in terms of Windows API calls. +// -- pthread types -- +typedef SRWLOCK scalapack_pthread_mutex_t; +typedef INIT_ONCE scalapack_pthread_once_t; +// -- pthreads macros -- +#define SL_PTHREAD_MUTEX_INITIALIZER SRWLOCK_INIT +#define SL_PTHREAD_ONCE_INIT INIT_ONCE_STATIC_INIT +#else /* !defined(SCALAPACK_NO_CONTEXT) && !defined(_MSC_VER)*/ +#include +/* This branch defines a pthreads-like API, scalapack_pthreads_*(), and implements it + in terms of the corresponding pthreads_*() types, macros, and function calls. 
+ -- pthread types -- */ +typedef pthread_mutex_t scalapack_pthread_mutex_t; +typedef pthread_once_t scalapack_pthread_once_t; +/* -- pthreads macros -- */ +#define SL_PTHREAD_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#define SL_PTHREAD_ONCE_INIT PTHREAD_ONCE_INIT +#endif +/* -- Function definitions ----------------------------------------------------- + -- pthread_mutex_*() -- */ +int scalapack_pthread_mutex_lock(scalapack_pthread_mutex_t *mutex); +int scalapack_pthread_mutex_unlock(scalapack_pthread_mutex_t *mutex); +/* -- pthread_once() -- */ +void scalapack_pthread_once(scalapack_pthread_once_t *once, void (*init)(void)); +/****************************************************************************************** + * \brief scalapack_context is a structure holding the below information: + 1) Enable/Disable status of DTL logging and AOCL_Progress. + 2) In future additionally following could be added to the structure: + - The number of threads + - Target CPU ISA information + char is_fma; + char is_avx2; + char is_avx512; + 3) It gets initialised by scalapack_init_once(). + *****************************************************************************************/ +typedef struct _aocl_scalapack_global_context +{ + int num_threads; /* Number of Threads */ + int is_dtl_enabled; /* DTL log */ + int is_progress_enabled; /* AOCL-progress */ + +} aocl_scalapack_global_context; +extern aocl_scalapack_global_context scalapack_context; +typedef aocl_scalapack_global_context aocl_scalapack_global_context_; +typedef aocl_scalapack_global_context AOCL_SCALAPACK_GLOBAL_CONTEXT; +/*! \ingroup aux_module + * \brief Initialise various framework variables including + * context + * + * \retval none. 
+ +void aocl_scalapack_init(); */ +void aocl_scalapack_init_(); +void AOCL_SCALAPACK_INIT(); +/* Alias Declarations to enable F2C calls +#define aocl_scalapack_init_ aocl_scalapack_init +#define AOCL_SCALAPACK_INIT_ aocl_scalapack_init +#define AOCL_SCALAPACK_INIT aocl_scalapack_init*/ + +/*! \ingroup aux_module + * \brief Deallocate and clean all initalized buffers + */ +void aocl_scalapack_finalize(); +#endif /* SL_CONTEXT_H */ diff --git a/FRAMEWORK/SL_Context_fortran_include.h b/FRAMEWORK/SL_Context_fortran_include.h new file mode 100644 index 00000000..a3401a4e --- /dev/null +++ b/FRAMEWORK/SL_Context_fortran_include.h @@ -0,0 +1,37 @@ + +/* ************************************************************************ + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * ************************************************************************ */ + +#ifndef SL_CONTEXT_FORTRAN_H +#define SL_CONTEXT_FORTRAN_H + +#if _WIN32 +#define AOCL_DTL_TRACE_ENTRY_F CONTINUE +#define AOCL_DTL_TRACE_EXIT_F CONTINUE +#define aocl_scalapack_init_ AOCL_SCALAPACK_INIT +#else +#define AOCL_DTL_TRACE_ENTRY_F CALL SL_DTL_TRACE_ENTRY_F(FILE_NAME, __LINE__, ' ') +#define AOCL_DTL_TRACE_EXIT_F CALL SL_DTL_TRACE_EXIT_F (FILE_NAME, __LINE__, ' ') +#endif + +#endif /* SL_CONTEXT_FORTRAN_H */ diff --git a/FRAMEWORK/SL_Context_module.f b/FRAMEWORK/SL_Context_module.f new file mode 100644 index 00000000..91973e28 --- /dev/null +++ b/FRAMEWORK/SL_Context_module.f @@ -0,0 +1,74 @@ + +* ************************************************************************ +* Copyright (c) 2023 Advanced Micro Devices, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. 
+* +* ************************************************************************ */ + MODULE LINK_TO_C_GLOBALS + USE, INTRINSIC::ISO_C_BINDING + TYPE, BIND(C)::AOCL_SCALAPACK_GLOBAL_CONTEXT + INTEGER(C_INT)::NUM_THREADS + INTEGER(C_INT)::IS_DTL_ENABLED + INTEGER(C_INT)::IS_PROGRESS_ENABLED + END TYPE + TYPE(AOCL_SCALAPACK_GLOBAL_CONTEXT),BIND(C)::SCALAPACK_CONTEXT + + END MODULE LINK_TO_C_GLOBALS + +* +* ===================================================================== +* SUBROUTINE SL_DTL_TRACE_ENTRY_F +* ===================================================================== + SUBROUTINE SL_DTL_TRACE_ENTRY_F( FILENAME, LINENUMBER, MESSAGE ) +* + USE LINK_TO_C_GLOBALS +* .. Scalar Arguments .. + INTEGER LINENUMBER +* .. +* .. Array Arguments .. + CHARACTER FILENAME( * ), MESSAGE( * ) + IF(SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1) THEN + CALL AOCL_SL_DTL_TRACE_ENTRY(FILENAME, LINENUMBER, MESSAGE) + END IF + RETURN +* +* End of SL_DTL_TRACE_ENTRY_F +* + END +* +* ===================================================================== +* SUBROUTINE SL_DTL_TRACE_EXIT_F +* ===================================================================== + SUBROUTINE SL_DTL_TRACE_EXIT_F( FILENAME, LINENUMBER, MESSAGE ) +* + USE LINK_TO_C_GLOBALS +* .. Scalar Arguments .. + INTEGER LINENUMBER +* .. +* .. Array Arguments .. + CHARACTER FILENAME( * ), MESSAGE( * ) + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN + CALL AOCL_SL_DTL_TRACE_EXIT(FILENAME, LINENUMBER, MESSAGE) + END IF + RETURN +* +* End of SL_DTL_TRACE_ENTRY_F +* + END diff --git a/FRAMEWORK/cpu_features.c b/FRAMEWORK/cpu_features.c new file mode 100644 index 00000000..650b7ef7 --- /dev/null +++ b/FRAMEWORK/cpu_features.c @@ -0,0 +1,268 @@ + +/* ************************************************************************ + * Copyright (c) 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ *
+ * ************************************************************************ */
+#include "cpu_features.h"
+#define ALC_CPU_FEATURE_REG(ftr, idx, reg) ({              \
+            uint32_t val;                                   \
+            struct alc_cpuid_regs *r;                       \
+            r = &(ftr)->available[0];                       \
+            val = r[(idx)].reg;                             \
+            val;                                            \
+        })
+#define ALC_CPU_FEATURE(ptr, idx, reg, bit) ({              \
+            uint32_t __reg =                                \
+                ALC_CPU_FEATURE_REG(ptr, idx, reg);         \
+            (__reg & bit);                                  \
+        })
+/* AVX512 feature tests: each yields the masked CPUID leaf-7 bit (non-zero when present) */
+#define ALC_CPU_HAS_AVX512F(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512F) /* For AVX512 foundation flag */
+#define ALC_CPU_HAS_AVX512DQ(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512DQ)
+#define ALC_CPU_HAS_AVX512BW(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512BW)
+#define ALC_CPU_HAS_AVX512ER(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512ER)
+#define ALC_CPU_HAS_AVX512CD(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512CD)
+#define ALC_CPU_HAS_AVX512VL(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512VL)
+#define ALC_CPU_HAS_AVX512PF(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512PF)
+#define ALC_CPU_HAS_AVX512_IFMA(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512_IFMA)
+#define ALC_CPU_HAS_AVX512_VNNI(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VNNI)
+#define ALC_CPU_HAS_AVX512_BITALG(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_BITALG)
+#define ALC_CPU_HAS_AVX512_VBMI(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VBMI)
+#define ALC_CPU_HAS_AVX512_VBMI2(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VBMI2)
+#define ALC_CPU_HAS_AVX512_VPOPCNTDQ(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VPOPCNTDQ)
+struct alc_cpu_features cpu_features;
+struct
+{
+    uint32_t eax;
+    uint32_t ecx;
+} __cpuid_values[ALC_CPUID_MAX] = {
+    [ALC_CPUID_EAX_1] = { 0x1, 0x0 }, /* eax = 1, ecx=0 */
+    [ALC_CPUID_EAX_7] = { 0x7, 0x0 }, /* eax = 7, -"- */
+    [ALC_CPUID_EAX_8_01] = { 0x80000001, 0x0 }, /* eax = 0x80000001 */
+    [ALC_CPUID_EAX_8_07] = { 0x80000007, 0x0 }, /* eax = 0x80000007 */
+    [ALC_CPUID_EAX_8_08] = { 0x80000008, 0x0 }, /* eax = 0x80000008 */
+};
+static inline uint32_t
+__extract32(uint32_t value, int start, int length) /* bits [start, start+length) of value */
+{
+    assert(start >= 0 && length > 0 && length <= 32 - start);
+    return (value >> start) & (~0U >> (32 - length));
+}
+static inline uint16_t
+alc_cpuid_get_family(uint32_t var) /* var: EAX of CPUID leaf 1 */
+{
+    return (uint16_t)(__extract32(var, 20, 8) +
+                      __extract32(var, 8, 4));
+}
+static inline uint16_t
+alc_cpuid_get_model(uint32_t var) /* var: EAX of CPUID leaf 1 */
+{
+    return (uint16_t)(__extract32(var, 16, 4) << 4 |
+                      __extract32(var, 4, 4));
+}
+static inline uint16_t
+alc_cpuid_get_stepping(uint32_t var) /* var: EAX of CPUID leaf 1 */
+{
+    /* Stepping ID is CPUID.01H:EAX[3:0]. */
+    return (uint16_t)__extract32(var, 0, 4);
+}
+static inline void __cpuid(struct alc_cpuid_regs *out) /* NOTE(review): no leaf is loaded into EAX here */
+{
+    __asm__ volatile
+    (
+        "cpuid"
+        :"=a"(out->eax), "=b"(out->ebx), "=c"(out->ecx), "=d"(out->edx)
+    );
+}
+static inline void __cpuid_1(uint32_t eax, struct alc_cpuid_regs *out) /* CPUID with leaf in EAX */
+{
+    __asm__ volatile
+    (
+        "cpuid"
+        :"=a"(out->eax), "=b"(out->ebx), "=c"(out->ecx), "=d"(out->edx)
+        :"0"(eax)
+    );
+}
+static inline void __cpuid_2(uint32_t eax, uint32_t ecx, struct alc_cpuid_regs *out) /* leaf in EAX, sub-leaf in ECX */
+{
+    __asm__ volatile
+    (
+        "cpuid"
+        :"=a"(out->eax), "=b"(out->ebx), "=c"(out->ecx), "=d"(out->edx)
+        :"0"(eax), "2"(ecx)
+    );
+}
+static void
+__get_mfg_info(struct alc_cpuid_regs* cpuid_regs, /* not read; leaf 1 is queried again below */
+               struct alc_cpu_mfg_info* mfg_info)
+{
+    uint16_t model;
+    uint16_t family;
+    if (mfg_info) {
+        struct alc_cpuid_regs regs;
+        __cpuid_1(1, &regs);
+        family = alc_cpuid_get_family(regs.eax);
+        model = alc_cpuid_get_model(regs.eax);
+        if (family >= ALC_CPU_FAMILY_ZEN) { /* family/model are recorded only for Zen or newer */
+            mfg_info->family = (uint16_t)family;
+            mfg_info->model = (uint16_t)model;
+        }
+        mfg_info->stepping = alc_cpuid_get_stepping(regs.eax);
+    }
+}
+static void
+__init_cpu_features(void)
+{
+    static unsigned initialized = 0; /* plain flag, no locking */
+    struct alc_cpu_mfg_info* mfg_info = &cpu_features.cpu_mfg_info;
+    int arr_size = ARRAY_SIZE(__cpuid_values);
+    if (initialized == INITIALIZED_MAGIC)
+        return;
+    struct alc_cpuid_regs regs;
+    __cpuid_1(0, &regs);
+    /* Vendor string "AuthenticAMD", returned in EBX, EDX, ECX */
+    if (regs.ebx == 0x68747541 && regs.ecx == 0x444d4163
+        && regs.edx == 0x69746e65) {
+        cpu_features.cpu_mfg_info.mfg_type = ALC_CPU_MFG_AMD;
+    }
+    for (int i = 0; i < arr_size; i++) { /* cache every leaf listed in __cpuid_values */
+        struct alc_cpuid_regs ft;
+        __cpuid_2(__cpuid_values[i].eax, __cpuid_values[i].ecx, &ft);
+        cpu_features.available[i].eax = ft.eax;
+        cpu_features.available[i].ebx = ft.ebx;
+        cpu_features.available[i].ecx = ft.ecx;
+        cpu_features.available[i].edx = ft.edx;
+    }
+    __get_mfg_info(&cpu_features.available[ALC_CPUID_EAX_1], mfg_info);
+    /*
+     * On AMD Zen or newer, publish every detected feature as usable by
+     * copying the `available` registers into `usable`.
+     */
+    if (mfg_info->mfg_type == ALC_CPU_MFG_AMD
+        && mfg_info->family >= ALC_CPU_FAMILY_ZEN) {
+        memcpy(&cpu_features.usable[0],
+               &cpu_features.available[0],
+               sizeof(cpu_features.usable));
+    }
+    initialized = INITIALIZED_MAGIC;
+}
+uint32_t
+alc_cpu_has_avx512f(void)
+{
+    __init_cpu_features();
+    return ALC_CPU_HAS_AVX512F(&cpu_features);
+}
+uint32_t
+alc_cpu_has_avx512dq(void)
+{
+    if (alc_cpu_has_avx512f() == 0)
+        return 0;
+    __init_cpu_features();
+    return ALC_CPU_HAS_AVX512DQ(&cpu_features);
+}
+uint32_t
+alc_cpu_has_avx512bw(void)
+{
+    if (alc_cpu_has_avx512f() == 0)
+        return 0;
+    __init_cpu_features();
+    return ALC_CPU_HAS_AVX512BW(&cpu_features);
+}
+uint32_t
+alc_cpu_has_avx512er(void)
+{
+    if (alc_cpu_has_avx512f() == 0)
+        return 0;
+    __init_cpu_features();
+    return ALC_CPU_HAS_AVX512ER(&cpu_features);
+}
+uint32_t
+alc_cpu_has_avx512cd(void)
+{
+    if (alc_cpu_has_avx512f() == 0)
+        return 0;
+    __init_cpu_features();
+    return ALC_CPU_HAS_AVX512CD(&cpu_features);
+}
+uint32_t
+alc_cpu_has_avx512vl(void)
+{
+    if (alc_cpu_has_avx512f() == 0)
+        return 0;
+    __init_cpu_features();
+    return ALC_CPU_HAS_AVX512VL(&cpu_features);
+}
+uint32_t
+alc_cpu_has_avx512pf(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512PF(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_ifma(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_IFMA(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_vnni(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_VNNI(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_bitalg(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_BITALG(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_vbmi(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_VBMI(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_vbmi2(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_VBMI2(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_vpopcntdq(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_VPOPCNTDQ(&cpu_features); +} diff --git a/FRAMEWORK/cpu_features.h b/FRAMEWORK/cpu_features.h new file mode 100644 index 00000000..b19399b1 --- /dev/null +++ b/FRAMEWORK/cpu_features.h @@ -0,0 +1,175 @@ + +/* ************************************************************************ + * Copyright (c) 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * ************************************************************************ */ +#ifndef SL_CPUFEATURES_H +#define SL_CPUFEATURES_H +#include +#include +#include +enum { + ALC_CPUID_EAX_1 = 0, + ALC_CPUID_EAX_7, + ALC_CPUID_EAX_8_01, /* 8000.0001 */ + ALC_CPUID_EAX_8_07, /* 8000.0007 */ + ALC_CPUID_EAX_8_08, /* 8000.0008 */ + /* Last entry */ + ALC_CPUID_MAX, +}; +enum { + /*EBX Values*/ + ALC_CPUID_BIT_FSGSBASE = (1u << 0), + ALC_CPUID_BIT_TSC_ADJUST = (1u << 1), + ALC_CPUID_BIT_SGX = (1u << 2), + ALC_CPUID_BIT_BMI1 = (1u << 3), + ALC_CPUID_BIT_HLE = (1u << 4), + ALC_CPUID_BIT_AVX2 = (1u << 5), + ALC_CPUID_BIT_SMEP = (1u << 7), + ALC_CPUID_BIT_BMI2 = (1u << 8), + ALC_CPUID_BIT_ERMS = (1u << 9), + ALC_CPUID_BIT_INVPCID = (1u << 10), + ALC_CPUID_BIT_RTM = (1u << 11), + ALC_CPUID_BIT_TSX = ALC_CPUID_BIT_RTM, + ALC_CPUID_BIT_PQM = (1u << 12), + ALC_CPUID_BIT_MPX = (1u << 14), + ALC_CPUID_BIT_PQE = (1u << 15), + ALC_CPUID_BIT_AVX512F = (1u << 16), + ALC_CPUID_BIT_AVX512DQ = (1u << 17), + ALC_CPUID_BIT_RDSEED = (1u << 18), + ALC_CPUID_BIT_ADX = (1u << 19), + ALC_CPUID_BIT_SMAP = (1u << 20), + ALC_CPUID_BIT_AVX512_IFMA = (1u << 21), + ALC_CPUID_BIT_CLFLUSHOPT = (1u << 22), + ALC_CPUID_BIT_CLWB = (1u << 24), + ALC_CPUID_BIT_TRACE = (1u << 25), + ALC_CPUID_BIT_AVX512PF = (1u << 26), + ALC_CPUID_BIT_AVX512ER = (1u << 27), + ALC_CPUID_BIT_AVX512CD = (1u << 28), + ALC_CPUID_BIT_SHA = (1u << 29), + ALC_CPUID_BIT_AVX512BW = (1u << 30), + ALC_CPUID_BIT_AVX512VL = (1u << 31), + /* ECX Values*/ + ALC_CPUID_BIT_PREFETCHWT1 = (1u << 0), + ALC_CPUID_BIT_AVX512_VBMI = (1u << 1), + ALC_CPUID_BIT_UMIP = (1u << 2), + ALC_CPUID_BIT_PKU = (1u << 3), + ALC_CPUID_BIT_OSPKE = (1u << 4), + ALC_CPUID_BIT_WAITPKG = (1u << 5), + ALC_CPUID_BIT_AVX512_VBMI2 = (1u << 6), + ALC_CPUID_BIT_SHSTK = (1u << 7), + ALC_CPUID_BIT_GFNI = (1u << 8), + ALC_CPUID_BIT_VAES = (1u << 9), + ALC_CPUID_BIT_VPCLMULQDQ = (1u << 10), + ALC_CPUID_BIT_AVX512_VNNI = (1u << 11), + ALC_CPUID_BIT_AVX512_BITALG = (1u << 
12), + ALC_CPUID_BIT_AVX512_VPOPCNTDQ = (1u << 14), + ALC_CPUID_BIT_RDPID = (1u << 22), + ALC_CPUID_BIT_CLDEMOTE = (1u << 25), + ALC_CPUID_BIT_MOVDIRI = (1u << 27), + ALC_CPUID_BIT_MOVDIR64B = (1u << 28), + ALC_CPUID_BIT_SGX_LC = (1u << 30), + /* EDX Values */ + ALC_CPUID_BIT_AVX512_4VNNIW = (1u << 2), + ALC_CPUID_BIT_AVX512_4FMAPS = (1u << 3), + ALC_CPUID_BIT_FSRM = (1u << 4), + ALC_CPUID_BIT_PCONFIG = (1u << 18), + ALC_CPUID_BIT_IBT = (1u << 20), + ALC_CPUID_BIT_IBRS_IBPB = (1u << 26), + ALC_CPUID_BIT_STIBP = (1u << 27), + ALC_CPUID_BIT_CAPABILITIES = (1u << 29), + ALC_CPUID_BIT_SSBD = (1u << 31), +}; +#define ALC_CPU_FAMILY_ZEN 0x17 +#define ALC_CPU_FAMILY_ZEN_PLUS 0x17 +#define ALC_CPU_FAMILY_ZEN2 0x17 +#define ALC_CPU_FAMILY_ZEN3 0x19 +#define ALC_CPU_FAMILY_ZEN4 0x19 +static inline uint32_t +__extract32(uint32_t value, int start, int length); +static inline uint16_t +alc_cpuid_get_family(uint32_t var); +static inline uint16_t +alc_cpuid_get_model(uint32_t var); +static inline uint16_t +alc_cpuid_get_stepping(uint32_t var); +/* ID return values */ +struct alc_cpuid_regs { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; +}; +typedef enum { + ALC_CPU_MFG_INTEL, + ALC_CPU_MFG_AMD, + ALC_CPU_MFG_OTHER, +} alc_cpu_mfg_t; +struct alc_cpu_mfg_info { + alc_cpu_mfg_t mfg_type; + uint16_t family; + uint16_t model; + uint16_t stepping; +}; +struct alc_cpu_features { + struct alc_cpu_mfg_info cpu_mfg_info; + struct alc_cpuid_regs available[ALC_CPUID_MAX]; + struct alc_cpuid_regs usable[ALC_CPUID_MAX]; +}; +static inline void __cpuid(struct alc_cpuid_regs *out); +static inline void __cpuid_1(uint32_t eax, struct alc_cpuid_regs *out); +static inline void __cpuid_2(uint32_t eax, uint32_t ecx, struct alc_cpuid_regs *out); +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#endif +#define INITIALIZED_MAGIC 0xdeadbeaf +static void +__get_mfg_info(struct alc_cpuid_regs* cpuid_regs, + struct alc_cpu_mfg_info* mfg_info); +static void 
+__init_cpu_features(void); +uint32_t +alc_cpu_has_avx512f(void); +uint32_t +alc_cpu_has_avx512dq(void); +uint32_t +alc_cpu_has_avx512bw(void); +uint32_t +alc_cpu_has_avx512er(void); +uint32_t +alc_cpu_has_avx512cd(void); +uint32_t +alc_cpu_has_avx512vl(void); +uint32_t +alc_cpu_has_avx512pf(void); +uint32_t +alc_cpu_has_avx512_ifma(void); +uint32_t +alc_cpu_has_avx512_vnni(void); +uint32_t +alc_cpu_has_avx512_bitalg(void); +uint32_t +alc_cpu_has_avx512_vbmi(void); +uint32_t +alc_cpu_has_avx512_vbmi2(void); +uint32_t +alc_cpu_has_avx512_vpopcntdq(void); +#endif //SL_CPUFEATURES_H diff --git a/LICENSE b/LICENSE index e5aa1c83..bd156b6b 100644 --- a/LICENSE +++ b/LICENSE @@ -5,7 +5,7 @@ Copyright (c) 2000-2011 The University of California Berkeley. All rights reserved. Copyright (c) 2006-2011 The University of Colorado Denver. All rights reserved. -Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights +Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights reserved. $COPYRIGHT$ diff --git a/SRC/CMakeLists.txt b/SRC/CMakeLists.txt index 8887bfd0..0954e3f6 100644 --- a/SRC/CMakeLists.txt +++ b/SRC/CMakeLists.txt @@ -1,17 +1,17 @@ -##Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.## +##Copyright (C) 2021-2023, Advanced Micro Devices, Inc. 
All rights reserved.## set (ALLAUX pjlaenv.f pilaenvx.f piparmq.f pilaver.f pmpim2.f pmpcol.f) if(ENABLE_DTL) set (ALLAUX-C pbchkvect.c getpbbuf.c pcrot.c pslaiect.c pdlaiect.c pzrot.c - slamov.c clamov.c dlamov.c zlamov.c aocl_scalapack_progress.c + slamov.c clamov.c dlamov.c zlamov.c aocl_scalapack_progress.c get_aocl_scalapack_version.c aocl_dtl_trace_entry.c aocl_dtl_trace_exit.c) else(ENABLE_DTL) set (ALLAUX-C pbchkvect.c getpbbuf.c pcrot.c pslaiect.c pdlaiect.c pzrot.c - slamov.c clamov.c dlamov.c zlamov.c aocl_scalapack_progress.c - get_aocl_scalapack_version.c) + slamov.c clamov.c dlamov.c zlamov.c aocl_scalapack_progress.c + get_aocl_scalapack_version.c aocl_dtl_trace_entry.c aocl_dtl_trace_exit.c) ENDIF(ENABLE_DTL) set (SCLAUX diff --git a/SRC/aocl_dtl_trace_entry.c b/SRC/aocl_dtl_trace_entry.c index e20fc172..2c9e73fb 100644 --- a/SRC/aocl_dtl_trace_entry.c +++ b/SRC/aocl_dtl_trace_entry.c @@ -17,7 +17,7 @@ /* Customized for Fortran calls from Scalapack code */ -void aocl_dtl_log_entry_( char *buffer ) +void aocl_sl_dtl_log_entry_( char *buffer ) { #if AOCL_DTL_LOG_ENABLE /* Capture the contents to the DTL log file */ @@ -25,7 +25,7 @@ void aocl_dtl_log_entry_( char *buffer ) #endif } -void aocl_dtl_trace_entry_( const char * fileName, unsigned int * lineNumber, +void aocl_sl_dtl_trace_entry_( const char * fileName, unsigned int * lineNumber, const char * message ) { #if AOCL_DTL_TRACE_ENABLE diff --git a/SRC/aocl_dtl_trace_exit.c b/SRC/aocl_dtl_trace_exit.c index 0af64043..aaaa7afe 100644 --- a/SRC/aocl_dtl_trace_exit.c +++ b/SRC/aocl_dtl_trace_exit.c @@ -16,7 +16,7 @@ #include "../AOCL_DTL/aocldtl.h" #include "pxsyevx.h" -void aocl_dtl_trace_exit_( const char * fileName, unsigned int * lineNumber, +void aocl_sl_dtl_trace_exit_( const char * fileName, unsigned int * lineNumber, const char * message ) { #if AOCL_DTL_TRACE_ENABLE diff --git a/SRC/aocl_scalapack_progress.c b/SRC/aocl_scalapack_progress.c index 0a52212b..dbd6af4f 100644 --- 
a/SRC/aocl_scalapack_progress.c +++ b/SRC/aocl_scalapack_progress.c @@ -20,13 +20,23 @@ void aocl_scalapack_set_progress_( aocl_scalapack_progress_callback func ) aocl_scalapack_progress_ptr_ = func; } -void aocl_scalapack_progress_(const char* const api, const integer *lenapi, const integer* progress, +integer aocl_scalapack_progress_(const char* const api, const integer *lenapi, const integer* progress, const integer* current_process, const integer *total_processes) { - integer ret; + integer ret = 0; if (aocl_scalapack_progress_ptr_ != NULL ) { ret = aocl_scalapack_progress_ptr_ ( api, lenapi, progress, current_process, total_processes); } - return; + return ret; +} +integer AOCL_SCALAPACK_PROGRESS(const char* const api, const integer* lenapi, const integer* progress, + const integer* current_process, const integer* total_processes) +{ + integer ret = 0; + if (aocl_scalapack_progress_ptr_ != NULL) { + ret = aocl_scalapack_progress_ptr_(api, lenapi, progress, current_process, total_processes); + } + + return ret; } diff --git a/SRC/aocl_scalapack_progress.h b/SRC/aocl_scalapack_progress.h index ac295258..31099a4c 100644 --- a/SRC/aocl_scalapack_progress.h +++ b/SRC/aocl_scalapack_progress.h @@ -30,12 +30,19 @@ const integer *total_processes ); -integer aocl_scalapack_progress ( -const char * const api, -const integer *lenapi, -const integer *progress, -const integer *current_process, -const integer *total_processes +integer aocl_scalapack_progress_( + const char* const api, + const integer* lenapi, + const integer* progress, + const integer* current_process, + const integer* total_processes +); +integer AOCL_SCALAPACK_PROGRESS( + const char* const api, + const integer* lenapi, + const integer* progress, + const integer* current_process, + const integer* total_processes ); aocl_scalapack_progress_callback aocl_scalapack_progress_ptr_; diff --git a/SRC/pdgetf2.f b/SRC/pdgetf2.f index b532aa9d..a5c87772 100644 --- a/SRC/pdgetf2.f +++ b/SRC/pdgetf2.f @@ -161,7 +161,7 
@@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * .. Debug trace log capture if the DTL is enabled #ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_ENTRY(__FILE__, __LINE__, ' ') + CALL AOCL_SL_DTL_TRACE_ENTRY(__FILE__, __LINE__, ' ') #endif * * Get grid parameters. @@ -196,7 +196,7 @@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) CALL BLACS_ABORT( ICTXT, 1 ) * #ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') + CALL AOCL_SL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') #endif RETURN END IF @@ -205,7 +205,7 @@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * IF( M.EQ.0 .OR. N.EQ.0 ) THEN #ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') + CALL AOCL_SL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') #endif RETURN END IF @@ -260,7 +260,7 @@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * #ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') + CALL AOCL_SL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') #endif RETURN * diff --git a/SRC/pdgetrf.f b/SRC/pdgetrf.f index 78b558dc..7bdd985c 100644 --- a/SRC/pdgetrf.f +++ b/SRC/pdgetrf.f @@ -1,20 +1,19 @@ * -- ScaLAPACK routine -- -* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. -* -* ===================================================================== -* SUBROUTINE PDGETRF -* ===================================================================== - SUBROUTINE PDGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) -* -* -- ScaLAPACK routine (version 2.1.0) -- -* Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -* June 10, 2020 +* Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. 
* May 25, 2001 * +#include "SL_Context_fortran_include.h" +* +* ===================================================================== +* SUBROUTINE PDGETRF +* ===================================================================== + SUBROUTINE PDGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) +* + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER INFO, IA, JA, M, N * .. @@ -142,20 +141,24 @@ SUBROUTINE PDGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * ===================================================================== * -#ifdef AOCL_DTL - CHARACTER BUFFER*90 - CALL AOCL_DTL_TRACE_ENTRY(__FILE__, __LINE__, ' ') - WRITE(BUFFER,101) M, N, IA, JA - 101 FORMAT('pdgetrf inputs: M: ', I2, ' N: ', I2 ,' IA: ', I2,' JA: ', I2 ) - CALL AOCL_DTL_LOG_ENTRY( BUFFER ) -#endif + CHARACTER BUFFER*450 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetrf.f' + CALL AOCL_SCALAPACK_INIT( ) + AOCL_DTL_TRACE_ENTRY_F +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,101) M, N, IA, JA + 101 FORMAT('pdgetrf inputs: M: ', I2, ' N: ', I2 ,' + $ IA: ', I2,' JA: ', I2 ) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF * #ifdef ENABLE_LOOK_AHEAD_FOR_LU * .. * .. Local Scalars .. -* Defining the threshold to invoke look-ahead +* Defining the threshold to invoke look-ahead INTEGER CTXT_, LU_THRESHOLD, NB_, MN, NB INTEGER ICTXT, MYCOL, MYROW, NPCOL, NPROW * .. @@ -203,9 +206,7 @@ SUBROUTINE PDGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) CALL PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) #endif /* ENABLE_LOOK_AHEAD_FOR_LU */ * -#ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') -#endif + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGETRF diff --git a/SRC/pdgetrf0.f b/SRC/pdgetrf0.f index e69066ca..6ab7e5c6 100644 --- a/SRC/pdgetrf0.f +++ b/SRC/pdgetrf0.f @@ -1,11 +1,14 @@ * -- ScaLAPACK routine -- -* Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. +* Copyright (c) 2020-23 Advanced Micro Devices, Inc.  All rights reserved. 
* +#include "SL_Context_fortran_include.h" + * ===================================================================== * SUBROUTINE PDGETRF0 * ===================================================================== SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. @@ -146,14 +149,14 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) CHARACTER COLBTOP, COLCTOP, ROWBTOP INTEGER I, ICOFF, ICTXT, IINFO, IN, IROFF, J, JB, JN, $ MN, MYCOL, MYROW, NPCOL, NPROW -* -#ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* .. +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS, RET -* .. Declaring below API name string and its length as const objects - CHARACTER*8, PARAMETER :: API_NAME = 'PDGETRF ' +* .. Declaring below 'API NAME' string and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = 'PDGETRF' // C_NULL_CHAR INTEGER, PARAMETER :: LSTAGE = 8 -#endif * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) @@ -173,6 +176,7 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) INTRINSIC MIN, MOD * .. * .. Executable Statements .. + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetrf0.f' // C_NULL_CHAR * * Get grid parameters * @@ -203,10 +207,6 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETRF', -INFO ) -* -#ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') -#endif RETURN END IF * @@ -214,14 +214,8 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) * IF( DESCA( M_ ).EQ.1 ) THEN IPIV( 1 ) = 1 -#ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') -#endif RETURN ELSE IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN -#ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') -#endif RETURN END IF * @@ -241,10 +235,10 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+MN-1 ) JB = JN - JA + 1 * -#ifdef AOCL_PROGRESS - CURRENT_RANK = MYCOL+MYROW*NPCOL - TOTAL_MPI_PROCESSES = NPROW*NPCOL -#endif + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN + CURRENT_RANK = MYCOL+MYROW*NPCOL + TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF * Factor diagonal and subdiagonal blocks and test for exact * singularity. @@ -279,13 +273,13 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) DO 10 J = JN+1, JA+MN-1, DESCA( NB_ ) JB = MIN( MN-J+JA, DESCA( NB_ ) ) I = IA + J - JA -#ifdef AOCL_PROGRESS -* Capture the Loop count 'J' to a separate 'PROGRESS' variable -* to avoid the corruption at application side. - PROGRESS = J - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* Capture the Loop count 'J' to a separate 'PROGRESS' variable +* to avoid the corruption at application side. + PROGRESS = J + RET = AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) -#endif + END IF * * Factor diagonal and subdiagonal blocks and test for exact * singularity. @@ -337,9 +331,6 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) * -#ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') -#endif RETURN * * End of PDGETRF diff --git a/SRC/pdlaswp.f b/SRC/pdlaswp.f index 0ba8ed17..43ed72ed 100644 --- a/SRC/pdlaswp.f +++ b/SRC/pdlaswp.f @@ -156,13 +156,13 @@ SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, * .. Executable Statements .. 
* #ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_ENTRY(__FILE__, __LINE__, ' ') + CALL AOCL_SL_DTL_TRACE_ENTRY(__FILE__, __LINE__, ' ') #endif * Quick return if possible * IF( N.EQ.0 ) THEN #ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') + CALL AOCL_SL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') #endif RETURN END IF @@ -212,7 +212,7 @@ SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, END IF * #ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') + CALL AOCL_SL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') #endif RETURN * diff --git a/SRC/pxsyevx.h b/SRC/pxsyevx.h index dcc1e323..b0487052 100644 --- a/SRC/pxsyevx.h +++ b/SRC/pxsyevx.h @@ -9,7 +9,7 @@ * NOCHANGE indicates that fortran will be calling, and that it expects * the name called by fortran to be identical to that compiled by the C * (RS6K's do this). UPCASE says it expects C routines called by fortran - * to be in all upcase (CRAY wants this). + * to be in all upcase (CRAY wants this). */ #define ADD_ 0 @@ -49,7 +49,7 @@ #if (F77_CALL_C == UPCASE) /* * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine + * routine call a C routine * following Fortran to C interface: * FORTRAN CALL C DECLARATION * call pdgemm(...) void PDGEMM(...) 
@@ -64,18 +64,17 @@ #define pslachkieee_ PSLACHKIEEE #define pslaiect_ PSLAIECT -#define get_aocl_scalapack_version_ GET_AOCL_SCALAPACK_VERSION -#define aocl_scalapack_progress_ AOCL_SCALAPACK_PROGRESS -#define aocl_dtl_trace_entry_ AOCL_DTL_TRACE_ENTRY -#define aocl_dtl_trace_exit_ AOCL_DTL_TRACE_EXIT -#define aocl_dtl_log_entry_ AOCL_DTL_LOG_ENTRY -#define aocl_dtl_log_exit_ AOCL_DTL_LOG_EXIT +#define get_aocl_scalapack_version_ GET_AOCL_SCALAPACK_VERSION +#define aocl_sl_dtl_trace_entry_ AOCL_SL_DTL_TRACE_ENTRY +#define aocl_sl_dtl_trace_exit_ AOCL_SL_DTL_TRACE_EXIT +#define aocl_sl_dtl_log_entry_ AOCL_SL_DTL_LOG_ENTRY +#define aocl_dtl_log_exit_ AOCL_DTL_LOG_EXIT #endif #if (F77_CALL_C == NOCHANGE) /* * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine + * routine call a C routine * for following Fortran to C interface: * FORTRAN CALL C DECLARATION * call pdgemm(...) void pdgemm(...) @@ -90,10 +89,12 @@ #define pslachkieee_ pslachkieee #define pslaiect_ pslaiect -#define get_aocl_scalapack_version_ get_aocl_scalapack_version -#define aocl_scalapack_progress_ aocl_scalapack_progress -#define aocl_dtl_trace_entry_ aocl_dtl_trace_entry -#define aocl_dtl_trace_exit_ aocl_dtl_trace_exit -#define aocl_dtl_log_entry_ aocl_dtl_log_entry -#define aocl_dtl_log_exit_ aocl_dtl_log_exit +#define get_aocl_scalapack_version_ get_aocl_scalapack_version +#define aocl_scalapack_progress_ aocl_scalapack_progress +#define aocl_dtl_trace_entry_ aocl_dtl_trace_entry +#define aocl_dtl_trace_exit_ aocl_dtl_trace_exit +#define aocl_dtl_log_entry_ aocl_dtl_log_entry +#define aocl_dtl_log_exit_ aocl_dtl_log_exit +#define aocl_scalapack_init_ aocl_scalapack_init + #endif From 0abd7d426e96f0048b72c25308b66161e75ea0ba Mon Sep 17 00:00:00 2001 From: nprasadm Date: Wed, 22 Feb 2023 11:54:11 +0530 Subject: [PATCH 12/30] aocl-scaLAPACK: Fix added for the hang issue with xsllt (single precision Cholesky) test case with Flang based compiler. 
- Initialization of variable 'EST' to '0' in the APIs related to computation of error bounds and backward error estimates. - This would avoid incorrect calculation of forward error. Signed-off-by: Nagendra AMD-Internal: [SWLCSG-1824] Change-Id: Idb5da40e145ae95020cd05b2dcd6595c6bc6fc88 --- SRC/pcgerfs.f | 2 ++ SRC/pcporfs.f | 2 ++ SRC/pdgerfs.f | 1 + SRC/pdporfs.f | 2 ++ SRC/psgerfs.f | 4 ++++ SRC/psporfs.f | 2 ++ SRC/pzgerfs.f | 3 +++ SRC/pzporfs.f | 2 ++ 8 files changed, 18 insertions(+) diff --git a/SRC/pcgerfs.f b/SRC/pcgerfs.f index 3a04bc69..10f301f8 100644 --- a/SRC/pcgerfs.f +++ b/SRC/pcgerfs.f @@ -308,6 +308,8 @@ SUBROUTINE PCGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. +* .. Initialize EST + EST = (0.0, 0.0) * * Get grid parameters * diff --git a/SRC/pcporfs.f b/SRC/pcporfs.f index 233be2aa..4f50306a 100644 --- a/SRC/pcporfs.f +++ b/SRC/pcporfs.f @@ -306,6 +306,8 @@ SUBROUTINE PCPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) * .. * .. Executable Statements .. +* .. Initialize EST + EST = (0.0, 0.0) * * Get grid parameters * diff --git a/SRC/pdgerfs.f b/SRC/pdgerfs.f index 557916f1..7ffd5e9c 100644 --- a/SRC/pdgerfs.f +++ b/SRC/pdgerfs.f @@ -301,6 +301,7 @@ SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. + EST = 0.0 * * Get grid parameters * diff --git a/SRC/pdporfs.f b/SRC/pdporfs.f index f827625d..2fa87ee6 100644 --- a/SRC/pdporfs.f +++ b/SRC/pdporfs.f @@ -298,6 +298,8 @@ SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD * .. * .. Executable Statements .. +* .. 
Initialize EST + EST = 0.0 * * Get grid parameters * diff --git a/SRC/psgerfs.f b/SRC/psgerfs.f index 792d0782..5a9de957 100644 --- a/SRC/psgerfs.f +++ b/SRC/psgerfs.f @@ -302,6 +302,10 @@ SUBROUTINE PSGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, * .. * .. Executable Statements .. * +* +* .. Initialize EST + EST = (0.0, 0.0) + * Get grid parameters * ICTXT = DESCA( CTXT_ ) diff --git a/SRC/psporfs.f b/SRC/psporfs.f index 38401354..34a228c5 100644 --- a/SRC/psporfs.f +++ b/SRC/psporfs.f @@ -298,6 +298,8 @@ SUBROUTINE PSPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, INTRINSIC ABS, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. +* .. Initialize EST + EST = 0.0 * * Get grid parameters * diff --git a/SRC/pzgerfs.f b/SRC/pzgerfs.f index 9bbf0459..c22a7b89 100644 --- a/SRC/pzgerfs.f +++ b/SRC/pzgerfs.f @@ -309,6 +309,9 @@ SUBROUTINE PZGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, * .. * .. Executable Statements .. * +* .. Initialize EST + EST = (0.0, 0.0) +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) diff --git a/SRC/pzporfs.f b/SRC/pzporfs.f index 7d76c0d7..ec756d7f 100644 --- a/SRC/pzporfs.f +++ b/SRC/pzporfs.f @@ -306,6 +306,8 @@ SUBROUTINE PZPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. +* .. Initialize EST + EST = (0.0, 0.0) * * Get grid parameters * From ccfc4814f69370ced0561e854c36ac07f1175b08 Mon Sep 17 00:00:00 2001 From: nprasadm Date: Wed, 8 Mar 2023 12:07:03 +0530 Subject: [PATCH 13/30] aocl-scaLAPACK: dos2unix conversion made for the files with windows style carriage return characters. 
Change-Id: I448af8b9b79543a7625b4816dcb3c52fc27e311c --- FRAMEWORK/cpu_features.c | 536 +++++++++++++++++++-------------------- FRAMEWORK/cpu_features.h | 350 ++++++++++++------------- 2 files changed, 443 insertions(+), 443 deletions(-) diff --git a/FRAMEWORK/cpu_features.c b/FRAMEWORK/cpu_features.c index 650b7ef7..3819f142 100644 --- a/FRAMEWORK/cpu_features.c +++ b/FRAMEWORK/cpu_features.c @@ -1,268 +1,268 @@ - -/* ************************************************************************ - * Copyright (c) 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- * - * ************************************************************************ */ -#include "cpu_features.h" -#define ALC_CPU_FEATURE_REG(ftr, idx, reg) ({ \ - uint32_t val; \ - struct alc_cpuid_regs *r; \ - r = &(ftr)->available[0]; \ - val = r[(idx)].reg; \ - val; \ - }) -#define ALC_CPU_FEATURE(ptr, idx, reg, bit) ({ \ - uint32_t __reg = \ - ALC_CPU_FEATURE_REG(ptr, idx, reg); \ - (__reg & bit); \ - }) -/* For AVX512 instructions */ -#define ALC_CPU_HAS_AVX512F(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512F) /* For AVX512 foundation flag */ -#define ALC_CPU_HAS_AVX512DQ(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512DQ) -#define ALC_CPU_HAS_AVX512BW(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512BW) -#define ALC_CPU_HAS_AVX512ER(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512ER) -#define ALC_CPU_HAS_AVX512CD(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512CD) -#define ALC_CPU_HAS_AVX512VL(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512VL) -#define ALC_CPU_HAS_AVX512PF(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512PF) -#define ALC_CPU_HAS_AVX512_IFMA(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512_IFMA) -#define ALC_CPU_HAS_AVX512_VNNI(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VNNI) -#define ALC_CPU_HAS_AVX512_BITALG(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_BITALG) -#define ALC_CPU_HAS_AVX512_VBMI(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VBMI) -#define ALC_CPU_HAS_AVX512_VBMI2(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VBMI2) -#define ALC_CPU_HAS_AVX512_VPOPCNTDQ(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VPOPCNTDQ) -struct alc_cpu_features cpu_features; -struct -{ - uint32_t eax; - uint32_t ecx; -} __cpuid_values[ALC_CPUID_MAX] = { - [ALC_CPUID_EAX_1] = { 0x1, 0x0 }, /* eax = 0, ecx=0 */ - [ALC_CPUID_EAX_7] = { 
0x7, 0x0 }, /* eax = 7, -"- */ - [ALC_CPUID_EAX_8_01] = { 0x80000001, 0x0 }, /* eax = 0x80000001 */ - [ALC_CPUID_EAX_8_07] = { 0x80000007, 0x0 }, /* eax = 0x80000007 */ - [ALC_CPUID_EAX_8_08] = { 0x80000008, 0x0 }, /* eax = 0x80000008 */ -}; -static inline uint32_t -__extract32(uint32_t value, int start, int length) -{ - assert(start >= 0 && length > 0 && length <= 32 - start); - return (value >> start) & (~0U >> (32 - length)); -} -static inline uint16_t -alc_cpuid_get_family(uint32_t var) -{ - return (uint16_t)(__extract32(var, 20, 8) + - __extract32(var, 8, 4)); -} -static inline uint16_t -alc_cpuid_get_model(uint32_t var) -{ - return (uint16_t)(__extract32(var, 16, 4) << 4 | - __extract32(var, 4, 4)); -} -static inline uint16_t -alc_cpuid_get_stepping(uint32_t var) -{ - return (uint16_t)(__extract32(var, 20, 8) + - __extract32(var, 8, 4)); -} -static inline void __cpuid(struct alc_cpuid_regs *out) -{ - __asm__ volatile - ( - "cpuid" - :"=a"(out->eax), "=b"(out->ebx), "=c"(out->ecx), "=d"(out->edx) - ); -} -static inline void __cpuid_1(uint32_t eax, struct alc_cpuid_regs *out) -{ - __asm__ volatile - ( - "cpuid" - :"=a"(out->eax), "=b"(out->ebx), "=c"(out->ecx), "=d"(out->edx) - :"0"(eax) - ); -} -static inline void __cpuid_2(uint32_t eax, uint32_t ecx, struct alc_cpuid_regs *out) -{ - __asm__ volatile - ( - "cpuid" - :"=a"(out->eax), "=b"(out->ebx), "=c"(out->ecx), "=d"(out->edx) - :"0"(eax), "2"(ecx) - ); -} -static void -__get_mfg_info(struct alc_cpuid_regs* cpuid_regs, - struct alc_cpu_mfg_info* mfg_info) -{ - uint16_t model; - uint16_t family; - if (mfg_info) { - struct alc_cpuid_regs regs; - __cpuid_1(1, &regs); - family = alc_cpuid_get_family(regs.eax); - model = alc_cpuid_get_model(regs.eax); - if (family >= ALC_CPU_FAMILY_ZEN) { - mfg_info->family = (uint16_t)family; - mfg_info->model = (uint16_t)model; - } - mfg_info->stepping = alc_cpuid_get_stepping(regs.eax); - } -} -static void -__init_cpu_features(void) -{ - static unsigned initialized = 0; - struct
alc_cpu_mfg_info* mfg_info = &cpu_features.cpu_mfg_info; - int arr_size = ARRAY_SIZE(__cpuid_values); - if (initialized == INITIALIZED_MAGIC) - return; - struct alc_cpuid_regs regs; - __cpuid_1(0, &regs); - /* "AuthenticAMD" */ - if (regs.ebx == 0x68747541 && regs.ecx == 0x444d4163 - && regs.edx == 0x69746e65) { - cpu_features.cpu_mfg_info.mfg_type = ALC_CPU_MFG_AMD; - } - for (int i = 0; i < arr_size; i++) { - struct alc_cpuid_regs ft; - __cpuid_2(__cpuid_values[i].eax, __cpuid_values[i].ecx, &ft); - cpu_features.available[i].eax = ft.eax; - cpu_features.available[i].ebx = ft.ebx; - cpu_features.available[i].ecx = ft.ecx; - cpu_features.available[i].edx = ft.edx; - } - __get_mfg_info(&cpu_features.available[ALC_CPUID_EAX_1], mfg_info); - /* - * Globally disable some *_USEABLE flags, so that all ifunc's - * sees them - */ - if (mfg_info->mfg_type == ALC_CPU_MFG_AMD - && mfg_info->family >= ALC_CPU_FAMILY_ZEN) { - memcpy(&cpu_features.usable[0], - &cpu_features.available[0], - sizeof(cpu_features.usable)); - } - initialized = INITIALIZED_MAGIC; -} -uint32_t -alc_cpu_has_avx512f(void) -{ - __init_cpu_features(); - return ALC_CPU_HAS_AVX512F(&cpu_features); -} -uint32_t -alc_cpu_has_avx512dq(void) -{ - if (alc_cpu_has_avx512f() == 0) - return 0; - __init_cpu_features(); - return ALC_CPU_HAS_AVX512DQ(&cpu_features); -} -uint32_t -alc_cpu_has_avx512bw(void) -{ - if (alc_cpu_has_avx512f() == 0) - return 0; - __init_cpu_features(); - return ALC_CPU_HAS_AVX512BW(&cpu_features); -} -uint32_t -alc_cpu_has_avx512er(void) -{ - if (alc_cpu_has_avx512f() == 0) - return 0; - __init_cpu_features(); - return ALC_CPU_HAS_AVX512ER(&cpu_features); -} -uint32_t -alc_cpu_has_avx512cd(void) -{ - if (alc_cpu_has_avx512f() == 0) - return 0; - __init_cpu_features(); - return ALC_CPU_HAS_AVX512CD(&cpu_features); -} -uint32_t -alc_cpu_has_avx512vl(void) -{ - if (alc_cpu_has_avx512f() == 0) - return 0; - __init_cpu_features(); - return ALC_CPU_HAS_AVX512VL(&cpu_features); -} -uint32_t
-alc_cpu_has_avx512pf(void) -{ - if (alc_cpu_has_avx512f() == 0) - return 0; - __init_cpu_features(); - return ALC_CPU_HAS_AVX512PF(&cpu_features); -} -uint32_t -alc_cpu_has_avx512_ifma(void) -{ - if (alc_cpu_has_avx512f() == 0) - return 0; - __init_cpu_features(); - return ALC_CPU_HAS_AVX512_IFMA(&cpu_features); -} -uint32_t -alc_cpu_has_avx512_vnni(void) -{ - if (alc_cpu_has_avx512f() == 0) - return 0; - __init_cpu_features(); - return ALC_CPU_HAS_AVX512_VNNI(&cpu_features); -} -uint32_t -alc_cpu_has_avx512_bitalg(void) -{ - if (alc_cpu_has_avx512f() == 0) - return 0; - __init_cpu_features(); - return ALC_CPU_HAS_AVX512_BITALG(&cpu_features); -} -uint32_t -alc_cpu_has_avx512_vbmi(void) -{ - if (alc_cpu_has_avx512f() == 0) - return 0; - __init_cpu_features(); - return ALC_CPU_HAS_AVX512_VBMI(&cpu_features); -} -uint32_t -alc_cpu_has_avx512_vbmi2(void) -{ - if (alc_cpu_has_avx512f() == 0) - return 0; - __init_cpu_features(); - return ALC_CPU_HAS_AVX512_VBMI2(&cpu_features); -} -uint32_t -alc_cpu_has_avx512_vpopcntdq(void) -{ - if (alc_cpu_has_avx512f() == 0) - return 0; - __init_cpu_features(); - return ALC_CPU_HAS_AVX512_VPOPCNTDQ(&cpu_features); -} + +/* ************************************************************************ + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * ************************************************************************ */ +#include "cpu_features.h" +#define ALC_CPU_FEATURE_REG(ftr, idx, reg) ({ \ + uint32_t val; \ + struct alc_cpuid_regs *r; \ + r = &(ftr)->available[0]; \ + val = r[(idx)].reg; \ + val; \ + }) +#define ALC_CPU_FEATURE(ptr, idx, reg, bit) ({ \ + uint32_t __reg = \ + ALC_CPU_FEATURE_REG(ptr, idx, reg); \ + (__reg & bit); \ + }) +/* For AVX512 instructions */ +#define ALC_CPU_HAS_AVX512F(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512F) /* For AVX512 foundation flag */ +#define ALC_CPU_HAS_AVX512DQ(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512DQ) +#define ALC_CPU_HAS_AVX512BW(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512BW) +#define ALC_CPU_HAS_AVX512ER(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512ER) +#define ALC_CPU_HAS_AVX512CD(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512CD) +#define ALC_CPU_HAS_AVX512VL(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512VL) +#define ALC_CPU_HAS_AVX512PF(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512PF) +#define ALC_CPU_HAS_AVX512_IFMA(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512_IFMA) +#define ALC_CPU_HAS_AVX512_VNNI(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VNNI) +#define ALC_CPU_HAS_AVX512_BITALG(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_BITALG) +#define 
ALC_CPU_HAS_AVX512_VBMI(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VBMI) +#define ALC_CPU_HAS_AVX512_VBMI2(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VBMI2) +#define ALC_CPU_HAS_AVX512_VPOPCNTDQ(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VPOPCNTDQ) +struct alc_cpu_features cpu_features; +struct +{ + uint32_t eax; + uint32_t ecx; +} __cpuid_values[ALC_CPUID_MAX] = { + [ALC_CPUID_EAX_1] = { 0x1, 0x0 }, /* eax = 0, ecx=0 */ + [ALC_CPUID_EAX_7] = { 0x7, 0x0 }, /* eax = 7, -"- */ + [ALC_CPUID_EAX_8_01] = { 0x80000001, 0x0 }, /* eax = 0x80000001 */ + [ALC_CPUID_EAX_8_07] = { 0x80000007, 0x0 }, /* eax = 0x80000007 */ + [ALC_CPUID_EAX_8_08] = { 0x80000008, 0x0 }, /* eax = 0x80000008 */ +}; +static inline uint32_t +__extract32(uint32_t value, int start, int length) +{ + assert(start >= 0 && length > 0 && length <= 32 - start); + return (value >> start) & (~0U >> (32 - length)); +} +static inline uint16_t +alc_cpuid_get_family(uint32_t var) +{ + return (uint16_t)(__extract32(var, 20, 8) + + __extract32(var, 8, 4)); +} +static inline uint16_t +alc_cpuid_get_model(uint32_t var) +{ + return (uint16_t)(__extract32(var, 16, 4) << 4 | + __extract32(var, 4, 4)); +} +static inline uint16_t +alc_cpuid_get_stepping(uint32_t var) +{ + return (uint16_t)(__extract32(var, 20, 8) + + __extract32(var, 8, 4)); +} +static inline void __cpuid(struct alc_cpuid_regs *out) +{ + __asm__ volatile + ( + "cpuid" + :"=a"(out->eax), "=b"(out->ebx), "=c"(out->ecx), "=d"(out->edx) + ); +} +static inline void __cpuid_1(uint32_t eax, struct alc_cpuid_regs *out) +{ + __asm__ volatile + ( + "cpuid" + :"=a"(out->eax), "=b"(out->ebx), "=c"(out->ecx), "=d"(out->edx) + :"0"(eax) + ); +} +static inline void __cpuid_2(uint32_t eax, uint32_t ecx, struct alc_cpuid_regs *out) +{ + __asm__ volatile + ( + "cpuid" + :"=a"(out->eax), "=b"(out->ebx), "=c"(out->ecx), "=d"(out->edx) + :"0"(eax), "2"(ecx) + ); +} +static void +__get_mfg_info(struct 
alc_cpuid_regs* cpuid_regs, + struct alc_cpu_mfg_info* mfg_info) +{ + uint16_t model; + uint16_t family; + if (mfg_info) { + struct alc_cpuid_regs regs; + __cpuid_1(1, &regs); + family = alc_cpuid_get_family(regs.eax); + model = alc_cpuid_get_model(regs.eax); + if (family >= ALC_CPU_FAMILY_ZEN) { + mfg_info->family = (uint16_t)family; + mfg_info->model = (uint16_t)model; + } + mfg_info->stepping = alc_cpuid_get_stepping(regs.eax); + } +} +static void +__init_cpu_features(void) +{ + static unsigned initialized = 0; + struct alc_cpu_mfg_info* mfg_info = &cpu_features.cpu_mfg_info; + int arr_size = ARRAY_SIZE(__cpuid_values); + if (initialized == INITIALIZED_MAGIC) + return; + struct alc_cpuid_regs regs; + __cpuid_1(0, &regs); + /* "AuthenticAMD" */ + if (regs.ebx == 0x68747541 && regs.ecx == 0x444d4163 + && regs.edx == 0x69746e65) { + cpu_features.cpu_mfg_info.mfg_type = ALC_CPU_MFG_AMD; + } + for (int i = 0; i < arr_size; i++) { + struct alc_cpuid_regs ft; + __cpuid_2(__cpuid_values[i].eax, __cpuid_values[i].ecx, &ft); + cpu_features.available[i].eax = ft.eax; + cpu_features.available[i].ebx = ft.ebx; + cpu_features.available[i].ecx = ft.ecx; + cpu_features.available[i].edx = ft.edx; + } + __get_mfg_info(&cpu_features.available[ALC_CPUID_EAX_1], mfg_info); + /* + * Globally disable some *_USEABLE flags, so that all ifunc's + * sees them + */ + if (mfg_info->mfg_type == ALC_CPU_MFG_AMD + && mfg_info->family >= ALC_CPU_FAMILY_ZEN) { + memcpy(&cpu_features.usable[0], + &cpu_features.available[0], + sizeof(cpu_features.usable)); + } + initialized = INITIALIZED_MAGIC; +} +uint32_t +alc_cpu_has_avx512f(void) +{ + __init_cpu_features(); + return ALC_CPU_HAS_AVX512F(&cpu_features); +} +uint32_t +alc_cpu_has_avx512dq(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512DQ(&cpu_features); +} +uint32_t +alc_cpu_has_avx512bw(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return
ALC_CPU_HAS_AVX512BW(&cpu_features); +} +uint32_t +alc_cpu_has_avx512er(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512ER(&cpu_features); +} +uint32_t +alc_cpu_has_avx512cd(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512CD(&cpu_features); +} +uint32_t +alc_cpu_has_avx512vl(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512VL(&cpu_features); +} +uint32_t +alc_cpu_has_avx512pf(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512PF(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_ifma(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_IFMA(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_vnni(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_VNNI(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_bitalg(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_BITALG(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_vbmi(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_VBMI(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_vbmi2(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_VBMI2(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_vpopcntdq(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_VPOPCNTDQ(&cpu_features); +} diff --git a/FRAMEWORK/cpu_features.h b/FRAMEWORK/cpu_features.h index b19399b1..4ede1e6b 100644 --- a/FRAMEWORK/cpu_features.h +++ b/FRAMEWORK/cpu_features.h @@ -1,175 +1,175 @@ - -/* ************************************************************************ - * Copyright (c) 2023 Advanced Micro 
Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- * - * ************************************************************************ */ -#ifndef SL_CPUFEATURES_H -#define SL_CPUFEATURES_H -#include -#include -#include -enum { - ALC_CPUID_EAX_1 = 0, - ALC_CPUID_EAX_7, - ALC_CPUID_EAX_8_01, /* 8000.0001 */ - ALC_CPUID_EAX_8_07, /* 8000.0007 */ - ALC_CPUID_EAX_8_08, /* 8000.0008 */ - /* Last entry */ - ALC_CPUID_MAX, -}; -enum { - /*EBX Values*/ - ALC_CPUID_BIT_FSGSBASE = (1u << 0), - ALC_CPUID_BIT_TSC_ADJUST = (1u << 1), - ALC_CPUID_BIT_SGX = (1u << 2), - ALC_CPUID_BIT_BMI1 = (1u << 3), - ALC_CPUID_BIT_HLE = (1u << 4), - ALC_CPUID_BIT_AVX2 = (1u << 5), - ALC_CPUID_BIT_SMEP = (1u << 7), - ALC_CPUID_BIT_BMI2 = (1u << 8), - ALC_CPUID_BIT_ERMS = (1u << 9), - ALC_CPUID_BIT_INVPCID = (1u << 10), - ALC_CPUID_BIT_RTM = (1u << 11), - ALC_CPUID_BIT_TSX = ALC_CPUID_BIT_RTM, - ALC_CPUID_BIT_PQM = (1u << 12), - ALC_CPUID_BIT_MPX = (1u << 14), - ALC_CPUID_BIT_PQE = (1u << 15), - ALC_CPUID_BIT_AVX512F = (1u << 16), - ALC_CPUID_BIT_AVX512DQ = (1u << 17), - ALC_CPUID_BIT_RDSEED = (1u << 18), - ALC_CPUID_BIT_ADX = (1u << 19), - ALC_CPUID_BIT_SMAP = (1u << 20), - ALC_CPUID_BIT_AVX512_IFMA = (1u << 21), - ALC_CPUID_BIT_CLFLUSHOPT = (1u << 22), - ALC_CPUID_BIT_CLWB = (1u << 24), - ALC_CPUID_BIT_TRACE = (1u << 25), - ALC_CPUID_BIT_AVX512PF = (1u << 26), - ALC_CPUID_BIT_AVX512ER = (1u << 27), - ALC_CPUID_BIT_AVX512CD = (1u << 28), - ALC_CPUID_BIT_SHA = (1u << 29), - ALC_CPUID_BIT_AVX512BW = (1u << 30), - ALC_CPUID_BIT_AVX512VL = (1u << 31), - /* ECX Values*/ - ALC_CPUID_BIT_PREFETCHWT1 = (1u << 0), - ALC_CPUID_BIT_AVX512_VBMI = (1u << 1), - ALC_CPUID_BIT_UMIP = (1u << 2), - ALC_CPUID_BIT_PKU = (1u << 3), - ALC_CPUID_BIT_OSPKE = (1u << 4), - ALC_CPUID_BIT_WAITPKG = (1u << 5), - ALC_CPUID_BIT_AVX512_VBMI2 = (1u << 6), - ALC_CPUID_BIT_SHSTK = (1u << 7), - ALC_CPUID_BIT_GFNI = (1u << 8), - ALC_CPUID_BIT_VAES = (1u << 9), - ALC_CPUID_BIT_VPCLMULQDQ = (1u << 10), - ALC_CPUID_BIT_AVX512_VNNI = (1u << 11), - ALC_CPUID_BIT_AVX512_BITALG = (1u << 
12), - ALC_CPUID_BIT_AVX512_VPOPCNTDQ = (1u << 14), - ALC_CPUID_BIT_RDPID = (1u << 22), - ALC_CPUID_BIT_CLDEMOTE = (1u << 25), - ALC_CPUID_BIT_MOVDIRI = (1u << 27), - ALC_CPUID_BIT_MOVDIR64B = (1u << 28), - ALC_CPUID_BIT_SGX_LC = (1u << 30), - /* EDX Values */ - ALC_CPUID_BIT_AVX512_4VNNIW = (1u << 2), - ALC_CPUID_BIT_AVX512_4FMAPS = (1u << 3), - ALC_CPUID_BIT_FSRM = (1u << 4), - ALC_CPUID_BIT_PCONFIG = (1u << 18), - ALC_CPUID_BIT_IBT = (1u << 20), - ALC_CPUID_BIT_IBRS_IBPB = (1u << 26), - ALC_CPUID_BIT_STIBP = (1u << 27), - ALC_CPUID_BIT_CAPABILITIES = (1u << 29), - ALC_CPUID_BIT_SSBD = (1u << 31), -}; -#define ALC_CPU_FAMILY_ZEN 0x17 -#define ALC_CPU_FAMILY_ZEN_PLUS 0x17 -#define ALC_CPU_FAMILY_ZEN2 0x17 -#define ALC_CPU_FAMILY_ZEN3 0x19 -#define ALC_CPU_FAMILY_ZEN4 0x19 -static inline uint32_t -__extract32(uint32_t value, int start, int length); -static inline uint16_t -alc_cpuid_get_family(uint32_t var); -static inline uint16_t -alc_cpuid_get_model(uint32_t var); -static inline uint16_t -alc_cpuid_get_stepping(uint32_t var); -/* ID return values */ -struct alc_cpuid_regs { - uint32_t eax; - uint32_t ebx; - uint32_t ecx; - uint32_t edx; -}; -typedef enum { - ALC_CPU_MFG_INTEL, - ALC_CPU_MFG_AMD, - ALC_CPU_MFG_OTHER, -} alc_cpu_mfg_t; -struct alc_cpu_mfg_info { - alc_cpu_mfg_t mfg_type; - uint16_t family; - uint16_t model; - uint16_t stepping; -}; -struct alc_cpu_features { - struct alc_cpu_mfg_info cpu_mfg_info; - struct alc_cpuid_regs available[ALC_CPUID_MAX]; - struct alc_cpuid_regs usable[ALC_CPUID_MAX]; -}; -static inline void __cpuid(struct alc_cpuid_regs *out); -static inline void __cpuid_1(uint32_t eax, struct alc_cpuid_regs *out); -static inline void __cpuid_2(uint32_t eax, uint32_t ecx, struct alc_cpuid_regs *out); -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) -#endif -#define INITIALIZED_MAGIC 0xdeadbeaf -static void -__get_mfg_info(struct alc_cpuid_regs* cpuid_regs, - struct alc_cpu_mfg_info* mfg_info); -static void 
-__init_cpu_features(void); -uint32_t -alc_cpu_has_avx512f(void); -uint32_t -alc_cpu_has_avx512dq(void); -uint32_t -alc_cpu_has_avx512bw(void); -uint32_t -alc_cpu_has_avx512er(void); -uint32_t -alc_cpu_has_avx512cd(void); -uint32_t -alc_cpu_has_avx512vl(void); -uint32_t -alc_cpu_has_avx512pf(void); -uint32_t -alc_cpu_has_avx512_ifma(void); -uint32_t -alc_cpu_has_avx512_vnni(void); -uint32_t -alc_cpu_has_avx512_bitalg(void); -uint32_t -alc_cpu_has_avx512_vbmi(void); -uint32_t -alc_cpu_has_avx512_vbmi2(void); -uint32_t -alc_cpu_has_avx512_vpopcntdq(void); -#endif //SL_CPUFEATURES_H + +/* ************************************************************************ + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * ************************************************************************ */ +#ifndef SL_CPUFEATURES_H +#define SL_CPUFEATURES_H +#include +#include +#include +enum { + ALC_CPUID_EAX_1 = 0, + ALC_CPUID_EAX_7, + ALC_CPUID_EAX_8_01, /* 8000.0001 */ + ALC_CPUID_EAX_8_07, /* 8000.0007 */ + ALC_CPUID_EAX_8_08, /* 8000.0008 */ + /* Last entry */ + ALC_CPUID_MAX, +}; +enum { + /*EBX Values*/ + ALC_CPUID_BIT_FSGSBASE = (1u << 0), + ALC_CPUID_BIT_TSC_ADJUST = (1u << 1), + ALC_CPUID_BIT_SGX = (1u << 2), + ALC_CPUID_BIT_BMI1 = (1u << 3), + ALC_CPUID_BIT_HLE = (1u << 4), + ALC_CPUID_BIT_AVX2 = (1u << 5), + ALC_CPUID_BIT_SMEP = (1u << 7), + ALC_CPUID_BIT_BMI2 = (1u << 8), + ALC_CPUID_BIT_ERMS = (1u << 9), + ALC_CPUID_BIT_INVPCID = (1u << 10), + ALC_CPUID_BIT_RTM = (1u << 11), + ALC_CPUID_BIT_TSX = ALC_CPUID_BIT_RTM, + ALC_CPUID_BIT_PQM = (1u << 12), + ALC_CPUID_BIT_MPX = (1u << 14), + ALC_CPUID_BIT_PQE = (1u << 15), + ALC_CPUID_BIT_AVX512F = (1u << 16), + ALC_CPUID_BIT_AVX512DQ = (1u << 17), + ALC_CPUID_BIT_RDSEED = (1u << 18), + ALC_CPUID_BIT_ADX = (1u << 19), + ALC_CPUID_BIT_SMAP = (1u << 20), + ALC_CPUID_BIT_AVX512_IFMA = (1u << 21), + ALC_CPUID_BIT_CLFLUSHOPT = (1u << 22), + ALC_CPUID_BIT_CLWB = (1u << 24), + ALC_CPUID_BIT_TRACE = (1u << 25), + ALC_CPUID_BIT_AVX512PF = (1u << 26), + ALC_CPUID_BIT_AVX512ER = (1u << 27), + ALC_CPUID_BIT_AVX512CD = (1u << 28), + ALC_CPUID_BIT_SHA = (1u << 29), + ALC_CPUID_BIT_AVX512BW = (1u << 30), + ALC_CPUID_BIT_AVX512VL = (1u << 31), + /* ECX Values*/ + ALC_CPUID_BIT_PREFETCHWT1 = (1u << 0), + ALC_CPUID_BIT_AVX512_VBMI = (1u << 1), + ALC_CPUID_BIT_UMIP = (1u << 2), + ALC_CPUID_BIT_PKU = (1u << 3), + ALC_CPUID_BIT_OSPKE = (1u << 4), + ALC_CPUID_BIT_WAITPKG = (1u << 5), + ALC_CPUID_BIT_AVX512_VBMI2 = (1u << 6), + ALC_CPUID_BIT_SHSTK = (1u << 7), + ALC_CPUID_BIT_GFNI = (1u << 8), + ALC_CPUID_BIT_VAES = (1u << 9), + ALC_CPUID_BIT_VPCLMULQDQ = (1u << 10), + ALC_CPUID_BIT_AVX512_VNNI = (1u << 11), + ALC_CPUID_BIT_AVX512_BITALG = (1u << 
12), + ALC_CPUID_BIT_AVX512_VPOPCNTDQ = (1u << 14), + ALC_CPUID_BIT_RDPID = (1u << 22), + ALC_CPUID_BIT_CLDEMOTE = (1u << 25), + ALC_CPUID_BIT_MOVDIRI = (1u << 27), + ALC_CPUID_BIT_MOVDIR64B = (1u << 28), + ALC_CPUID_BIT_SGX_LC = (1u << 30), + /* EDX Values */ + ALC_CPUID_BIT_AVX512_4VNNIW = (1u << 2), + ALC_CPUID_BIT_AVX512_4FMAPS = (1u << 3), + ALC_CPUID_BIT_FSRM = (1u << 4), + ALC_CPUID_BIT_PCONFIG = (1u << 18), + ALC_CPUID_BIT_IBT = (1u << 20), + ALC_CPUID_BIT_IBRS_IBPB = (1u << 26), + ALC_CPUID_BIT_STIBP = (1u << 27), + ALC_CPUID_BIT_CAPABILITIES = (1u << 29), + ALC_CPUID_BIT_SSBD = (1u << 31), +}; +#define ALC_CPU_FAMILY_ZEN 0x17 +#define ALC_CPU_FAMILY_ZEN_PLUS 0x17 +#define ALC_CPU_FAMILY_ZEN2 0x17 +#define ALC_CPU_FAMILY_ZEN3 0x19 +#define ALC_CPU_FAMILY_ZEN4 0x19 +static inline uint32_t +__extract32(uint32_t value, int start, int length); +static inline uint16_t +alc_cpuid_get_family(uint32_t var); +static inline uint16_t +alc_cpuid_get_model(uint32_t var); +static inline uint16_t +alc_cpuid_get_stepping(uint32_t var); +/* ID return values */ +struct alc_cpuid_regs { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; +}; +typedef enum { + ALC_CPU_MFG_INTEL, + ALC_CPU_MFG_AMD, + ALC_CPU_MFG_OTHER, +} alc_cpu_mfg_t; +struct alc_cpu_mfg_info { + alc_cpu_mfg_t mfg_type; + uint16_t family; + uint16_t model; + uint16_t stepping; +}; +struct alc_cpu_features { + struct alc_cpu_mfg_info cpu_mfg_info; + struct alc_cpuid_regs available[ALC_CPUID_MAX]; + struct alc_cpuid_regs usable[ALC_CPUID_MAX]; +}; +static inline void __cpuid(struct alc_cpuid_regs *out); +static inline void __cpuid_1(uint32_t eax, struct alc_cpuid_regs *out); +static inline void __cpuid_2(uint32_t eax, uint32_t ecx, struct alc_cpuid_regs *out); +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#endif +#define INITIALIZED_MAGIC 0xdeadbeaf +static void +__get_mfg_info(struct alc_cpuid_regs* cpuid_regs, + struct alc_cpu_mfg_info* mfg_info); +static void 
+__init_cpu_features(void); +uint32_t +alc_cpu_has_avx512f(void); +uint32_t +alc_cpu_has_avx512dq(void); +uint32_t +alc_cpu_has_avx512bw(void); +uint32_t +alc_cpu_has_avx512er(void); +uint32_t +alc_cpu_has_avx512cd(void); +uint32_t +alc_cpu_has_avx512vl(void); +uint32_t +alc_cpu_has_avx512pf(void); +uint32_t +alc_cpu_has_avx512_ifma(void); +uint32_t +alc_cpu_has_avx512_vnni(void); +uint32_t +alc_cpu_has_avx512_bitalg(void); +uint32_t +alc_cpu_has_avx512_vbmi(void); +uint32_t +alc_cpu_has_avx512_vbmi2(void); +uint32_t +alc_cpu_has_avx512_vpopcntdq(void); +#endif //SL_CPUFEATURES_H From f7a6dda21a4f46e9733ee0a26ffeb338c5fa4ca1 Mon Sep 17 00:00:00 2001 From: arunchan Date: Wed, 8 Mar 2023 13:05:00 +0530 Subject: [PATCH 14/30] Enhance the scalapack testing script New script supports running scalapack testing applications against various MPI binding and mapping configurations, different mpi ranks etc. It also contains commandline options to change the default parameters used in the test. Change-Id: I0e7c437381352e46690f13beb36c4446fe09d837 --- scalapack_test.sh | 947 ++++++++++++++++++++++++---------------------- 1 file changed, 493 insertions(+), 454 deletions(-) mode change 100644 => 100755 scalapack_test.sh diff --git a/scalapack_test.sh b/scalapack_test.sh old mode 100644 new mode 100755 index 8b29076e..da035471 --- a/scalapack_test.sh +++ b/scalapack_test.sh @@ -1,454 +1,493 @@ -#!/usr/bin/env bash - -echo " " -echo " Scalapack Testing Started " - -cd TESTING -echo "SCALAPACK Test suite:" >>../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt - -echo " xcbrd " >>../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcbrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcdblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcdblu >> ../ScalaPack_TestResults.txt 
-sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcdtlu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcdtlu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcevc " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcevc >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcgblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcgblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcgsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcgsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcheevr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcheevr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xchrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xchrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcinv " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcinv >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcls " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcls >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xclu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xclu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcnep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcnep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcpbllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcpbllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcptllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcptllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcqr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcqr >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xctrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xctrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdbrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdbrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xddblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xddblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xddtlu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xddtlu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdgblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdgblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdgsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdgsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdhrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdhrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdhseqr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdhseqr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdinv " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdinv >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdls " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdls >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdlu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdlu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdnep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdnep >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdpbllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdpbllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdptllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdptllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdqr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdqr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdsvd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdsvd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdsyevr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdsyevr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdtrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdtrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsbrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsbrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsdblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsdblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsdtlu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsdtlu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsgblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsgblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsgsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsgsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xshrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xshrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xshseqr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xshseqr >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsinv " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsinv >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsls " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsls >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xslu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xslu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsnep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsnep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xspbllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xspbllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsptllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsptllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsqr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsqr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xssep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xssep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xssvd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xssvd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xssyevr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xssyevr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xstrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xstrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzbrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzbrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzdblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzdblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzdtlu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzdtlu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzevc " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzevc >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzgblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzgblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzgsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzgsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzheevr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzheevr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzhrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzhrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzinv " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzinv >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzls " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzls >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzlu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzlu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xznep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xznep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzpbllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzpbllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzptllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzptllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzqr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzqr >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xztrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xztrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt - +#!/bin/bash + +# This script runs the scalapack testing programs against various mpi +# configurations. Test results will be saved in the folder +# $HOME/aocl_scalapack_testing_results +# +# The below default options will be used if it is run without any +# commandline arguments +# a) MPI ranks => Maximum number of available cpu cores in the system +# b) Test programs run => all, All the scalapack testing programs +# present in the TESTING folder will be run +# c) MPI flavour => It will look for the mpirun executable in the +# PATH variable and corresponding MPI installation will be used. 
+# d) MPI binding, mapping => The test will be performed only +# with 'map-by core' and 'bind-to core' +# +# User can change this behaviour with the command line options +# +# Eg: To test only single precision Cholesky factorization for all +# the MPI mapping for ranks between 4 to 16 use +# $ scalapack_test.sh -t xsllt -s 4 -i 1 -e 16 -c all +# +# Eg: To test all the programs with maximum available ranks +# with MPI mapping "map-by l3cache" +# $ scalapack_test.sh -t all -c map_l3cache +# +# To print all the supported options run it with the argument -h + +#Default values for the test +num_sample_to_collect=1 +test_execution_dir="./TESTING" +user_input_test="all" +test_description="scalapack_default_test" +mpi_mapping_binding="map_core_bind_core" + +scalapack_test_list_fast=( +"xcdtlu" +"xcgblu" +"xcinv" +"xclu" +"xcnep" +"xcptllt" +"xcqr" +"xcsep" +"xdbrd" +"xddblu" +"xddtlu" +"xdhrd" +"xdlu" +"xdpbllt" +"xdptllt" +"xdsvd" +"xsbrd" +"xsgblu" +"xsgsep" +"xshrd" +"xspbllt" +"xsptllt" +"xzevc" +"xzinv" +"xznep" +"xzptllt" +) + +scalapack_test_list_normal=( +"xcbrd" +"xcdblu" +"xcevc" +"xcgsep" +"xchrd" +"xcllt" +"xcls" +"xcpbllt" +"xctrd" +"xdgblu" +"xdgsep" +"xdhseqr" +"xdinv" +"xdllt" +"xdls" +"xdnep" +"xdqr" +"xdsep" +"xdtrd" +"xsdblu" +"xsdtlu" +"xshseqr" +"xsinv" +"xsllt" +"xsls" +"xslu" +"xsnep" +"xsqr" +"xssep" +"xssvd" +"xstrd" +"xzbrd" +"xzdblu" +"xzdtlu" +"xzgblu" +"xzgsep" +"xzhrd" +"xzllt" +"xzls" +"xzlu" +"xzpbllt" +"xzqr" +"xzsep" +"xztrd" +) + +scalapack_test_list_slow=( +"xdsyevr" +"xssyevr" +"xcheevr" +"xzheevr" +) + +scalapack_test_list_execute=() +scalapack_test_list_mpifail=() + +mpi_map_bind_supported_list=( +"map_core_bind_core --map-by core --bind-to core" +"map_l3cache_bind_core --map-by l3cache --bind-to core" +"map_numa_bind_core --map-by numa --bind-to core" +"map_socket_bind_core --map-by socket --bind-to core" +"map_l3cache --map-by l3cache" +"map_numa --map-by numa" +"map_socket --map-by socket" +) + +mpi_map_bind_testing_list=() +
+# Print the command line help: every option, its argument and its default,
+# followed by the mapping/binding names accepted by -c.
+usage()
+{
+    local entry
+
+    cat <<'EOF'
+Usage: scalapack_test.sh [options]
+  -t TEST      test program to run, or a group: fast, normal, slow, all (default: all)
+  -m MPI_PATH  MPI install prefix that contains bin/mpicc (default: derived from the mpirun found in PATH)
+  -d DESC      test description, used to name the result folder (default: scalapack_default_test)
+  -f DIR       folder that holds the scalapack test executables (default: ./TESTING)
+  -s START     first MPI rank count to test (default: nproc --all)
+  -i STEP      increment between MPI rank counts (default: nproc --all)
+  -e END       last MPI rank count to test (default: nproc --all)
+  -x LIST      comma separated tests to exclude; fast, normal or slow exclude the whole group
+  -n SAMPLES   number of runs collected for every configuration (default: 1)
+  -c MAP_BIND  MPI mapping/binding name, or all (default: map_core_bind_core)
+  -h           print this help
+EOF
+
+    echo "Supported MAP_BIND names:"
+    for entry in "${mpi_map_bind_supported_list[@]}"
+    do
+        # The first word of every table entry is its name.
+        echo "  ${entry%% *}"
+    done
+}
+
+# Print the given message lines followed by the usage text, then abort.
+fail_with_usage()
+{
+    printf '%s\n' "$@"
+    echo -e "\n\nUsage is given below"
+    usage
+    exit 1
+}
+
+# Succeed when $1 is a decimal integer greater than zero without leading
+# zeros (a leading zero would be read as octal inside (( ))).
+is_positive_int()
+{
+    [[ "$1" =~ ^[1-9][0-9]*$ ]]
+}
+
+# Rank range defaults: a single run that uses every processor reported by nproc.
+num_cpu_cores=$(nproc --all)
+num_mpi_ranks_step=$num_cpu_cores
+num_mpi_ranks_start=$num_cpu_cores
+num_mpi_ranks_end=$num_cpu_cores
+
+# Tests named with -x; initialised so later expansions never see an unset array.
+scalapack_test_list_exclude=()
+
+while getopts "t:m:d:f:s:i:e:x:n:c:h" OPTION
+do
+    case $OPTION in
+        h)
+            usage
+            exit 0
+            ;;
+        t)
+            user_input_test="$OPTARG"
+            ;;
+        m)
+            mpi_install_path="$OPTARG"
+            ;;
+        d)
+            test_description="$OPTARG"
+            ;;
+        f)
+            test_execution_dir="$OPTARG"
+            ;;
+        s)
+            num_mpi_ranks_start="$OPTARG"
+            ;;
+        i)
+            num_mpi_ranks_step="$OPTARG"
+            ;;
+        e)
+            num_mpi_ranks_end="$OPTARG"
+            ;;
+        n)
+            num_sample_to_collect="$OPTARG"
+            ;;
+        c)
+            mpi_mapping_binding="$OPTARG"
+            ;;
+        x)
+            # Split on commas (and blanks) without exposing the names to globbing.
+            read -r -a excluded_names <<< "${OPTARG//,/ }"
+            for tst in "${excluded_names[@]}"
+            do
+                case "$tst" in
+                    fast)
+                        scalapack_test_list_exclude+=( "${scalapack_test_list_fast[@]}" )
+                        ;;
+                    normal)
+                        scalapack_test_list_exclude+=( "${scalapack_test_list_normal[@]}" )
+                        ;;
+                    slow)
+                        scalapack_test_list_exclude+=( "${scalapack_test_list_slow[@]}" )
+                        ;;
+                    *)
+                        scalapack_test_list_exclude+=( "$tst" )
+                        ;;
+                esac
+            done
+            ;;
+        *)
+            usage
+            exit 1
+            ;;
+    esac
+done
+
+# Reject malformed numbers here; otherwise they only surface later as
+# confusing seq / arithmetic errors in the middle of the test loop.
+if ! is_positive_int "$num_mpi_ranks_start" || ! is_positive_int "$num_mpi_ranks_end" ; then
+    fail_with_usage "MPI rank range (-s, -e) must be given as positive integers. Exiting !!!"
+fi
+
+if ! [[ "$num_mpi_ranks_step" =~ ^-?[1-9][0-9]*$ ]] ; then
+    fail_with_usage "MPI rank step (-i) must be a non-zero integer. Exiting !!!"
+fi
+
+if ! is_positive_int "$num_sample_to_collect" ; then
+    fail_with_usage "Sample count (-n) must be a positive integer. Exiting !!!"
+fi
+
+# Verify the user supplied mapping and binding option (-c)
+# If the check is successful set map_bind_found to 1
+map_bind_found=0
+if [ "$mpi_mapping_binding" == "all" ] ; then
+    mpi_map_bind_testing_list+=( "${mpi_map_bind_supported_list[@]}" )
+    map_bind_found=1
+else
+    for mpi_map_bind in "${mpi_map_bind_supported_list[@]}"
+    do
+        if [ "${mpi_map_bind%% *}" == "$mpi_mapping_binding" ] ; then
+            mpi_map_bind_testing_list+=( "$mpi_map_bind" )
+            map_bind_found=1
+        fi
+    done
+fi
+
+if [ "$map_bind_found" -eq 0 ] ; then
+    fail_with_usage "The supplied $mpi_mapping_binding is wrong. Exiting !!!"
+fi
+
+# Verify the user supplied test (-t)
+# If the check is successful set test_found to 1
+test_found=0
+if [ -z "$user_input_test" ] ; then
+    fail_with_usage "No test input. Specify a test name or 'fast, normal, slow, all'. Exiting !!!"
+fi
+
+if [ "$user_input_test" == "fast" ] || [ "$user_input_test" == "all" ]; then
+    scalapack_test_list_execute+=( "${scalapack_test_list_fast[@]}" )
+    test_found=1
+fi
+
+if [ "$user_input_test" == "normal" ] || [ "$user_input_test" == "all" ]; then
+    scalapack_test_list_execute+=( "${scalapack_test_list_normal[@]}" )
+    test_found=1
+fi
+
+if [ "$user_input_test" == "slow" ] || [ "$user_input_test" == "all" ]; then
+    scalapack_test_list_execute+=( "${scalapack_test_list_slow[@]}" )
+    test_found=1
+fi
+
+# Not a group name: look for a single test program in any of the groups.
+if [ "$test_found" -eq 0 ] ; then
+    for tst in "${scalapack_test_list_fast[@]}" \
+               "${scalapack_test_list_normal[@]}" \
+               "${scalapack_test_list_slow[@]}"
+    do
+        if [ "$user_input_test" == "$tst" ] ; then
+            scalapack_test_list_execute+=( "$tst" )
+            test_found=1
+            break
+        fi
+    done
+fi
+
+if [ "$test_found" -eq 0 ] ; then
+    fail_with_usage "Wrong test input. Specify a valid test name or 'fast, normal, slow, all'. Exiting !!!"
+fi
+
+# check for a valid mpirun in the default PATH if user did not supply any(-m)
+if [ -z "$mpi_install_path" ] ; then
+    mpirun_path=$(command -v mpirun)
+
+    if [ -z "$mpirun_path" ] ; then
+        echo "Unable to find mpirun. Exiting !!!"
+        exit 1
+    fi
+
+    # <prefix>/bin/mpirun -> <prefix>
+    mpi_install_path=$(dirname "$(dirname "$mpirun_path")")
+fi
+
+if [ ! -f "$mpi_install_path/bin/mpicc" ] ; then
+    fail_with_usage "Unable to find mpicc @ $mpi_install_path/bin/mpicc" \
+        "Please specify a valid mpi install path with -m, can't proceed without it. Exiting !!!"
+fi
+
+# Check the scalapack test executables in user supplied/default folder
+if [ -z "$test_execution_dir" ] ; then
+    fail_with_usage "Please specify the path to scalapack TESTING folder. can't proceed without it. Exiting !!!"
+fi
+
+for scalapack_test in "${scalapack_test_list_execute[@]}"
+do
+    if [ ! -f "$test_execution_dir/$scalapack_test" ] ; then
+        fail_with_usage "Unable to find $scalapack_test exe in $test_execution_dir. can't proceed without it. Exiting !!!"
+    fi
+done
+
+if [ -z "$test_description" ] ; then
+    fail_with_usage "Please specify a test description. can't proceed without it. Exiting !!!"
+fi
+
+output_dir="$HOME/aocl_scalapack_testing_results/"
+dir_str=$(date +%b_%d_%Y_%H_%M_%S | tr '[:upper:]' '[:lower:]')
+test_description=$(echo $test_description | tr -s ' ' | tr ' ' '_')
+result_folder="$test_description""_""$dir_str"
+
+#set mpi binary and library path
+PATH="$mpi_install_path/bin:$PATH"; export PATH
+# Append the previous value only when it is non-empty: a trailing ':' would
+# add an empty entry to the library search path.
+LD_LIBRARY_PATH="$mpi_install_path/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"; export LD_LIBRARY_PATH
+
+test_execution_dir=$(readlink -f "$test_execution_dir")
+
+cd "$test_execution_dir"
+if [ $? -ne 0 ]; then
+    echo "Unable to change directory to path $test_execution_dir. Please specify a valid scalapack TESTING folder. Exiting !!!"
+    echo -e "\n\nUsage is given below"
+    usage
+    exit 1
+fi
+
+# Every artefact of this run lives below result_dir.
+result_dir="$output_dir/$result_folder"
+if ! mkdir -p "$result_dir" ; then
+    echo "Unable to create the result folder $result_dir. Exiting !!!"
+    exit 1
+fi
+
+test_log_file="$result_dir/test_log.txt"
+test_env_file="$result_dir/test_env.txt"
+
+# Record the environment the tests are run in.
+echo -e "test_description:$test_description" > "$test_env_file"
+echo -e "scalapack TESTING dir:$test_execution_dir" >> "$test_env_file"
+echo -e "mpi path used for testing:$mpi_install_path" >> "$test_env_file"
+echo -e "\nenv PATH:$PATH" >> "$test_env_file"
+echo -e "\nenv LD_LIBRARY_PATH:$LD_LIBRARY_PATH" >> "$test_env_file"
+echo -e "\nenv OMP_NUM_THREADS:$OMP_NUM_THREADS" >> "$test_env_file"
+echo -e "\nenv LD_PRELOAD:$LD_PRELOAD" >> "$test_env_file"
+echo -e "\nMPI configuration:" >> "$test_env_file"
+
+# printf keeps backslashes in the tool output literal (echo -e would expand them).
+if [ -f "$mpi_install_path/bin/ompi_info" ] ; then
+    printf '%s\n' "$(ompi_info)" >> "$test_env_file"
+elif [ -f "$mpi_install_path/bin/impi_info" ] ; then
+    printf '%s\n\n' "$(mpirun --version)" >> "$test_env_file"
+else
+    echo -e "Unknown MPI installation" >> "$test_env_file"
+fi
+
+echo "--------------------------------------------------------" >> "$test_log_file"
+echo "Main test loop started @ $(date)" >> "$test_log_file"
+echo "--------------------------------------------------------" >> "$test_log_file"
+
+##Loop over tests
+# Result layout: <mapping name>/<test program>/result_nproc_<ranks>/sample_<i>/result.txt
+for mpi_map_bind_var in "${mpi_map_bind_testing_list[@]}"
+do
+    # Table entry format: "<name> <mpirun options...>"
+    test_name="${mpi_map_bind_var%% *}"
+    mpi_opt="${mpi_map_bind_var#* }"
+    folder1="$result_dir/$test_name"
+
+    echo "$test_name started @ $(date)" >> "$test_log_file"
+
+    mkdir "$folder1"
+    for scalapack_test in "${scalapack_test_list_execute[@]}"
+    do
+        # Skip the tests the user excluded with -x.
+        for scalapack_exclude_test in "${scalapack_test_list_exclude[@]}"
+        do
+            if [ "$scalapack_test" == "$scalapack_exclude_test" ] ; then
+                continue 2
+            fi
+        done
+
+        folder2="$folder1/$scalapack_test"
+        mkdir "$folder2"
+
+        for num_core in $(seq "$num_mpi_ranks_start" "$num_mpi_ranks_step" "$num_mpi_ranks_end")
+        do
+            folder3="$folder2/result_nproc_$num_core"
+            mkdir "$folder3"
+
+            echo "Executing $test_name:$scalapack_test with $num_core MPI ranks. @ $(date)"
+
+            # Wall clock time in nanoseconds, summed over all samples.
+            declare -i total_time=0
+            declare -i start_time=0
+            declare -i end_time=0
+
+            for (( i = 0; i < num_sample_to_collect; i++ ))
+            do
+                folder4="$folder3/sample_$i"
+                mkdir "$folder4"
+
+                # Execute the test
+                # $mpi_opt is left unquoted on purpose: it holds several mpirun options.
+                start_time=$(date +%s%N)
+                result_str=$(mpirun -np "$num_core" $mpi_opt "./$scalapack_test" 2>&1)
+                mpirun_exit_code=$?
+                end_time=$(date +%s%N)
+                total_time=$(( total_time + (end_time - start_time) ))
+
+                echo "$result_str" > "$folder4/result.txt"
+
+                if [ "$mpirun_exit_code" -ne 0 ] ; then
+                    echo "mpirun failed for the test:$scalapack_test" | tee -a "$test_log_file"
+                    scalapack_test_list_mpifail+=( "$scalapack_test" )
+                fi
+            done
+            echo "#__test_runtime__:$test_name:$scalapack_test:$(( total_time / 1000000 )) ms:$num_core" >> "$test_log_file"
+        done
+    done
+    echo "$test_name ended @ $(date)" >> "$test_log_file"
+done
+
+echo "--------------------------------------------------------" >> "$test_log_file"
+echo "Main test loop ended @ $(date)" >> "$test_log_file"
+echo "--------------------------------------------------------" >> "$test_log_file"
+
+# Result files in which a routine reported FAILED, one path per line.
+failed_file_list=$(find "$result_dir" -name result.txt -exec grep -l FAILED {} +)
+if [ -z "$failed_file_list" ] ; then
+    echo "Number of scalapack routines failed: 0" | tee -a "$test_log_file"
+else
+    echo "Some of the scalapack routines failed. Please find the result_files with failed routines" | tee -a "$test_log_file"
+    echo "$failed_file_list" | tee -a "$test_log_file"
+fi
+
+if [ ${#scalapack_test_list_mpifail[@]} -eq 0 ]; then
+    echo "Number of mpirun failures: 0" | tee -a "$test_log_file"
+else
+    echo "Some of the scalapack test applications failed in mpirun. Please find it below" | tee -a "$test_log_file"
+    for mpi_fail in "${scalapack_test_list_mpifail[@]}"
+    do
+        echo "$mpi_fail" | tee -a "$test_log_file"
+    done
+fi
+
+# Result files that report a non-zero number of skipped tests.
+scalapack_test_list_skipped=()
+while IFS= read -r skip_check
+do
+    # A result file may contain several "tests skipped" lines; add up the
+    # leading counts instead of handing a multi-word value to '['.
+    skipped_count=$(grep -a "tests skipped" "$skip_check" |
+        awk '$1 ~ /^[0-9]+$/ { total += $1 } END { print total + 0 }')
+
+    if [ "$skipped_count" -ne 0 ] ; then
+        scalapack_test_list_skipped+=( "$skip_check" )
+    fi
+done < <(find "$result_dir" -name result.txt)
+
+if [ ${#scalapack_test_list_skipped[@]} -eq 0 ]; then
+    echo "Number of scalapack routines skipped: 0" | tee -a "$test_log_file"
+else
+    echo "Some of the scalapack routines skipped. Please find the result_files with skipped routines" | tee -a "$test_log_file"
+    for mpi_skip in "${scalapack_test_list_skipped[@]}"
+    do
+        echo "$mpi_skip" | tee -a "$test_log_file"
+    done
+fi
From 658a19c60bd4520a933bb442ac0231c81a6ef03b Mon Sep 17 00:00:00 2001 From: nprasadm Date: Thu, 2 Mar 2023 16:15:06 +0530 Subject: [PATCH 15/30] aocl-scaLAPACK: Build option added to run the scaLAPACK test suite with large size matrices. 1) The existing workbuffer in test-suite application is small and allocated statically, this makes it impossible to test matrix sizes bigger than 2000. 2) The config flag 'ENABLE_LARGE_MATRIX_TESTING' allows to set the higher workbuffer size, needed to test larger matrices by allocating the memory dynamically. 3) The optional config flag "-DWORK_BUFFER_SIZE=custom_size_bytes" to customize the workbuffer (as per the machine memory availability) is also provided.
Signed-off-by: Nagendra AMD-Internal: [CPUPL-3028], [CPUPL-1880] Change-Id: I499bd0cbd7e02d438cc4eee4a918a5b8f609b7b5 --- CMakeLists.txt | 31 ++++++++++++++++++++++++++++++- TESTING/LIN/pdinvdriver.f | 5 +++-- TESTING/LIN/pdludriver.f | 3 +-- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4161fa95..40dd1cfe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,15 +18,27 @@ set(CMAKE_MODULE_PATH "${SCALAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH}) set(SL_FRAMEWORK_INCLUDE_PATH "${SCALAPACK_SOURCE_DIR}/FRAMEWORK") -#Build Options +# ------ Build Options List ----------- # ILP64 build option option(ENABLE_ILP64 "Enable ILP64 " OFF) + +# aocl progress option option(ENABLE_AOCL_PROGRESS "Enable progress feature " OFF) + +# DTL option option(ENABLE_DTL "Enable DTL feature " OFF) # Option: Include build number in the version string. option (ENABLE_SET_LIB_VERSION "Set library version" OFF) + +# Option: Dynamic allocation of work buffer memory in Test code +# Helpful to test larger matrix sizes more than 2K +option (ENABLE_LARGE_MATRIX_TESTING "Dynamic allocation of work buffer memory in test code" OFF) + +# ------ Build Options List End ----------- + + if (WIN32 AND CMAKE_Fortran_COMPILER_ID MATCHES "Intel") set (CMAKE_IFORT_LIBDEPS_DIR "C:/Program Files (x86)/IntelSWTools/compilers_and_libraries/windows/compiler/lib/intel64_win" CACHE STRING "") endif() @@ -34,6 +46,23 @@ endif() set(CMAKE_ICC_FLAGS " ") set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp" ) +# Option to enable the scaLAPACK test-suite with "Dynamic work buffer memory allocation". +if(ENABLE_LARGE_MATRIX_TESTING) + +# Configure option to customize Dynamic work buffer size "-DWORK_BUFFER_SIZE = custom_size Bytes" +# as per the machine's memory configuration. 
+ if(WORK_BUFFER_SIZE) + message(STATUS "scaLAPACK test-suite build with custom dynamic work_buffer_size: ${WORK_BUFFER_SIZE} bytes.") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DDYNAMIC_WORK_MEM_ALLOC -DWORK_BUFFER_SIZE=${WORK_BUFFER_SIZE}" ) + else(WORK_BUFFER_SIZE) +# Build with Dynamic work buffer size = 2100000000 bytes by default. + message(STATUS "scaLAPACK test-suite build with dynamic work_buffer_size: 2100000000 bytes.") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DDYNAMIC_WORK_MEM_ALLOC -DWORK_BUFFER_SIZE=2100000000 " ) + endif(WORK_BUFFER_SIZE) + +endif(ENABLE_LARGE_MATRIX_TESTING) + + if (UNIX) if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp -ffixed-line-length-none -I ${SL_FRAMEWORK_INCLUDE_PATH}/" ) diff --git a/TESTING/LIN/pdinvdriver.f b/TESTING/LIN/pdinvdriver.f index 0a8a5410..4503ce07 100644 --- a/TESTING/LIN/pdinvdriver.f +++ b/TESTING/LIN/pdinvdriver.f @@ -64,7 +64,7 @@ PROGRAM PDINVDRIVER PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) - INTEGER DBLESZ, INTGSZ, MEMSIZ, NTESTS, TOTMEM + INTEGER DBLESZ, INTGSZ, NTESTS, TOTMEM #ifdef ENABLE_ILP64 PARAMETER ( INTGSZ = 8 ) #else @@ -72,12 +72,13 @@ PROGRAM PDINVDRIVER #endif * #ifndef DYNAMIC_WORK_MEM_ALLOC + INTEGER MEMSIZ DOUBLE PRECISION PADVAL, ZERO PARAMETER ( DBLESZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) #else - INTEGER, PARAMETER :: MEMSIZ = 2100000000 + INTEGER, PARAMETER :: MEMSIZ = WORK_BUFFER_SIZE DOUBLE PRECISION PADVAL, ZERO PARAMETER ( DBLESZ = 8, TOTMEM = 2000000, $ NTESTS = 20, diff --git a/TESTING/LIN/pdludriver.f b/TESTING/LIN/pdludriver.f index 09e04f62..52dc6e4d 100644 --- a/TESTING/LIN/pdludriver.f +++ b/TESTING/LIN/pdludriver.f @@ -87,8 +87,7 @@ PROGRAM PDLUDRIVER $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) #else INTEGER DBLESZ, NTESTS - INTEGER, PARAMETER :: MEMSIZ 
= 2100000000 - + INTEGER, PARAMETER :: MEMSIZ = WORK_BUFFER_SIZE DOUBLE PRECISION PADVAL, ZERO PARAMETER ( DBLESZ = 8, $ NTESTS = 20, From a53e81da4643e55d38deb320c1991af6ff52a46a Mon Sep 17 00:00:00 2001 From: nprasadm Date: Wed, 22 Mar 2023 19:27:49 +0530 Subject: [PATCH 16/30] aocl-scaLAPACK: Trace and Logging feature enabled for 40 double data type APIs. Signed-off-by: Nagendra AMD-Internal: [CPUPL-2700] Change-Id: I8e3c7be78c2767891e9c81e3b92dc931ceebee48 --- FRAMEWORK/SL_Context_fortran_include.h | 2 +- SRC/pddbsv.f | 30 ++++++++++++++++ SRC/pddbtrf.f | 37 +++++++++++++++++-- SRC/pddbtrs.f | 44 ++++++++++++++++++++--- SRC/pddbtrsv.f | 44 ++++++++++++++++++++--- SRC/pddtsv.f | 29 +++++++++++++++ SRC/pddttrf.f | 37 +++++++++++++++++-- SRC/pddttrs.f | 43 +++++++++++++++++++--- SRC/pddttrsv.f | 43 +++++++++++++++++++--- SRC/pdgbsv.f | 30 ++++++++++++++++ SRC/pdgbtrf.f | 37 +++++++++++++++++-- SRC/pdgbtrs.f | 44 ++++++++++++++++++++--- SRC/pdgebal.f | 28 +++++++++++++++ SRC/pdgebd2.f | 29 +++++++++++++++ SRC/pdgebrd.f | 36 +++++++++++++++++-- SRC/pdgecon.f | 33 +++++++++++++++++ SRC/pdgeequ.f | 42 +++++++++++++++++++--- SRC/pdgehd2.f | 28 +++++++++++++++ SRC/pdgehrd.f | 34 ++++++++++++++++-- SRC/pdgelq2.f | 34 ++++++++++++++++-- SRC/pdgelqf.f | 34 ++++++++++++++++-- SRC/pdgels.f | 31 ++++++++++++++++ SRC/pdgeql2.f | 34 ++++++++++++++++-- SRC/pdgeqlf.f | 34 ++++++++++++++++-- SRC/pdgeqpf.f | 38 +++++++++++++++++--- SRC/pdgeqr2.f | 34 ++++++++++++++++-- SRC/pdgeqrf.f | 36 ++++++++++++++++--- SRC/pdgerfs.f | 33 +++++++++++++++++ SRC/pdgerq2.f | 34 ++++++++++++++++-- SRC/pdgerqf.f | 34 ++++++++++++++++-- SRC/pdgesv.f | 27 ++++++++++++++ SRC/pdgesvd.f | 30 ++++++++++++++++ SRC/pdgesvx.f | 35 ++++++++++++++++++ SRC/pdgetf2.f | 39 +++++++++++++------- SRC/pdgetf2K.f | 40 +++++++++++++++++---- SRC/pdgetf2_comm.f | 27 ++++++++++++-- SRC/pdgetri.f | 50 +++++++++++++++++++++----- SRC/pdgetrs.f | 34 ++++++++++++++++-- SRC/pdggqrf.f | 29 +++++++++++++++ SRC/pdggrqf.f | 29 
+++++++++++++++ SRC/pdlaswp.f | 37 +++++++++++++------ 41 files changed, 1302 insertions(+), 101 deletions(-) diff --git a/FRAMEWORK/SL_Context_fortran_include.h b/FRAMEWORK/SL_Context_fortran_include.h index a3401a4e..16779a16 100644 --- a/FRAMEWORK/SL_Context_fortran_include.h +++ b/FRAMEWORK/SL_Context_fortran_include.h @@ -30,7 +30,7 @@ #define AOCL_DTL_TRACE_EXIT_F CONTINUE #define aocl_scalapack_init_ AOCL_SCALAPACK_INIT #else -#define AOCL_DTL_TRACE_ENTRY_F CALL SL_DTL_TRACE_ENTRY_F(FILE_NAME, __LINE__, ' ') +#define AOCL_DTL_TRACE_ENTRY_F CALL AOCL_SL_DTL_TRACE_ENTRY(FILE_NAME, __LINE__, ' ') #define AOCL_DTL_TRACE_EXIT_F CALL SL_DTL_TRACE_EXIT_F (FILE_NAME, __LINE__, ' ') #endif diff --git a/SRC/pddbsv.f b/SRC/pddbsv.f index 70f9bc5a..b5841339 100644 --- a/SRC/pddbsv.f +++ b/SRC/pddbsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * @@ -8,6 +14,7 @@ SUBROUTINE PDDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, * and University of California, Berkeley. * November 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER BWL, BWU, IB, INFO, JA, LWORK, N, NRHS * .. @@ -382,7 +389,26 @@ SUBROUTINE PDDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, * .. External Subroutines .. EXTERNAL PDDBTRF, PDDBTRS, PXERBLA * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pddbsv.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) BWL, BWU, IB, INFO, + $ JA, LWORK, N, NRHS + 102 FORMAT('PDDBSV inputs: + $ BWL: ', I5,' BWU: ', I5,' IB: ', I5,' INFO: ', + $ I5,' JA: ', I5,' LWORK: ', I5,' N: ', I5,' NRH + $ S: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines @@ -405,6 +431,7 @@ SUBROUTINE PDDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, CALL PXERBLA( ICTXT, $ 'PDDBSV', $ -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ENDIF * @@ -427,6 +454,7 @@ SUBROUTINE PDDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PDDBSV', -INFO ) ENDIF + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -440,9 +468,11 @@ SUBROUTINE PDDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBSV', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDDBSV diff --git a/SRC/pddbtrf.f b/SRC/pddbtrf.f index 5743f329..cfc6dcf8 100644 --- a/SRC/pddbtrf.f +++ b/SRC/pddbtrf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, $ LWORK, INFO ) * @@ -5,6 +11,7 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N * .. @@ -381,7 +388,25 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pddbtrf.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. 
Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) BWL, BWU, INFO, JA, + $ LAF, LWORK, N + 102 FORMAT('PDDBTRF inputs: + $ BWL: ', I5,' BWU: ', I5,' INFO: ', I5,' JA: ', + $ I5,' LAF: ', I5,' LWORK: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Test the input parameters * @@ -461,12 +486,14 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDDBTRF, D&C alg.: only 1 block per proc' $ , -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*MAX( BWL, BWU ) ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PDDBTRF, D&C alg.: NB too small', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -481,6 +508,7 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDDBTRF: auxiliary storage error ', $ -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -495,6 +523,7 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, INFO = -10 CALL PXERBLA( ICTXT, 'PDDBTRF: worksize error ', -INFO ) END IF + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -550,13 +579,16 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBTRF', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1244,6 +1276,7 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, END IF * * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDDBTRF diff --git a/SRC/pddbtrs.f b/SRC/pddbtrs.f index 3f6c339c..867b11f3 100644 --- a/SRC/pddbtrs.f +++ b/SRC/pddbtrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, $ DESCB, AF, LAF, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, * and University of California, Berkeley. * April 3, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS @@ -395,7 +402,27 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pddbtrs.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) TRANS, BWL, BWU, IB, + $ INFO, JA, LAF, LWORK, N, NRHS + 102 FORMAT('PDDBTRS inputs: + $ TRANS: ', A5,' + $ BWL: ', I5,' BWU: ', I5,' IB: ', I5,' INFO: ', + $ I5,' JA: ', I5,' LAF: ', I5,' LWORK: ', I5,' N + $ : ', I5,' NRHS: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Test the input parameters * @@ -528,12 +555,14 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDDBTRS, D&C alg.: only 1 block per proc' $ , -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*MAX( BWL, BWU ) ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PDDBTRS, D&C alg.: NB too small', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -547,6 +576,7 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, INFO = -15 CALL PXERBLA( ICTXT, 'PDDBTRS: worksize error', -INFO ) END IF + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -618,16 +648,21 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBTRS', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * - IF( NRHS.EQ.0 ) - $ RETURN + IF( NRHS.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -740,6 +775,7 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, WORK( 1 ) = WORK_SIZE_MIN * * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDDBTRS diff --git a/SRC/pddbtrsv.f b/SRC/pddbtrsv.f index 293d12c4..860f1f5a 100644 --- a/SRC/pddbtrsv.f +++ b/SRC/pddbtrsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * @@ -5,6 +11,7 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS @@ -409,7 +416,27 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pddbtrsv.f' * .. Executable Statements .. 
+* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) TRANS, UPLO, BWL, BWU, + $ IB, INFO, JA, LAF, LWORK, N, NRHS + 102 FORMAT('PDDBTRSV inputs: + $ TRANS: ', A5,' UPLO: ', A5,' + $ BWL: ', I5,' BWU: ', I5,' IB: ', I5,' INFO: ', + $ I5,' JA: ', I5,' LAF: ', I5,' LWORK: ', I5,' N + $ : ', I5,' NRHS: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Test the input parameters * @@ -556,6 +583,7 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, CALL PXERBLA( ICTXT, $ 'PDDBTRSV, D&C alg.: only 1 block per proc', $ -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -563,6 +591,7 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, INFO = -( 9*100+4 ) CALL PXERBLA( ICTXT, 'PDDBTRSV, D&C alg.: NB too small', $ -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -576,6 +605,7 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, INFO = -16 CALL PXERBLA( ICTXT, 'PDDBTRSV: worksize error', -INFO ) END IF + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -649,16 +679,21 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBTRSV', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * - IF( NRHS.EQ.0 ) - $ RETURN + IF( NRHS.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1542,6 +1577,7 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, WORK( 1 ) = WORK_SIZE_MIN * * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDDBTRSV diff --git a/SRC/pddtsv.f b/SRC/pddtsv.f index 241f4f9b..a6b8a73a 100644 --- a/SRC/pddtsv.f +++ b/SRC/pddtsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * @@ -8,6 +14,7 @@ SUBROUTINE PDDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, * and University of California, Berkeley. * November 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IB, INFO, JA, LWORK, N, NRHS * .. @@ -392,7 +399,25 @@ SUBROUTINE PDDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, * .. External Subroutines .. EXTERNAL PDDTTRF, PDDTTRS, PXERBLA * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pddtsv.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IB, INFO, JA, LWORK, + $ N, NRHS + 102 FORMAT('PDDTSV inputs: + $ IB: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', + $ I5,' N: ', I5,' NRHS: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines @@ -418,6 +443,7 @@ SUBROUTINE PDDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, CALL PXERBLA( ICTXT, $ 'PDDTSV', $ -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ENDIF * @@ -440,6 +466,7 @@ SUBROUTINE PDDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PDDTSV', -INFO ) ENDIF + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -453,9 +480,11 @@ SUBROUTINE PDDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDDTSV', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDDTSV diff --git a/SRC/pddttrf.f b/SRC/pddttrf.f index 56cbaa19..df1a410c 100644 --- a/SRC/pddttrf.f +++ b/SRC/pddttrf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, * and University of California, Berkeley. * April 3, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER INFO, JA, LAF, LWORK, N * .. @@ -391,7 +398,25 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pddttrf.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) INFO, JA, LAF, LWORK, + $ N + 102 FORMAT('PDDTTRF inputs: + $ INFO: ', I5,' JA: ', I5,' LAF: ', I5,' LWORK: ' + $ , I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Test the input parameters * @@ -458,12 +483,14 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDDTTRF, D&C alg.: only 1 block per proc' $ , -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*INT_ONE ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PDDTTRF, D&C alg.: NB too small', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -478,6 +505,7 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDDTTRF: auxiliary storage error ', $ -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -492,6 +520,7 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, INFO = -10 CALL PXERBLA( ICTXT, 'PDDTTRF: worksize error ', -INFO ) END IF + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -543,13 +572,16 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDTTRF', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1036,6 +1068,7 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, END IF * * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDDTTRF diff --git a/SRC/pddttrs.f b/SRC/pddttrs.f index bb7f8cd3..6b7c725d 100644 --- a/SRC/pddttrs.f +++ b/SRC/pddttrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, $ DESCB, AF, LAF, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, * and University of California, Berkeley. * April 3, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER TRANS INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS @@ -410,7 +417,26 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pddttrs.f' * .. Executable Statements .. 
+* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) TRANS, IB, INFO, JA, + $ LAF, LWORK, N, NRHS + 102 FORMAT('PDDTTRS inputs: + $ TRANS: ', A5,' + $ IB: ', I5,' INFO: ', I5,' JA: ', I5,' LAF: ', I + $ 5,' LWORK: ', I5,' N: ', I5,' NRHS: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Test the input parameters * @@ -535,12 +561,14 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDDTTRS, D&C alg.: only 1 block per proc' $ , -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.2*INT_ONE ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PDDTTRS, D&C alg.: NB too small', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -554,6 +582,7 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, INFO = -15 CALL PXERBLA( ICTXT, 'PDDTTRS: worksize error', -INFO ) END IF + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -621,16 +650,21 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDTTRS', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * - IF( NRHS.EQ.0 ) - $ RETURN + IF( NRHS.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -772,6 +806,7 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, WORK( 1 ) = WORK_SIZE_MIN * * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDDTTRS diff --git a/SRC/pddttrsv.f b/SRC/pddttrsv.f index 7afdbaa9..d0a59da4 100644 --- a/SRC/pddttrsv.f +++ b/SRC/pddttrsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, * and University of California, Berkeley. * April 3, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER TRANS, UPLO INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS @@ -422,7 +429,26 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pddttrsv.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) TRANS, UPLO, IB, INFO, + $ JA, LAF, LWORK, N, NRHS + 102 FORMAT('PDDTTRSV inputs: + $ TRANS: ', A5,' UPLO: ', A5,' + $ IB: ', I5,' INFO: ', I5,' JA: ', I5,' LAF: ', I + $ 5,' LWORK: ', I5,' N: ', I5,' NRHS: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Test the input parameters * @@ -556,6 +582,7 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, CALL PXERBLA( ICTXT, $ 'PDDTTRSV, D&C alg.: only 1 block per proc', $ -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -563,6 +590,7 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, INFO = -( 9*100+4 ) CALL PXERBLA( ICTXT, 'PDDTTRSV, D&C alg.: NB too small', $ -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -576,6 +604,7 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, INFO = -16 CALL PXERBLA( ICTXT, 'PDDTTRSV: worksize error', -INFO ) END IF + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -645,16 +674,21 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDTTRSV', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick 
return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * - IF( NRHS.EQ.0 ) - $ RETURN + IF( NRHS.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1483,6 +1517,7 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, WORK( 1 ) = WORK_SIZE_MIN * * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDDTTRSV diff --git a/SRC/pdgbsv.f b/SRC/pdgbsv.f index 566133fe..e3ad3138 100644 --- a/SRC/pdgbsv.f +++ b/SRC/pdgbsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, $ DESCB, WORK, LWORK, INFO ) * @@ -8,6 +14,7 @@ SUBROUTINE PDGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, * and University of California, Berkeley. * November 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER BWL, BWU, IB, INFO, JA, LWORK, N, NRHS * .. @@ -387,7 +394,26 @@ SUBROUTINE PDGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, * .. External Subroutines .. EXTERNAL PDGBTRF, PDGBTRS, PXERBLA * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgbsv.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) BWL, BWU, IB, INFO, + $ JA, LWORK, N, NRHS + 102 FORMAT('PDGBSV inputs: + $ BWL: ', I5,' BWU: ', I5,' IB: ', I5,' INFO: ', + $ I5,' JA: ', I5,' LWORK: ', I5,' N: ', I5,' NRH + $ S: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines @@ -410,6 +436,7 @@ SUBROUTINE PDGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, CALL PXERBLA( ICTXT, $ 'PDGBSV', $ -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ENDIF * @@ -432,6 +459,7 @@ SUBROUTINE PDGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PDGBSV', -INFO ) ENDIF + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -445,9 +473,11 @@ SUBROUTINE PDGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGBSV', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGBSV diff --git a/SRC/pdgbtrf.f b/SRC/pdgbtrf.f index dfea1da9..8ce8e4bf 100644 --- a/SRC/pdgbtrf.f +++ b/SRC/pdgbtrf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, $ WORK, LWORK, INFO ) * @@ -5,6 +11,7 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N * .. @@ -395,7 +402,25 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgbtrf.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. 
Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) BWL, BWU, INFO, JA, + $ LAF, LWORK, N + 102 FORMAT('PDGBTRF inputs: + $ BWL: ', I5,' BWU: ', I5,' INFO: ', I5,' JA: ', + $ I5,' LAF: ', I5,' LWORK: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * * Test the input parameters @@ -473,12 +498,14 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDGBTRF, D&C alg.: only 1 block per proc' $ , -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. ( NB.LT.( BWL+BWU+1 ) ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PDGBTRF, D&C alg.: NB too small', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -493,6 +520,7 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDGBTRF: auxiliary storage error ', $ -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -509,6 +537,7 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, WORK( 1 ) = WORK_SIZE_MIN CALL PXERBLA( ICTXT, 'PDGBTRF: worksize error ', -INFO ) END IF + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -564,13 +593,16 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDGBTRF', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1093,6 +1125,7 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, END IF * * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGBTRF diff --git a/SRC/pdgbtrs.f b/SRC/pdgbtrs.f index fa8f4a1a..57a2d1b6 100644 --- a/SRC/pdgbtrs.f +++ b/SRC/pdgbtrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * @@ -5,6 +11,7 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER TRANS INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS @@ -407,7 +414,27 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgbtrs.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) TRANS, BWL, BWU, IB, + $ INFO, JA, LAF, LWORK, N, NRHS + 102 FORMAT('PDGBTRS inputs: + $ TRANS: ', A5,' + $ BWL: ', I5,' BWU: ', I5,' IB: ', I5,' INFO: ', + $ I5,' JA: ', I5,' LAF: ', I5,' LWORK: ', I5,' N + $ : ', I5,' NRHS: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * * Test the input parameters @@ -543,12 +570,14 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDGBTRS, D&C alg.: only 1 block per proc' $ , -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.( BWL+BWU+1 ) ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PDGBTRS, D&C alg.: NB too small', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -564,6 +593,7 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, INFO = -16 CALL PXERBLA( ICTXT, 'PDGBTRS: worksize error ', -INFO ) END IF + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -635,16 +665,21 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDGBTRS', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * - IF( NRHS.EQ.0 ) - $ RETURN + IF( NRHS.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1165,6 +1200,7 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, * WORK( 1 ) = WORK_SIZE_MIN * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGBTRS diff --git a/SRC/pdgebal.f b/SRC/pdgebal.f index 1875ad4d..e33f8456 100644 --- a/SRC/pdgebal.f +++ b/SRC/pdgebal.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGEBAL( JOB, N, A, DESCA, ILO, IHI, SCALE, INFO ) * * Contribution from the Department of Computing Science and HPC2N, @@ -8,6 +14,7 @@ SUBROUTINE PDGEBAL( JOB, N, A, DESCA, ILO, IHI, SCALE, INFO ) * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -206,7 +213,25 @@ SUBROUTINE PDGEBAL( JOB, N, A, DESCA, ILO, IHI, SCALE, INFO ) * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgebal.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. 
Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) JOB, IHI, ILO, INFO, N + 102 FORMAT('PDGEBAL inputs: + $ JOB: ', A5,' + $ IHI: ', I5,' ILO: ', I5,' INFO: ', I5,' N: ', I + $ 5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* INFO = 0 ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) @@ -223,6 +248,7 @@ SUBROUTINE PDGEBAL( JOB, N, A, DESCA, ILO, IHI, SCALE, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEBAL', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -384,6 +410,7 @@ SUBROUTINE PDGEBAL( JOB, N, A, DESCA, ILO, IHI, SCALE, INFO ) * INFO = -3 CALL PXERBLA( ICTXT, 'PDGEBAL', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF F = F*SCLFAC @@ -436,6 +463,7 @@ SUBROUTINE PDGEBAL( JOB, N, A, DESCA, ILO, IHI, SCALE, INFO ) ILO = K IHI = L * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGEBAL diff --git a/SRC/pdgebd2.f b/SRC/pdgebd2.f index b35c5dbd..e159d71e 100644 --- a/SRC/pdgebd2.f +++ b/SRC/pdgebd2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. @@ -265,7 +272,25 @@ SUBROUTINE PDGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgebd2.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, LWORK, + $ M, N + 102 FORMAT('PDGEBD2 inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', + $ I5,' M: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Test the input parameters * @@ -305,8 +330,10 @@ SUBROUTINE PDGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEBD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -332,6 +359,7 @@ SUBROUTINE PDGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, END IF IF( MYROW.EQ.IAROW ) $ TAUP( II ) = ZERO + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -437,6 +465,7 @@ SUBROUTINE PDGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGEBD2 diff --git a/SRC/pdgebrd.f b/SRC/pdgebrd.f index 1389a3c4..5709bf44 100644 --- a/SRC/pdgebrd.f +++ b/SRC/pdgebrd.f @@ -1,11 +1,18 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGEBRD( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ WORK, LWORK, INFO ) * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. -* May 25, 2001 +* May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. @@ -267,7 +274,25 @@ SUBROUTINE PDGEBRD( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgebrd.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, LWORK, + $ M, N + 102 FORMAT('PDGEBRD inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', + $ I5,' M: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -312,16 +337,20 @@ SUBROUTINE PDGEBRD( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEBRD', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * MN = MIN( M, N ) - IF( MN.EQ.0 ) - $ RETURN + IF( MN.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Initialize parameters. * @@ -405,6 +434,7 @@ SUBROUTINE PDGEBRD( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGEBRD diff --git a/SRC/pdgecon.f b/SRC/pdgecon.f index b4d95ce1..619404f3 100644 --- a/SRC/pdgecon.f +++ b/SRC/pdgecon.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, $ LWORK, IWORK, LIWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, INFO, JA, LIWORK, LWORK, N @@ -208,7 +215,27 @@ SUBROUTINE PDGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgecon.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) NORM, IA, INFO, JA, + $ LIWORK, LWORK, N, ANORM, RCOND + 102 FORMAT('PDGECON inputs: + $ NORM: ', A5,' + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LIWORK: ' + $ , I5,' LWORK: ', I5,' N: ', I5,' + $ ANORM: ', F9.4,' RCOND: ', F9.4) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -277,8 +304,10 @@ SUBROUTINE PDGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGECON', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -287,11 +316,14 @@ SUBROUTINE PDGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, RCOND = ZERO IF( N.EQ.0 ) THEN RCOND = ONE + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( ANORM.EQ.ZERO ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( N.EQ.1 ) THEN RCOND = ONE + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -406,6 +438,7 @@ SUBROUTINE PDGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGECON diff --git a/SRC/pdgeequ.f b/SRC/pdgeequ.f index 76172f8c..9a430785 100644 --- a/SRC/pdgeequ.f +++ b/SRC/pdgeequ.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N DOUBLE PRECISION AMAX, COLCND, ROWCND @@ -185,7 +192,27 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. +* .. DTL variables declaration .. 
+ CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgeequ.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, M, N, + $ AMAX, COLCND, ROWCND + 102 FORMAT('PDGEEQU inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' M: ', I5, + $ ' N: ', I5,' + $ AMAX: ', F9.4,' COLCND: ', F9.4,' ROWCND: ', F9. + $ 4) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -205,6 +232,7 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEEQU', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -214,6 +242,7 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, ROWCND = ONE COLCND = ONE AMAX = ZERO + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -285,8 +314,10 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, 50 CONTINUE CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1, $ IDUMM, IDUMM, -1, -1, MYCOL ) - IF( INFO.NE.0 ) - $ RETURN + IF( INFO.NE.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF ELSE * * Invert the scale factors. @@ -344,8 +375,10 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, 110 CONTINUE CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1, $ IDUMM, IDUMM, -1, -1, MYCOL ) - IF( INFO.NE.0 ) - $ RETURN + IF( INFO.NE.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF ELSE * * Invert the scale factors. @@ -360,6 +393,7 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * END IF * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGEEQU diff --git a/SRC/pdgehd2.f b/SRC/pdgehd2.f index 26f6ba10..30270a83 100644 --- a/SRC/pdgehd2.f +++ b/SRC/pdgehd2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGEHD2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, $ LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGEHD2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IHI, ILO, INFO, JA, LWORK, N * .. @@ -214,7 +221,25 @@ SUBROUTINE PDGEHD2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgehd2.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, IHI, ILO, INFO, + $ JA, LWORK, N + 102 FORMAT('PDGEHD2 inputs: + $ IA: ', I5,' IHI: ', I5,' ILO: ', I5,' INFO: ', + $ I5,' JA: ', I5,' LWORK: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -256,8 +281,10 @@ SUBROUTINE PDGEHD2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEHD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -287,6 +314,7 @@ SUBROUTINE PDGEHD2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGEHD2 diff --git a/SRC/pdgehrd.f b/SRC/pdgehrd.f index 49ee0af6..b59d0604 100644 --- a/SRC/pdgehrd.f +++ b/SRC/pdgehrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, $ LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, * and University of California, Berkeley. 
* May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IHI, ILO, INFO, JA, LWORK, N * .. @@ -227,7 +234,25 @@ SUBROUTINE PDGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgehrd.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, IHI, ILO, INFO, + $ JA, LWORK, N + 102 FORMAT('PDGEHRD inputs: + $ IA: ', I5,' IHI: ', I5,' ILO: ', I5,' INFO: ', + $ I5,' JA: ', I5,' LWORK: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -288,8 +313,10 @@ SUBROUTINE PDGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEHRD', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -310,8 +337,10 @@ SUBROUTINE PDGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, * * Quick return if possible * - IF( IHI-ILO.LE.0 ) - $ RETURN + IF( IHI-ILO.LE.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) @@ -375,6 +404,7 @@ SUBROUTINE PDGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGEHRD diff --git a/SRC/pdgelq2.f b/SRC/pdgelq2.f index 2e6962ec..5bcf44ae 100644 --- a/SRC/pdgelq2.f +++ b/SRC/pdgelq2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. @@ -187,7 +194,25 @@ SUBROUTINE PDGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgelq2.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, LWORK, + $ M, N + 102 FORMAT('PDGELQ2 inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', + $ I5,' M: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -222,15 +247,19 @@ SUBROUTINE PDGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGELQ2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -264,6 +293,7 @@ SUBROUTINE PDGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGELQ2 diff --git a/SRC/pdgelqf.f b/SRC/pdgelqf.f index 1196e3ba..72a5d3ff 100644 --- a/SRC/pdgelqf.f +++ b/SRC/pdgelqf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. @@ -188,7 +195,25 @@ SUBROUTINE PDGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgelqf.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, LWORK, + $ M, N + 102 FORMAT('PDGELQF inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', + $ I5,' M: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -230,15 +255,19 @@ SUBROUTINE PDGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGELQF', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * K = MIN( M, N ) IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 @@ -305,6 +334,7 @@ SUBROUTINE PDGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGELQF diff --git a/SRC/pdgels.f b/SRC/pdgels.f index dae599fe..ad90abd8 100644 --- a/SRC/pdgels.f +++ b/SRC/pdgels.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, NRHS @@ -263,7 +270,27 @@ SUBROUTINE PDGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgels.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) TRANS, IA, IB, INFO, + $ JA, JB, LWORK, M, N, NRHS + 102 FORMAT('PDGELS inputs: + $ TRANS: ', A5,' + $ IA: ', I5,' IB: ', I5,' INFO: ', I5,' JA: ', I5 + $ ,' JB: ', I5,' LWORK: ', I5,' M: ', I5,' N: ', + $ I5,' NRHS: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -375,8 +402,10 @@ SUBROUTINE PDGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGELS', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -385,6 +414,7 @@ SUBROUTINE PDGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, IF( MIN( M, N, NRHS ).EQ.0 ) THEN CALL PDLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, $ IB, JB, DESCB ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -583,6 +613,7 @@ SUBROUTINE PDGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGELS diff --git a/SRC/pdgeql2.f b/SRC/pdgeql2.f index 62a37506..e25576ce 100644 --- a/SRC/pdgeql2.f +++ b/SRC/pdgeql2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro 
Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. @@ -190,7 +197,25 @@ SUBROUTINE PDGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgeql2.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, LWORK, + $ M, N + 102 FORMAT('PDGEQL2 inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', + $ I5,' M: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -225,15 +250,19 @@ SUBROUTINE PDGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQL2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -302,6 +331,7 @@ SUBROUTINE PDGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGEQL2 diff --git a/SRC/pdgeqlf.f b/SRC/pdgeqlf.f index 072841f8..259a22c8 100644 --- a/SRC/pdgeqlf.f +++ b/SRC/pdgeqlf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. @@ -190,7 +197,25 @@ SUBROUTINE PDGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgeqlf.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, LWORK, + $ M, N + 102 FORMAT('PDGEQLF inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', + $ I5,' M: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -232,15 +257,19 @@ SUBROUTINE PDGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQLF', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * K = MIN( M, N ) IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 @@ -304,6 +333,7 @@ SUBROUTINE PDGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGEQLF diff --git a/SRC/pdgeqpf.f b/SRC/pdgeqpf.f index 4fb239aa..2a6444e1 100644 --- a/SRC/pdgeqpf.f +++ b/SRC/pdgeqpf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, $ LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, * and University of California, Berkeley. * November 20, 2019 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, JA, INFO, LWORK, M, N * .. @@ -172,9 +179,9 @@ SUBROUTINE PDGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, * * References * ========== -* +* * For modifications introduced in Scalapack 2.1 -* LAWN 295 +* LAWN 295 * New robust ScaLAPACK routine for computing the QR factorization with column pivoting * Zvonimir Bujanovic, Zlatko Drmac * http://www.netlib.org/lapack/lawnspdf/lawn295.pdf @@ -219,7 +226,25 @@ SUBROUTINE PDGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, IDINT, MAX, MIN, MOD, SQRT * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgeqpf.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, JA, INFO, LWORK, + $ M, N + 102 FORMAT('PDGEQPF inputs: + $ IA: ', I5,' JA: ', I5,' INFO: ', I5,' LWORK: ', + $ I5,' M: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -263,15 +288,19 @@ SUBROUTINE PDGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQPF', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) @@ -544,6 +573,7 @@ SUBROUTINE PDGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGEQPF diff --git a/SRC/pdgeqr2.f b/SRC/pdgeqr2.f index a1a9f423..7b42b25d 100644 --- a/SRC/pdgeqr2.f +++ b/SRC/pdgeqr2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. @@ -189,7 +196,25 @@ SUBROUTINE PDGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgeqr2.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, LWORK, + $ M, N + 102 FORMAT('PDGEQR2 inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', + $ I5,' M: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -224,15 +249,19 @@ SUBROUTINE PDGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -301,6 +330,7 @@ SUBROUTINE PDGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGEQR2 diff --git a/SRC/pdgeqrf.f b/SRC/pdgeqrf.f index 00d222ee..d46d9d57 100644 --- a/SRC/pdgeqrf.f +++ b/SRC/pdgeqrf.f @@ -1,6 +1,10 @@ +* +* Copyright (c) 2022-23 Advanced Micro Devices, Inc.  All rights reserved. +* * -- ScaLAPACK routine -- -* Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. -* June 20, 2022 +* +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) @@ -10,6 +14,7 @@ SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. @@ -197,7 +202,25 @@ SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgeqrf.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, LWORK, + $ M, N + 102 FORMAT('PDGEQRF inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', + $ I5,' M: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -239,15 +262,19 @@ SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQRF', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. 
N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * K = MIN( M, N ) IPW = DESCA( NB_ ) * DESCA( NB_ ) + 1 @@ -327,6 +354,7 @@ SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGEQRF diff --git a/SRC/pdgerfs.f b/SRC/pdgerfs.f index 7ffd5e9c..df92cb3b 100644 --- a/SRC/pdgerfs.f +++ b/SRC/pdgerfs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, $ JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX, $ JX, DESCX, FERR, BERR, WORK, LWORK, IWORK, @@ -8,6 +14,7 @@ SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, * and University of California, Berkeley. * November 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IAF, IB, IX, INFO, JA, JAF, JB, JX, @@ -300,7 +307,29 @@ SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgerfs.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) TRANS, IA, IAF, IB, + $ IX, INFO, JA, JAF, JB, JX, + $ LIWORK, LWORK, N, NRHS + 102 FORMAT('PDGERFS inputs: + $ TRANS: ', A5,' + $ IA: ', I5,' IAF: ', I5,' IB: ', I5,' IX: ', I5, + $ ' INFO: ', I5,' JA: ', I5,' JAF: ', I5,' JB: ' + $ , I5,' JX: ', I5,' LIWORK: ', I5,' LWORK: ', I5 + $ ,' N: ', I5,' NRHS: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* EST = 0.0 * * Get grid parameters @@ -428,8 +457,10 @@ SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGERFS', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -444,6 +475,7 @@ SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -880,6 +912,7 @@ SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGERFS diff --git a/SRC/pdgerq2.f b/SRC/pdgerq2.f index d3400afd..c623aa26 100644 --- a/SRC/pdgerq2.f +++ b/SRC/pdgerq2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. @@ -188,7 +195,25 @@ SUBROUTINE PDGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgerq2.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. 
Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, LWORK, + $ M, N + 102 FORMAT('PDGERQ2 inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', + $ I5,' M: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -223,15 +248,19 @@ SUBROUTINE PDGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGERQ2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -262,6 +291,7 @@ SUBROUTINE PDGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGERQ2 diff --git a/SRC/pdgerqf.f b/SRC/pdgerqf.f index bc4f3661..8b430f89 100644 --- a/SRC/pdgerqf.f +++ b/SRC/pdgerqf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. @@ -188,7 +195,25 @@ SUBROUTINE PDGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgerqf.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, LWORK, + $ M, N + 102 FORMAT('PDGERQF inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', + $ I5,' M: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -230,15 +255,19 @@ SUBROUTINE PDGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGERQF', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * K = MIN( M, N ) IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 @@ -302,6 +331,7 @@ SUBROUTINE PDGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGERQF diff --git a/SRC/pdgesv.f b/SRC/pdgesv.f index 90441a26..65126d89 100644 --- a/SRC/pdgesv.f +++ b/SRC/pdgesv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, $ DESCB, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, * and University of California, Berkeley. * Jan 30, 2006 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, N, NRHS * .. @@ -177,7 +184,25 @@ SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgesv.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, IB, INFO, JA, JB, + $ N, NRHS + 102 FORMAT('PDGESV inputs: + $ IA: ', I5,' IB: ', I5,' INFO: ', I5,' JA: ', I5 + $ ,' JB: ', I5,' N: ', I5,' NRHS: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -220,6 +245,7 @@ SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGESV', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -237,6 +263,7 @@ SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, * END IF * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGESV diff --git a/SRC/pdgesvd.f b/SRC/pdgesvd.f index 19ba4e5d..99a025e2 100644 --- a/SRC/pdgesvd.f +++ b/SRC/pdgesvd.f @@ -1,4 +1,10 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, + VT,IVT,JVT,DESCVT,WORK,LWORK,INFO) * @@ -8,6 +14,7 @@ SUBROUTINE PDGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, * Jan 2006 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER JOBU,JOBVT INTEGER IA,INFO,IU,IVT,JA,JU,JVT,LWORK,M,N @@ -320,8 +327,31 @@ SUBROUTINE PDGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, * .. Intrinsic Functions .. INTRINSIC MAX,MIN,SQRT,DBLE * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgesvd.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) JOBU,JOBVT, + $ IA,INFO,IU,IVT,JA,JU,JVT,LWORK,M,N + 102 FORMAT('PDGESVD inputs: + $ JOBU: ', A5,' JOBVT: ', A5,' + $ IA: ', I5,' INFO: ', I5,' IU: ', I5,' IVT: ', I + $ 5,' JA: ', I5,' JU: ', I5,' JVT: ', I5,' LWORK + $ : ', I5,' M: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * This is just to keep ftnchek happy - IF (BLOCK_CYCLIC_2D*DTYPE_*LLD_*MB_*M_*NB_*N_.LT.0) RETURN + IF (BLOCK_CYCLIC_2D*DTYPE_*LLD_*MB_*M_*NB_*N_.LT.0) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL BLACS_GRIDINFO(DESCA(CTXT_),NPROW,NPCOL,MYPROW,MYPCOL) @@ -472,6 +502,7 @@ SUBROUTINE PDGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, * IF (INFO.NE.0) THEN CALL PXERBLA(DESCA(CTXT_),'PDGESVD',-INFO) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF (LWORK.EQ.-1) THEN GO TO 40 @@ -636,5 +667,6 @@ SUBROUTINE PDGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, * * End of PDGESVD * + AOCL_DTL_TRACE_EXIT_F RETURN END diff --git a/SRC/pdgesvx.f b/SRC/pdgesvx.f index f70ffe9f..c1df72cf 100644 --- a/SRC/pdgesvx.f +++ b/SRC/pdgesvx.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, $ IAF, JAF, DESCAF, IPIV, EQUED, R, C, B, IB, $ JB, DESCB, X, IX, JX, DESCX, RCOND, FERR, @@ -8,6 +14,7 @@ SUBROUTINE PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER EQUED, FACT, TRANS INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LIWORK, @@ -446,7 +453,30 @@ SUBROUTINE PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgesvx.f' * .. Executable Statements .. 
+* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) EQUED, FACT, TRANS, + $ IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LIWORK, + $ LWORK, N, NRHS, RCOND + 102 FORMAT('PDGESVX inputs: + $ EQUED: ', A5,' FACT: ', A5,' TRANS: ', A5,' + $ IA: ', I5,' IAF: ', I5,' IB: ', I5,' INFO: ', I + $ 5,' IX: ', I5,' JA: ', I5,' JAF: ', I5,' JB: ' + $ , I5,' JX: ', I5,' LIWORK: ', I5,' LWORK: ', I5 + $ ,' N: ', I5,' NRHS: ', I5,' + $ RCOND: ', F9.4) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -651,8 +681,10 @@ SUBROUTINE PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGESVX', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -729,6 +761,7 @@ SUBROUTINE PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, IF( INFO.NE.0 ) THEN IF( INFO.GT.0 ) $ RCOND = ZERO + AOCL_DTL_TRACE_EXIT_F RETURN END IF END IF @@ -751,6 +784,7 @@ SUBROUTINE PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, * IF( RCOND.LT.PDLAMCH( ICTXT, 'Epsilon' ) ) THEN INFO = IA + N + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -822,6 +856,7 @@ SUBROUTINE PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGESVX diff --git a/SRC/pdgetf2.f b/SRC/pdgetf2.f index a5c87772..ff8c0195 100644 --- a/SRC/pdgetf2.f +++ b/SRC/pdgetf2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2020-23 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
INTEGER IA, INFO, JA, M, N * .. @@ -157,12 +164,24 @@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetf2.f' * .. Executable Statements .. * -* .. Debug trace log capture if the DTL is enabled -#ifdef AOCL_DTL - CALL AOCL_SL_DTL_TRACE_ENTRY(__FILE__, __LINE__, ' ') -#endif + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, M, N + 102 FORMAT('PDGETF2 inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' M: ', I5, + $ ' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters. * @@ -195,18 +214,14 @@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) CALL PXERBLA( ICTXT, 'PDGETF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) * -#ifdef AOCL_DTL - CALL AOCL_SL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') -#endif + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN -#ifdef AOCL_DTL - CALL AOCL_SL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') -#endif + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -259,9 +274,7 @@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) END IF * * -#ifdef AOCL_DTL - CALL AOCL_SL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') -#endif + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGETF2 diff --git a/SRC/pdgetf2K.f b/SRC/pdgetf2K.f index 6af9d607..5dbb7f41 100644 --- a/SRC/pdgetf2K.f +++ b/SRC/pdgetf2K.f @@ -1,9 +1,14 @@ +* +* Copyright (c) 2020-23 Advanced Micro Devices, Inc.  All rights reserved. +* * -- ScaLAPACK routine -- -* Copyright (c) 2020-21 Advanced Micro Devices, Inc.  All rights reserved. -* June 10, 2020 +* +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDGETF2K( M, N, A, IA, JA, DESCA, IPIV, PANEL, INFO ) * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
INTEGER IA, INFO, JA, M, N * .. @@ -43,7 +48,7 @@ SUBROUTINE PDGETF2K( M, N, A, IA, JA, DESCA, IPIV, PANEL, INFO ) * to be present entirely in a single column of processes in the * 2D grid. This function is an internal function called by LU * factorization using lookahead panel optimization. In addition, -* the top L11 matrix is copied to a temporary buffer in the +* the top L11 matrix is copied to a temporary buffer in the * panel struct. * * The factorization has the form sub( A ) = P * L * U, where P is a @@ -187,7 +192,24 @@ SUBROUTINE PDGETF2K( M, N, A, IA, JA, DESCA, IPIV, PANEL, INFO ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetf2K.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, M, N + 102 FORMAT('PDGETF2K inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' M: ', I5, + $ ' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters. * @@ -220,13 +242,16 @@ SUBROUTINE PDGETF2K( M, N, A, IA, JA, DESCA, IPIV, PANEL, INFO ) IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * MN = MIN( M, N ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, @@ -263,7 +288,7 @@ SUBROUTINE PDGETF2K( M, N, A, IA, JA, DESCA, IPIV, PANEL, INFO ) IF( J-JA+1.LT.MN ) THEN CALL PDGER( M-J+JA-1, N-J+JA-1, -ONE, A, I+1, J, DESCA, $ 1, A, I, J+1, DESCA, DESCA( M_ ), A, I+1, - $ J+1, DESCA ) + $ J+1, DESCA ) END IF PI = PI + 1 10 CONTINUE @@ -271,7 +296,7 @@ SUBROUTINE PDGETF2K( M, N, A, IA, JA, DESCA, IPIV, PANEL, INFO ) * Copy L11 into LMEM. 
* IF( MYROW.EQ.IAROW ) - $ CALL PDPANEL_LCOPY( A, IA, JA, LDA, N, PANEL ) + $ CALL PDPANEL_LCOPY( A, IA, JA, LDA, N, PANEL ) * * Broadcast L11 with-in column * @@ -279,6 +304,7 @@ SUBROUTINE PDGETF2K( M, N, A, IA, JA, DESCA, IPIV, PANEL, INFO ) * END IF * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGETF2K diff --git a/SRC/pdgetf2_comm.f b/SRC/pdgetf2_comm.f index 8ed9361d..cc2f466d 100644 --- a/SRC/pdgetf2_comm.f +++ b/SRC/pdgetf2_comm.f @@ -1,10 +1,15 @@ +* +* Copyright (c) 2020-23 Advanced Micro Devices, Inc.  All rights reserved. +* * -- AOCL ScaLAPACK routine -- -* Copyright (c) 2020-21 Advanced Micro Devices, Inc.  All rights reserved. -* June 10, 2020 +* +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDGETF2_COMM( M, N, A, IA, JA, DESCA, IPIV, INFO ) * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, M, N * .. @@ -31,7 +36,24 @@ SUBROUTINE PDGETF2_COMM( M, N, A, IA, JA, DESCA, IPIV, INFO ) $ PDSCAL, PDSWAP, PB_TOPGET, PXERBLA * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetf2_comm.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, M, N + 102 FORMAT('PDGETF2_COMM inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' M: ', I5, + $ ' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters. * @@ -56,6 +78,7 @@ SUBROUTINE PDGETF2_COMM( M, N, A, IA, JA, DESCA, IPIV, INFO ) * END IF * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGETF2_COMM diff --git a/SRC/pdgetri.f b/SRC/pdgetri.f index 3ef147f5..b8520574 100644 --- a/SRC/pdgetri.f +++ b/SRC/pdgetri.f @@ -1,12 +1,19 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, $ IWORK, LIWORK, INFO ) * * -- ScaLAPACK routine (version 1.7.4) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. -* v1.7.4: May 10, 2006 +* v1.7.4: May 10, 2006 * v1.7: May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LIWORK, LWORK, N * .. @@ -189,7 +196,25 @@ SUBROUTINE PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetri.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. + BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, INFO, JA, LIWORK, + $ LWORK, N + 102 FORMAT('PDGETRI inputs: + $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LIWORK: ' + $ , I5,' LWORK: ', I5,' N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -226,21 +251,21 @@ SUBROUTINE PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + * MB_P * CEIL( CEIL(LOCr(M_P)/MB_P) / (LCM/NPROW) ) * -* where +* where * M_P is the global length of the pivot vector * MP = DESCA( M_ ) + DESCA( MB_ ) * NPROW * I_P is IA * I_P = IA -* MB_P is the block size use for the block cyclic distribution of the +* MB_P is the block size use for the block cyclic distribution of the * pivot vector * MB_P = DESCA (MB_ ) -* LOCc ( . ) +* LOCc ( . ) * NUMROC ( . , DESCA ( NB_ ), MYCOL, DESCA ( CSRC_ ), NPCOL ) * LOCr ( . ) * NUMROC ( . 
, DESCA ( MB_ ), MYROW, DESCA ( RSRC_ ), NPROW ) * CEIL ( X / Y ) * ICEIL( X, Y ) -* LCM +* LCM * LCM = ILCM( NPROW, NPCOL ) * LCM = ILCM( NPROW, NPCOL ) @@ -285,22 +310,28 @@ SUBROUTINE PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETRI', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Form inv(U). If INFO > 0 from PDTRTRI, then U is singular, * and the inverse is not computed. * CALL PDTRTRI( 'Upper', 'Non-unit', N, A, IA, JA, DESCA, INFO ) - IF( INFO.GT.0 ) - $ RETURN + IF( INFO.GT.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Define array descriptor for working array WORK * @@ -371,6 +402,7 @@ SUBROUTINE PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGETRI diff --git a/SRC/pdgetrs.f b/SRC/pdgetrs.f index 15ed7709..cb0ad6f5 100644 --- a/SRC/pdgetrs.f +++ b/SRC/pdgetrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, $ IB, JB, DESCB, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, INFO, JA, JB, N, NRHS @@ -177,7 +184,26 @@ SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetrs.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) TRANS, IA, IB, INFO, + $ JA, JB, N, NRHS + 102 FORMAT('PDGETRS inputs: + $ TRANS: ', A5,' + $ IA: ', I5,' IB: ', I5,' INFO: ', I5,' JA: ', I5 + $ ,' JB: ', I5,' N: ', I5,' NRHS: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -232,13 +258,16 @@ SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETRS', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 .OR. NRHS.EQ.0 ) - $ RETURN + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL DESCSET( DESCIP, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, @@ -284,6 +313,7 @@ SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, * END IF * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGETRS diff --git a/SRC/pdggqrf.f b/SRC/pdggqrf.f index 2f2487f3..be4265c6 100644 --- a/SRC/pdggqrf.f +++ b/SRC/pdggqrf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, $ DESCB, TAUB, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P * .. @@ -280,7 +287,26 @@ SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdggqrf.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, IB, INFO, JA, JB, + $ LWORK, M, N, P + 102 FORMAT('PDGGQRF inputs: + $ IA: ', I5,' IB: ', I5,' INFO: ', I5,' JA: ', I5 + $ ,' JB: ', I5,' LWORK: ', I5,' M: ', I5,' N: ', + $ I5,' P: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -342,8 +368,10 @@ SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGGQRF', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -363,6 +391,7 @@ SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, CALL PDGERQF( N, P, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = DBLE( MAX( LWMIN, INT( WORK( 1 ) ) ) ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGGQRF diff --git a/SRC/pdggrqf.f b/SRC/pdggrqf.f index 78fd5eac..59a21059 100644 --- a/SRC/pdggrqf.f +++ b/SRC/pdggrqf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, $ DESCB, TAUB, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P * .. @@ -280,7 +287,26 @@ SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, MIN, MOD * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdggrqf.f' * .. Executable Statements .. +* + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) IA, IB, INFO, JA, JB, + $ LWORK, M, N, P + 102 FORMAT('PDGGRQF inputs: + $ IA: ', I5,' IB: ', I5,' INFO: ', I5,' JA: ', I5 + $ ,' JB: ', I5,' LWORK: ', I5,' M: ', I5,' N: ', + $ I5,' P: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * * Get grid parameters * @@ -342,8 +368,10 @@ SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGGRQF', -INFO ) + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -364,6 +392,7 @@ SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, CALL PDGEQRF( P, N, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = DBLE( MAX( LWMIN, INT( WORK( 1 ) ) ) ) * + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGGRQF diff --git a/SRC/pdlaswp.f b/SRC/pdlaswp.f index 43ed72ed..bec0fe91 100644 --- a/SRC/pdlaswp.f +++ b/SRC/pdlaswp.f @@ -1,5 +1,10 @@ +* +* Copyright (c) 2022-23 Advanced Micro Devices, Inc.  All rights reserved. +* * -- ScaLAPACK routine -- -* Copyright (c) 2022 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, $ IPIV ) @@ -9,6 +14,7 @@ SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIREC, ROWCOL INTEGER IA, JA, K1, K2, N @@ -153,17 +159,30 @@ SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, LOGICAL LSAME EXTERNAL LSAME * .. +* .. DTL variables declaration .. + CHARACTER BUFFER*512 + CHARACTER*15, PARAMETER :: FILE_NAME = 'pdlaswp.f' * .. Executable Statements .. * -#ifdef AOCL_DTL - CALL AOCL_SL_DTL_TRACE_ENTRY(__FILE__, __LINE__, ' ') -#endif + CALL AOCL_SCALAPACK_INIT( ) +* + IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN +* .. Init DTL log Buffer to zero .. 
+ BUFFER='0' + AOCL_DTL_TRACE_ENTRY_F + WRITE(BUFFER,102) DIREC, ROWCOL, IA, JA, + $ K1, K2, N + 102 FORMAT('PDLASWP inputs: + $ DIREC: ', A5,' ROWCOL: ', A5,' + $ IA: ', I5,' JA: ', I5,' K1: ', I5,' K2: ', I5,' + $ N: ', I5) + CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + END IF +* * Quick return if possible * IF( N.EQ.0 ) THEN -#ifdef AOCL_DTL - CALL AOCL_SL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') -#endif + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -211,9 +230,7 @@ SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, END IF END IF * -#ifdef AOCL_DTL - CALL AOCL_SL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') -#endif + AOCL_DTL_TRACE_EXIT_F RETURN * * End PDLASWP From ecd3a5e61c3e97358d0c61bc8533b946fddaa35d Mon Sep 17 00:00:00 2001 From: nprasadm Date: Thu, 13 Apr 2023 21:48:06 +0530 Subject: [PATCH 17/30] SCALAPACK: Fix added for build issue for LLVM(Clang-16) on windows. i) Clang-16 (ISO C99 and later) does not support implicit function declarations. ii) Added the prototypes in the relevant header files to enable the build. 
Signed-off-by: Nagendra AMD-Internal: [CPUPL-3179] Change-Id: I1b0283fbcdfbd7cedd22d3eb155cbacde8824dfe --- BLACS/SRC/Bdef.h | 23 ++++++++++++++++++++--- FRAMEWORK/SL_Context.c | 2 +- REDIST/SRC/redist.h | 1 + SRC/pblas.h | 11 +++++++++++ TOOLS/SL_gridreshape.c | 1 + 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/BLACS/SRC/Bdef.h b/BLACS/SRC/Bdef.h index ba7d99d3..da2f062c 100644 --- a/BLACS/SRC/Bdef.h +++ b/BLACS/SRC/Bdef.h @@ -30,7 +30,7 @@ typedef struct bLaCsCoNtExT BLACSCONTEXT; struct bLaCsCoNtExT { BLACSSCOPE rscp, cscp, ascp, pscp; /* row, column, all, and pt2pt scopes */ -#ifdef ENABLE_LOOK_AHEAD_FOR_LU +#ifdef ENABLE_LOOK_AHEAD_FOR_LU BLACSSCOPE lscp; /* row scope for look ahead panel */ #endif /* ENABLE_LOOK_AHEAD_FOR_LU */ BLACSSCOPE *scp; /* pointer to present scope */ @@ -77,7 +77,7 @@ struct bLaCbUfF #define AOCL_KEEP_POLLING 2 /* - * Definition of buffer type for + * Definition of buffer type for * user defined datatype communications */ typedef struct aOcLpBuFf AOCLPBUFF; @@ -557,5 +557,22 @@ Int BI_ContxtNum(BLACSCONTEXT *ctxt); #endif - +/* + * Prototypes declarations + */ +void BI_imvcopy(Int m, Int n, Int *A, Int lda, Int *buff); +void BI_ivmcopy(Int m, Int n, Int *A, Int lda, Int *buff); +void BI_smvcopy(Int m, Int n, float *A, Int lda, float *buff); +void BI_svmcopy(Int m, Int n, float *A, Int lda, float *buff); +void BI_dmvcopy(Int m, Int n, double *A, Int lda, double *buff); +void BI_dvmcopy(Int m, Int n, double *A, Int lda, double *buff); +void BI_TransDist(BLACSCONTEXT *ctxt, char scope, Int m, Int n, Int *rA, + Int *cA, Int ldrc, BI_DistType *dist, Int rdest, Int cdest); +void Cblacs_pinfo(Int *mypnum, Int *nprocs); +void blacs_gridmap_(Int *ConTxt, Int *usermap, Int *ldup, Int *nprow0, + Int *npcol0); +void Cblacs_gridinfo(Int ConTxt, Int *nprow, Int *npcol, Int *myrow, Int *mycol); +void Cblacs_abort(Int ConTxt, Int ErrNo); +void Cblacs_get(Int ConTxt, Int what, Int *val); +void Cblacs_gridmap(Int *ConTxt, Int *usermap, 
Int ldup, Int nprow0, Int npcol0); #endif diff --git a/FRAMEWORK/SL_Context.c b/FRAMEWORK/SL_Context.c index 5e1f6319..4e8e99d0 100644 --- a/FRAMEWORK/SL_Context.c +++ b/FRAMEWORK/SL_Context.c @@ -61,7 +61,7 @@ int scalapack_pthread_mutex_unlock(scalapack_pthread_mutex_t *mutex) return 0; } // -- pthread_once() -- -static bool +static BOOL scalapack_init_once_wrapper(scalapack_pthread_once_t *once, void *param, void **context) { (void)once; diff --git a/REDIST/SRC/redist.h b/REDIST/SRC/redist.h index 0bdfe270..57d2331a 100644 --- a/REDIST/SRC/redist.h +++ b/REDIST/SRC/redist.h @@ -1,6 +1,7 @@ #ifdef ENABLE_ILP64 #include #endif +#include "../BLACS/SRC/Bdef.h" #ifdef T3D #define float double diff --git a/SRC/pblas.h b/SRC/pblas.h index e85fcdab..bed1258b 100644 --- a/SRC/pblas.h +++ b/SRC/pblas.h @@ -15,6 +15,9 @@ * --------------------------------------------------------------------- */ +#ifndef PBLAS_H +#define PBLAS_H + /* * This file includes the standard C libraries, as well as system * dependent include files. All PBLAS routines include this file. @@ -1150,3 +1153,11 @@ typedef char * F_CHAR; #define blacs_pcoord_ blacs_pcoord__ #define blacs_barrier_ blacs_barrier__ #endif + +/* + * Prototypes declarations + */ +Int numroc_(Int *N, Int *NB, Int *IPROC, Int *ISRCPROC, Int *NPROCS); + + +#endif /* PBLAS_H */ diff --git a/TOOLS/SL_gridreshape.c b/TOOLS/SL_gridreshape.c index aa0c9290..b069a8c8 100644 --- a/TOOLS/SL_gridreshape.c +++ b/TOOLS/SL_gridreshape.c @@ -1,5 +1,6 @@ #include #include +#include "../BLACS/SRC/Bdef.h" #ifndef Int #define Int int From b1a059c8c2f413b0213d2c957e7fc8e9e04f3051 Mon Sep 17 00:00:00 2001 From: nprasadm Date: Thu, 13 Apr 2023 20:49:57 +0530 Subject: [PATCH 18/30] aocl-scaLAPACK: Trace and Logging feature fine-tuning. 1) Support added to enable trace and log individually at run time. 2) Log files also capture MPI rank, Process grid information. 
Signed-off-by: Nagendra AMD-Internal: [CPUPL-2700] Change-Id: Ia5126448408e4b59c5626d4f1345d01e8f77943e --- AOCL_DTL/aocldtl.c | 3 +- AOCL_DTL/aocldtlcf.h | 16 +++---- AOCL_DTL/aoclflist.c | 15 +++--- CMakeLists.txt | 40 ++++++++++------ FRAMEWORK/SL_Context.c | 24 ++++++++-- FRAMEWORK/SL_Context.h | 11 +++-- FRAMEWORK/SL_Context_fortran_include.h | 10 ++-- FRAMEWORK/SL_Context_module.f | 30 ++++++++++-- SRC/aocl_dtl_trace_entry.c | 15 +++--- SRC/pddbsv.f | 53 +++++++++++++++------ SRC/pddbtrf.f | 60 ++++++++++++++++++------ SRC/pddbtrs.f | 64 +++++++++++++++++++------- SRC/pddbtrsv.f | 64 +++++++++++++++++++------- SRC/pddtsv.f | 51 ++++++++++++++------ SRC/pddttrf.f | 59 ++++++++++++++++++------ SRC/pddttrs.f | 62 +++++++++++++++++++------ SRC/pddttrsv.f | 63 +++++++++++++++++++------ SRC/pdgbsv.f | 53 +++++++++++++++------ SRC/pdgbtrf.f | 60 ++++++++++++++++++------ SRC/pdgbtrs.f | 64 +++++++++++++++++++------- SRC/pdgebal.f | 47 +++++++++++++------ SRC/pdgebd2.f | 51 ++++++++++++++------ SRC/pdgebrd.f | 51 ++++++++++++++------ SRC/pdgecon.f | 61 +++++++++++++++++------- SRC/pdgeequ.f | 58 ++++++++++++++++------- SRC/pdgehd2.f | 48 +++++++++++++------ SRC/pdgehrd.f | 51 ++++++++++++++------ SRC/pdgelq2.f | 51 ++++++++++++++------ SRC/pdgelqf.f | 51 ++++++++++++++------ SRC/pdgels.f | 55 +++++++++++++++------- SRC/pdgeql2.f | 51 ++++++++++++++------ SRC/pdgeqlf.f | 51 ++++++++++++++------ SRC/pdgeqpf.f | 51 ++++++++++++++------ SRC/pdgeqr2.f | 51 ++++++++++++++------ SRC/pdgeqrf.f | 51 ++++++++++++++------ SRC/pdgerfs.f | 59 ++++++++++++++++-------- SRC/pdgerq2.f | 51 ++++++++++++++------ SRC/pdgerqf.f | 51 ++++++++++++++------ SRC/pdgesv.f | 45 ++++++++++++------ SRC/pdgesvd.f | 53 ++++++++++++++------- SRC/pdgesvx.f | 63 +++++++++++++++++-------- SRC/pdgetf2.f | 47 +++++++++++++------ SRC/pdgetf2K.f | 46 ++++++++++++------ SRC/pdgetf2_comm.f | 40 ++++++++++------ SRC/pdgetrf.f | 19 ++++---- SRC/pdgetrf0.f | 1 - SRC/pdgetri.f | 54 ++++++++++++++++------ 
SRC/pdgetrs.f | 50 ++++++++++++++------ SRC/pdggqrf.f | 51 ++++++++++++++------ SRC/pdggrqf.f | 51 ++++++++++++++------ SRC/pdlaswp.f | 49 +++++++++++++------- 51 files changed, 1678 insertions(+), 648 deletions(-) diff --git a/AOCL_DTL/aocldtl.c b/AOCL_DTL/aocldtl.c index e53a2e6e..941aa8a7 100644 --- a/AOCL_DTL/aocldtl.c +++ b/AOCL_DTL/aocldtl.c @@ -253,9 +253,8 @@ void DTL_Trace( fprintf(pOutFile, "Out of %s() with error %s\n", pi8FunctionName, pi8Message); } break; - case TRACE_TYPE_LOG: - fprintf(pOutFile, "%s:%d:%s\n", pi8FileName, ui32LineNumber, pi8Message); + fprintf(pOutFile, "%s:%d,%s\n", pi8FileName, ui32LineNumber, pi8Message); break; case TRACE_TYPE_RAW: diff --git a/AOCL_DTL/aocldtlcf.h b/AOCL_DTL/aocldtlcf.h index a2198dbf..b7b101bc 100644 --- a/AOCL_DTL/aocldtlcf.h +++ b/AOCL_DTL/aocldtlcf.h @@ -1,28 +1,28 @@ /*=================================================================== * File Name : aocldtlcf.h - * + * * Description : This is configuration file for debug and trace * libaray, all debug features (except auto trace) * can be enabled/disabled in this file. 
* * Copyright (C) 2020, Advanced Micro Devices, Inc - * + * *==================================================================*/ #ifndef _AOCLDTLCF_H_ #define _AOCLDTLCF_H_ -/* Macro for tracing the log If the user wants to enable tracing he has to +/* Macro for tracing the log If the user wants to enable tracing he has to enable this macro by making it to 1 else 0 */ -#define AOCL_DTL_TRACE_ENABLE 0 +#define AOCL_DTL_TRACE_ENABLE 1 -/* Macro for dumping the log If the user wants to enable dumping he has to +/* Macro for dumping the log If the user wants to enable dumping he has to enable this macro by making it to 1 else 0 */ -#define AOCL_DTL_DUMP_ENABLE 0 +#define AOCL_DTL_DUMP_ENABLE 1 /* Macro for logging the logs If the user wants to enable loging information he has to enable this macro by making it to 1 else 0 */ -#define AOCL_DTL_LOG_ENABLE 0 +#define AOCL_DTL_LOG_ENABLE 1 /* Select the trace level till which you want to log the data */ /* By default it will log for all levels */ @@ -31,7 +31,7 @@ /* user has to explicitly use the below macros to identify ciriticality of the logged message */ #define AOCL_DTL_LEVEL_ALL (14) -#define AOCL_DTL_LEVEL_TRACE_8 (13) +#define AOCL_DTL_LEVEL_TRACE_8 (13) #define AOCL_DTL_LEVEL_TRACE_7 (12) /* Kernels */ #define AOCL_DTL_LEVEL_TRACE_6 (11) #define AOCL_DTL_LEVEL_TRACE_5 (10) diff --git a/AOCL_DTL/aoclflist.c b/AOCL_DTL/aoclflist.c index 5bba38fb..e4c178f6 100644 --- a/AOCL_DTL/aoclflist.c +++ b/AOCL_DTL/aoclflist.c @@ -1,12 +1,12 @@ /*=================================================================== * File Name : aoclflist.c - * - * Description : Linked list of open files assocaited with + * + * Description : Linked list of open files assocaited with * each thread. This is used to log the data * to correct file as per the current thread id. 
* - * Copyright (C) 2020, Advanced Micro Devices, Inc - * + * Copyright (C) 2020-23, Advanced Micro Devices, Inc + * *==================================================================*/ #include "aocltpdef.h" @@ -14,9 +14,9 @@ #include "aoclfal.h" #include "aoclflist.h" #include "aoclos.h" +#include "SL_Context.h" - -/* Disable instrumentation for following function, since they are called from +/* Disable instrumentation for following function, since they are called from * Auto Generated execution trace handlers. */ Bool AOCL_FLIST_IsEmpty( AOCL_FLIST_Node *plist) __attribute__((no_instrument_function)); @@ -89,8 +89,7 @@ AOCL_FAL_FILE *AOCL_FLIST_AddFile(const int8 *pchFilePrefix, AOCL_FLIST_Node **p } /* We don't have exiting file, lets try to open new one */ - sprintf(pchFileName, "P%d_T%d_%s", AOCL_getpid(), tid, pchFilePrefix); - + sprintf(pchFileName, "P%d_T%d_Rank%d_%s", AOCL_getpid(), tid,scalapack_context.rank, pchFilePrefix); file = AOCL_FAL_Open(pchFileName, "wb"); if (file == NULL) { diff --git a/CMakeLists.txt b/CMakeLists.txt index 40dd1cfe..03e3f5c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -157,6 +157,18 @@ macro(SCALAPACK_install_library lib) ) endmacro() +#Function to add file name for DTL purpose +function(custom_macros_for_each_scalapack_source target_library_sources) + get_target_property(src_files_list "${target_library_sources}" SOURCES) + foreach(src_file ${src_files_list}) + get_filename_component(src_file_name "${src_file}" NAME) + get_filename_component(function_name "${src_file}" NAME_WLE) + set_property( + SOURCE "${src_file}" APPEND + PROPERTY COMPILE_DEFINITIONS "FUNCTION_NAME=\"${function_name}\"" "FILE_NAME=\"${src_file_name}\"" ) + endforeach() +endfunction() + # -------------------------------------------------- # Testing SET(DART_TESTING_TIMEOUT 600) @@ -313,15 +325,15 @@ else(LAPACK_FOUND) message(STATUS "--> LAPACK and BLAS were not found. 
Reference LAPACK and BLAS will be downloaded and installed") include(ExternalProject) ExternalProject_Add( - lapack - URL http://www.netlib.org/lapack/lapack.tgz - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SCALAPACK_BINARY_DIR} + lapack + URL http://www.netlib.org/lapack/lapack.tgz + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SCALAPACK_BINARY_DIR} PREFIX ${SCALAPACK_BINARY_DIR}/dependencies ) - if (UNIX) + if (UNIX) SET(LAPACK_LIBRARIES ${SCALAPACK_BINARY_DIR}/lib/liblapack.a CACHE STRING "LAPACK library" FORCE) SET(BLAS_LIBRARIES ${SCALAPACK_BINARY_DIR}/lib/libblas.a CACHE STRING "BLAS library" FORCE) - else (UNIX) # On Windows + else (UNIX) # On Windows SET(LAPACK_LIBRARIES ${SCALAPACK_BINARY_DIR}/lib/liblapack.lib CACHE STRING "LAPACK library" FORCE) SET(BLAS_LIBRARIES ${SCALAPACK_BINARY_DIR}/lib/libblas.lib CACHE STRING "BLAS library" FORCE) endif (UNIX) @@ -336,10 +348,8 @@ MESSAGE(STATUS "=========") # # AOCL_DTL # -if (UNIX) - add_subdirectory(AOCL_DTL) - append_subdir_files(dtl "AOCL_DTL") -endif() +add_subdirectory(AOCL_DTL) +append_subdir_files(dtl "AOCL_DTL") # # FRAMEWORK @@ -391,10 +401,12 @@ endif () if (UNIX) if(CUSTOM_BLACS_FOUND) add_library(scalapack ${dtl} ${framework} ${framework-C} ${tools} ${tools-C} ${extra_lapack} ${pblas} ${pblas-F} ${ptzblas} ${ptools} ${pbblas} ${redist} ${src} ${src-C}) + custom_macros_for_each_scalapack_source (scalapack) target_link_libraries( scalapack ${BLACS_LIBRARY} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) scalapack_install_library(scalapack) else(CUSTOM_BLACS_FOUND) add_library(scalapack ${dtl} ${framework} ${framework-C} ${blacs} ${tools} ${tools-C} ${extra_lapack} ${pblas} ${pblas-F} ${ptzblas} ${ptools} ${pbblas} ${redist} ${src} ${src-C}) + custom_macros_for_each_scalapack_source (scalapack) target_link_libraries( scalapack ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) scalapack_install_library(scalapack) endif(CUSTOM_BLACS_FOUND) @@ -403,13 +415,13 @@ else (UNIX) # Need to separate Fortran and C Code # create Fortran 
objects and add to scalapack library first if(CUSTOM_BLACS_FOUND) add_library(scalapack-F OBJECT ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${framework} ${src} ${extra_lapack} ) - add_library(scalapack $ ${tools-C} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) + add_library(scalapack $ ${tools-C} ${dtl} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack ${BLACS_LIBRARY} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) else(CUSTOM_BLACS_FOUND) add_library(scalapack-F OBJECT ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${framework} ${src} ${extra_lapack} ) - add_library(scalapack $ ${blacs} ${tools-C} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) + add_library(scalapack $ ${blacs} ${tools-C} ${dtl} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) @@ -417,15 +429,15 @@ else (UNIX) # Need to separate Fortran and C Code else (CMAKE_C_COMPILER_ID MATCHES Clang) # create C objects and add to scalapack library first if(CUSTOM_BLACS_FOUND) - add_library(scalapack-C OBJECT ${tools-C} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) + add_library(scalapack-C OBJECT ${tools-C} ${dtl} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack-C ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack-C PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) - add_library(scalapack $ ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${framework} ${src} ${extra_lapack} ) + add_library(scalapack $ ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${dtl} ${framework} ${src} ${extra_lapack} ) target_link_libraries( scalapack ${BLACS_LIBRARY} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) 
target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) else(CUSTOM_BLACS_FOUND) - add_library(scalapack-C OBJECT ${blacs} ${tools-C} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) + add_library(scalapack-C OBJECT ${blacs} ${tools-C} ${dtl} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack-C ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack-C PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) add_library(scalapack $ ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${framework} ${src} ${extra_lapack} ) diff --git a/FRAMEWORK/SL_Context.c b/FRAMEWORK/SL_Context.c index 4e8e99d0..12235be8 100644 --- a/FRAMEWORK/SL_Context.c +++ b/FRAMEWORK/SL_Context.c @@ -24,6 +24,8 @@ #include "SL_Context.h" #include #include +#include "../BLACS/SRC/Bdef.h" + #if defined(SCALAPACK_NO_CONTEXT) // This branch defines a pthread-like API, scalapack_pthread_*(), and implements it // in terms of "dummy" code that doesn't depend on POSIX threads or any other @@ -128,16 +130,27 @@ int scalapack_env_get_var(const char *env, int fallback) void scalapack_thread_init_rntm_from_env(aocl_scalapack_global_context *context) { int status; - /* Check whether DTL is set in the run-time environment */ - status = scalapack_env_get_var("AOCL_SL_DTL", -1); + /* Check whether 'debug trace' is set in the run-time environment */ + status = scalapack_env_get_var("AOCL_SL_TRACE", -1); if (status == -1) { - context->is_dtl_enabled = 0; + context->is_trace_enabled = 0; } else { - context->is_dtl_enabled = 1; + context->is_trace_enabled = 1; + } + + /* Check whether 'debug trace' is set in the run-time environment */ + status = scalapack_env_get_var("AOCL_SL_LOG", -1); + if (status == -1) + { + context->is_log_enabled = 0; + } + else + { + context->is_log_enabled = 1; } /* Check whether AOCL-progress requirement is set in the run-time environment */ @@ -152,6 +165,9 @@ void 
scalapack_thread_init_rntm_from_env(aocl_scalapack_global_context *context) context->is_progress_enabled = 1; } + /* set the context MPI rank, number of processes */ + Cblacs_pinfo(&(context->rank), &(context->num_procs) ); + /* Since multithreading support is not present in the aocl-scaLAPACK, we set the context number of threads to 1. NOTE: If multithread support is enabled, then we have to set the diff --git a/FRAMEWORK/SL_Context.h b/FRAMEWORK/SL_Context.h index 4338d180..8eaee0c6 100644 --- a/FRAMEWORK/SL_Context.h +++ b/FRAMEWORK/SL_Context.h @@ -77,11 +77,16 @@ void scalapack_pthread_once(scalapack_pthread_once_t *once, void (*init)(void)); *****************************************************************************************/ typedef struct _aocl_scalapack_global_context { - int num_threads; /* Number of Threads */ - int is_dtl_enabled; /* DTL log */ + int num_threads; /* Number of threads */ + int is_trace_enabled; /* Enable debug trace */ + int is_log_enabled; /* Enable debug log */ int is_progress_enabled; /* AOCL-progress */ + // MPI Variables related to rank, process info + int rank; + int num_procs; } aocl_scalapack_global_context; + extern aocl_scalapack_global_context scalapack_context; typedef aocl_scalapack_global_context aocl_scalapack_global_context_; typedef aocl_scalapack_global_context AOCL_SCALAPACK_GLOBAL_CONTEXT; @@ -90,7 +95,7 @@ typedef aocl_scalapack_global_context AOCL_SCALAPACK_GLOBAL_CONTEXT; * context * * \retval none. 
- + void aocl_scalapack_init(); */ void aocl_scalapack_init_(); void AOCL_SCALAPACK_INIT(); diff --git a/FRAMEWORK/SL_Context_fortran_include.h b/FRAMEWORK/SL_Context_fortran_include.h index 16779a16..a37477fe 100644 --- a/FRAMEWORK/SL_Context_fortran_include.h +++ b/FRAMEWORK/SL_Context_fortran_include.h @@ -26,12 +26,16 @@ #define SL_CONTEXT_FORTRAN_H #if _WIN32 -#define AOCL_DTL_TRACE_ENTRY_F CONTINUE -#define AOCL_DTL_TRACE_EXIT_F CONTINUE +#define AOCL_DTL_TRACE_ENTRY_F CALL SL_DTL_TRACE_ENTRY_F(__FILE__, __LINE__, ' ') +#define AOCL_DTL_TRACE_EXIT_F CALL SL_DTL_TRACE_EXIT_F (__FILE__, __LINE__, ' ') + +#define AOCL_DTL_LOG_ENTRY_F CALL AOCL_SL_DTL_LOG_ENTRY(__FILE__, "", __LINE__, BUFFER ) #define aocl_scalapack_init_ AOCL_SCALAPACK_INIT #else -#define AOCL_DTL_TRACE_ENTRY_F CALL AOCL_SL_DTL_TRACE_ENTRY(FILE_NAME, __LINE__, ' ') +#define AOCL_DTL_TRACE_ENTRY_F CALL SL_DTL_TRACE_ENTRY_F(FILE_NAME, __LINE__, ' ') #define AOCL_DTL_TRACE_EXIT_F CALL SL_DTL_TRACE_EXIT_F (FILE_NAME, __LINE__, ' ') + +#define AOCL_DTL_LOG_ENTRY_F CALL AOCL_SL_DTL_LOG_ENTRY( FILE_NAME// C_NULL_CHAR, FUNCTION_NAME// C_NULL_CHAR, __LINE__, BUFFER ) #endif #endif /* SL_CONTEXT_FORTRAN_H */ diff --git a/FRAMEWORK/SL_Context_module.f b/FRAMEWORK/SL_Context_module.f index 91973e28..f0b8e5b7 100644 --- a/FRAMEWORK/SL_Context_module.f +++ b/FRAMEWORK/SL_Context_module.f @@ -25,8 +25,11 @@ MODULE LINK_TO_C_GLOBALS USE, INTRINSIC::ISO_C_BINDING TYPE, BIND(C)::AOCL_SCALAPACK_GLOBAL_CONTEXT INTEGER(C_INT)::NUM_THREADS - INTEGER(C_INT)::IS_DTL_ENABLED + INTEGER(C_INT)::IS_TRACE_ENABLED + INTEGER(C_INT)::IS_LOG_ENABLED INTEGER(C_INT)::IS_PROGRESS_ENABLED + INTEGER(C_INT)::RANK + INTEGER(C_INT)::NUM_PROCS END TYPE TYPE(AOCL_SCALAPACK_GLOBAL_CONTEXT),BIND(C)::SCALAPACK_CONTEXT @@ -44,7 +47,7 @@ SUBROUTINE SL_DTL_TRACE_ENTRY_F( FILENAME, LINENUMBER, MESSAGE ) * .. * .. Array Arguments .. 
CHARACTER FILENAME( * ), MESSAGE( * ) - IF(SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1) THEN + IF(SCALAPACK_CONTEXT%IS_TRACE_ENABLED.EQ.1) THEN CALL AOCL_SL_DTL_TRACE_ENTRY(FILENAME, LINENUMBER, MESSAGE) END IF RETURN @@ -64,7 +67,7 @@ SUBROUTINE SL_DTL_TRACE_EXIT_F( FILENAME, LINENUMBER, MESSAGE ) * .. * .. Array Arguments .. CHARACTER FILENAME( * ), MESSAGE( * ) - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN + IF( SCALAPACK_CONTEXT%IS_TRACE_ENABLED.EQ.1 ) THEN CALL AOCL_SL_DTL_TRACE_EXIT(FILENAME, LINENUMBER, MESSAGE) END IF RETURN @@ -72,3 +75,24 @@ SUBROUTINE SL_DTL_TRACE_EXIT_F( FILENAME, LINENUMBER, MESSAGE ) * End of SL_DTL_TRACE_ENTRY_F * END +* +* ===================================================================== +* SUBROUTINE SL_DTL_LOG_ENTRY_F +* ===================================================================== + SUBROUTINE SL_DTL_LOG_ENTRY_F( FILENAME, LINENUMBER, MESSAGE ) +* + USE LINK_TO_C_GLOBALS +* .. Scalar Arguments .. + INTEGER LINENUMBER +* .. +* .. Array Arguments .. + CHARACTER FILENAME( * ), MESSAGE( * ) + IF(SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1) THEN + CALL AOCL_SL_DTL_TRACE_ENTRY(FILENAME, LINENUMBER, MESSAGE) + END IF + RETURN +* +* End of SL_DTL_TRACE_ENTRY_F +* + END + diff --git a/SRC/aocl_dtl_trace_entry.c b/SRC/aocl_dtl_trace_entry.c index 2c9e73fb..d3c5be8a 100644 --- a/SRC/aocl_dtl_trace_entry.c +++ b/SRC/aocl_dtl_trace_entry.c @@ -1,7 +1,7 @@ /* --------------------------------------------------------------------- * * -- AOCL ScaLAPACK routine -- -* Copyright (c) 2020-2022 Advanced Micro Devices, Inc.  All rights reserved. +* Copyright (c) 2020-2023 Advanced Micro Devices, Inc.  All rights reserved. 
* * --------------------------------------------------------------------- */ @@ -17,12 +17,15 @@ /* Customized for Fortran calls from Scalapack code */ -void aocl_sl_dtl_log_entry_( char *buffer ) +void aocl_sl_dtl_log_entry_( const char *filename, const char *function_name, + unsigned int *line_number, const char *buffer ) { -#if AOCL_DTL_LOG_ENABLE - /* Capture the contents to the DTL log file */ - AOCL_DTL_LOG(AOCL_DTL_LEVEL_INFO, buffer); -#endif + DTL_Trace(AOCL_DTL_LEVEL_INFO, + TRACE_TYPE_LOG, + filename, + function_name, + *line_number, + buffer); } void aocl_sl_dtl_trace_entry_( const char * fileName, unsigned int * lineNumber, diff --git a/SRC/pddbsv.f b/SRC/pddbsv.f index b5841339..d4709cda 100644 --- a/SRC/pddbsv.f +++ b/SRC/pddbsv.f @@ -389,26 +389,23 @@ SUBROUTINE PDDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, * .. External Subroutines .. EXTERNAL PDDBTRF, PDDBTRS, PXERBLA * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pddbsv.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) BWL, BWU, IB, INFO, - $ JA, LWORK, N, NRHS - 102 FORMAT('PDDBSV inputs: - $ BWL: ', I5,' BWU: ', I5,' IB: ', I5,' INFO: ', - $ I5,' JA: ', I5,' LWORK: ', I5,' N: ', I5,' NRH - $ S: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines @@ -431,12 +428,29 @@ SUBROUTINE PDDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, CALL PXERBLA( ICTXT, $ 'PDDBSV', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) BWL, BWU, IB, INFO, JA, LWORK, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDDBSV inputs:,BWL:',I5,',BWU:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * * Size needed for AF in factorization * @@ -454,6 +468,9 @@ SUBROUTINE PDDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, IF( INFO .LT. 
0 ) THEN CALL PXERBLA( ICTXT, 'PDDBSV', -INFO ) ENDIF +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -468,9 +485,15 @@ SUBROUTINE PDDBSV( N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, DESCB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pddbtrf.f b/SRC/pddbtrf.f index cfc6dcf8..f23c8fa0 100644 --- a/SRC/pddbtrf.f +++ b/SRC/pddbtrf.f @@ -388,25 +388,23 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pddbtrf.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) BWL, BWU, INFO, JA, - $ LAF, LWORK, N - 102 FORMAT('PDDBTRF inputs: - $ BWL: ', I5,' BWU: ', I5,' INFO: ', I5,' JA: ', - $ I5,' LAF: ', I5,' LWORK: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Test the input parameters * @@ -440,6 +438,19 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, MBW2 = MAX_BW*MAX_BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) BWL, BWU, INFO, JA, LAF, LWORK, + $ N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDDBTRF inputs:,BWL:',I5,',BWU:',I5,',INFO:',I5, + $ ',JA:',I5,',LAF:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5 , + $ ',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -486,6 +497,9 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDDBTRF, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -493,6 +507,9 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*MAX( BWL, BWU ) ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PDDBTRF, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -508,6 +525,9 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDDBTRF: auxiliary storage error ', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -523,6 +543,9 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, INFO = -10 CALL PXERBLA( ICTXT, 'PDDBTRF: worksize error ', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -579,6 +602,9 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBTRF', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -586,6 +612,9 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, * Quick return if possible * IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -1275,6 +1304,9 @@ SUBROUTINE PDDBTRF( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK, CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pddbtrs.f b/SRC/pddbtrs.f index 867b11f3..a966e7a4 100644 --- a/SRC/pddbtrs.f +++ b/SRC/pddbtrs.f @@ -402,27 +402,23 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pddbtrs.f' +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) TRANS, BWL, BWU, IB, - $ INFO, JA, LAF, LWORK, N, NRHS - 102 FORMAT('PDDBTRS inputs: - $ TRANS: ', A5,' - $ BWL: ', I5,' BWU: ', I5,' IB: ', I5,' INFO: ', - $ I5,' JA: ', I5,' LAF: ', I5,' LWORK: ', I5,' N - $ : ', I5,' NRHS: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Test the input parameters * @@ -481,6 +477,21 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TRANS, BWL, BWU, IB, INFO, JA, + $ LAF, LWORK, N, NRHS, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDDBTRS inputs:,TRANS:',A5,',BWL:',I5, + $ ',BWU:',I5,',IB:',I5,',INFO:',I5,',JA:',I5, + $ ',LAF:',I5,',LWORK:',I5,',N:',I5, + $ ',NRHS:',I5,',NPROW:',I5,',NPCOL:',I5 , + $ ',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -555,6 +566,9 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDDBTRS, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -562,6 +576,9 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*MAX( BWL, BWU ) ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PDDBTRS, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -576,6 +593,9 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, INFO = -15 CALL PXERBLA( ICTXT, 'PDDBTRS: worksize error', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -648,6 +668,9 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBTRS', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -655,11 +678,17 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, * Quick return if possible * IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -774,6 +803,9 @@ SUBROUTINE PDDBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, B, IB, * WORK( 1 ) = WORK_SIZE_MIN * +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pddbtrsv.f b/SRC/pddbtrsv.f index 860f1f5a..89ab7d6c 100644 --- a/SRC/pddbtrsv.f +++ b/SRC/pddbtrsv.f @@ -416,27 +416,23 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pddbtrsv.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) TRANS, UPLO, BWL, BWU, - $ IB, INFO, JA, LAF, LWORK, N, NRHS - 102 FORMAT('PDDBTRSV inputs: - $ TRANS: ', A5,' UPLO: ', A5,' - $ BWL: ', I5,' BWU: ', I5,' IB: ', I5,' INFO: ', - $ I5,' JA: ', I5,' LAF: ', I5,' LWORK: ', I5,' N - $ : ', I5,' NRHS: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Test the input parameters * @@ -500,6 +496,21 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, MBW2 = MAX_BW*MAX_BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TRANS, UPLO, BWL, BWU, IB, INFO, + $ JA, LAF, LWORK, N, NRHS, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDDBTRSV inputs:,TRANS:',A5,',UPLO:',A5, + $ ',BWL:',I5,',BWU:',I5,',IB:',I5,',INFO:',I5, + $ ',JA:',I5,',LAF:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -583,6 +594,9 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, CALL PXERBLA( ICTXT, $ 'PDDBTRSV, D&C alg.: only 1 block per proc', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -591,6 +605,9 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, INFO = -( 9*100+4 ) CALL PXERBLA( ICTXT, 'PDDBTRSV, D&C alg.: NB too small', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -605,6 +622,9 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, 
NRHS, A, JA, DESCA, INFO = -16 CALL PXERBLA( ICTXT, 'PDDBTRSV: worksize error', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -679,6 +699,9 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDBTRSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -686,11 +709,17 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, * Quick return if possible * IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -1576,6 +1605,9 @@ SUBROUTINE PDDBTRSV( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, * WORK( 1 ) = WORK_SIZE_MIN * +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pddtsv.f b/SRC/pddtsv.f index a6b8a73a..3bd85e50 100644 --- a/SRC/pddtsv.f +++ b/SRC/pddtsv.f @@ -399,25 +399,23 @@ SUBROUTINE PDDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, * .. External Subroutines .. EXTERNAL PDDTTRF, PDDTTRS, PXERBLA * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pddtsv.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IB, INFO, JA, LWORK, - $ N, NRHS - 102 FORMAT('PDDTSV inputs: - $ IB: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', - $ I5,' N: ', I5,' NRHS: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines @@ -443,12 +441,28 @@ SUBROUTINE PDDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, CALL PXERBLA( ICTXT, $ 'PDDTSV', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IB, INFO, JA, LWORK, N, NRHS, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDDTSV inputs:,IB:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',N:',I5,',NRHS:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * * Size needed for AF in factorization * @@ -466,6 +480,9 @@ SUBROUTINE PDDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, IF( INFO .LT. 
0 ) THEN CALL PXERBLA( ICTXT, 'PDDTSV', -INFO ) ENDIF +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -480,9 +497,15 @@ SUBROUTINE PDDTSV( N, NRHS, DL, D, DU, JA, DESCA, B, IB, DESCB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDDTSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pddttrf.f b/SRC/pddttrf.f index df1a410c..0d524c2a 100644 --- a/SRC/pddttrf.f +++ b/SRC/pddttrf.f @@ -398,25 +398,23 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pddttrf.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) INFO, JA, LAF, LWORK, - $ N - 102 FORMAT('PDDTTRF inputs: - $ INFO: ', I5,' JA: ', I5,' LAF: ', I5,' LWORK: ' - $ , I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Test the input parameters * @@ -453,6 +451,18 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) INFO, JA, LAF, LWORK, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDDTTRF inputs:,INFO:',I5,',JA:',I5,',LAF:',I5, + $ ',LWORK:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -483,6 +493,9 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDDTTRF, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -490,6 +503,9 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*INT_ONE ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PDDTTRF, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -505,6 +521,9 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDDTTRF: auxiliary storage error ', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -520,6 +539,9 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, INFO = -10 CALL PXERBLA( ICTXT, 'PDDTTRF: worksize error ', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -572,6 +594,9 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDTTRF', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -579,6 +604,9 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, * Quick return if possible * IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -1067,6 +1095,9 @@ SUBROUTINE PDDTTRF( N, DL, D, DU, JA, DESCA, AF, LAF, WORK, LWORK, CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pddttrs.f b/SRC/pddttrs.f index 6b7c725d..bff166db 100644 --- a/SRC/pddttrs.f +++ b/SRC/pddttrs.f @@ -417,26 +417,23 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pddttrs.f' +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) TRANS, IB, INFO, JA, - $ LAF, LWORK, N, NRHS - 102 FORMAT('PDDTTRS inputs: - $ TRANS: ', A5,' - $ IB: ', I5,' INFO: ', I5,' JA: ', I5,' LAF: ', I - $ 5,' LWORK: ', I5,' N: ', I5,' NRHS: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Test the input parameters * @@ -503,6 +500,20 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TRANS, IB, INFO, JA, LAF, LWORK, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDDTTRS inputs:,TRANS:',A5,',IB:',I5,',INFO:',I5, + $ ',JA:',I5,',LAF:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -561,6 +572,9 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDDTTRS, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -568,6 +582,9 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*INT_ONE ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PDDTTRS, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -582,6 +599,9 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, INFO = -15 CALL PXERBLA( ICTXT, 'PDDTTRS: worksize error', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -650,6 +670,9 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDTTRS', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -657,11 +680,17 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, * Quick return if possible * IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -805,6 +834,9 @@ SUBROUTINE PDDTTRS( TRANS, N, NRHS, DL, D, DU, JA, DESCA, B, IB, * WORK( 1 ) = WORK_SIZE_MIN * +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pddttrsv.f b/SRC/pddttrsv.f index d0a59da4..9d1ecebd 100644 --- a/SRC/pddttrsv.f +++ b/SRC/pddttrsv.f @@ -429,26 +429,23 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pddttrsv.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) TRANS, UPLO, IB, INFO, - $ JA, LAF, LWORK, N, NRHS - 102 FORMAT('PDDTTRSV inputs: - $ TRANS: ', A5,' UPLO: ', A5,' - $ IB: ', I5,' INFO: ', I5,' JA: ', I5,' LAF: ', I - $ 5,' LWORK: ', I5,' N: ', I5,' NRHS: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Test the input parameters * @@ -515,6 +512,21 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TRANS, UPLO, IB, INFO, JA, LAF, + $ LWORK, N, NRHS, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDDTTRSV inputs:,TRANS:',A5,',UPLO:',A5, + $ ',IB:',I5,',INFO:',I5,',JA:',I5,',LAF:',I5, + $ ',LWORK:',I5,',N:',I5,',NRHS:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -582,6 +594,9 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, CALL PXERBLA( ICTXT, $ 'PDDTTRSV, D&C alg.: only 1 block per proc', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -590,6 +605,9 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, INFO = -( 9*100+4 ) CALL PXERBLA( ICTXT, 'PDDTTRSV, D&C alg.: NB too small', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -604,6 +622,9 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, INFO = -16 CALL PXERBLA( ICTXT, 'PDDTTRSV: worksize error', -INFO ) END IF +* +* 
Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -674,6 +695,9 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDDTTRSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -681,11 +705,17 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, * Quick return if possible * IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -1516,6 +1546,9 @@ SUBROUTINE PDDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA, * WORK( 1 ) = WORK_SIZE_MIN * +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgbsv.f b/SRC/pdgbsv.f index e3ad3138..ae8ccb36 100644 --- a/SRC/pdgbsv.f +++ b/SRC/pdgbsv.f @@ -394,26 +394,23 @@ SUBROUTINE PDGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, * .. External Subroutines .. EXTERNAL PDGBTRF, PDGBTRS, PXERBLA * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgbsv.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) BWL, BWU, IB, INFO, - $ JA, LWORK, N, NRHS - 102 FORMAT('PDGBSV inputs: - $ BWL: ', I5,' BWU: ', I5,' IB: ', I5,' INFO: ', - $ I5,' JA: ', I5,' LWORK: ', I5,' N: ', I5,' NRH - $ S: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines @@ -436,12 +433,29 @@ SUBROUTINE PDGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, CALL PXERBLA( ICTXT, $ 'PDGBSV', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) BWL, BWU, IB, INFO, JA, LWORK, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDGBSV inputs:,BWL:',I5,',BWU:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * * Size needed for AF in factorization * @@ -459,6 +473,9 @@ SUBROUTINE PDGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, IF( INFO .LT. 
0 ) THEN CALL PXERBLA( ICTXT, 'PDGBSV', -INFO ) ENDIF +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -473,9 +490,15 @@ SUBROUTINE PDGBSV( N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, B, IB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGBSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgbtrf.f b/SRC/pdgbtrf.f index 8ce8e4bf..c74c789c 100644 --- a/SRC/pdgbtrf.f +++ b/SRC/pdgbtrf.f @@ -402,25 +402,23 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgbtrf.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) BWL, BWU, INFO, JA, - $ LAF, LWORK, N - 102 FORMAT('PDGBTRF inputs: - $ BWL: ', I5,' BWU: ', I5,' INFO: ', I5,' JA: ', - $ I5,' LAF: ', I5,' LWORK: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * * Test the input parameters @@ -450,6 +448,19 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) BWL, BWU, INFO, JA, LAF, LWORK, + $ N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGBTRF inputs:,BWL:',I5,',BWU:',I5,',INFO:',I5, + $ ',JA:',I5,',LAF:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5 , + $ ',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -498,6 +509,9 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDGBTRF, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -505,6 +519,9 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.( BWL+BWU+1 ) ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PDGBTRF, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -520,6 +537,9 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDGBTRF: auxiliary storage error ', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -537,6 +557,9 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, WORK( 1 ) = WORK_SIZE_MIN CALL PXERBLA( ICTXT, 'PDGBTRF: worksize error ', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -593,6 +616,9 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDGBTRF', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -600,6 +626,9 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, * Quick return if possible * IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -1124,6 +1153,9 @@ SUBROUTINE PDGBTRF( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF, CALL IGEBR2D( ICTXT, 'A', ' ', 1, 1, INFO, 1, 0, 0 ) END IF * +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgbtrs.f b/SRC/pdgbtrs.f index 57a2d1b6..605c0ae8 100644 --- a/SRC/pdgbtrs.f +++ b/SRC/pdgbtrs.f @@ -414,27 +414,23 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgbtrs.f' +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) TRANS, BWL, BWU, IB, - $ INFO, JA, LAF, LWORK, N, NRHS - 102 FORMAT('PDGBTRS inputs: - $ TRANS: ', A5,' - $ BWL: ', I5,' BWU: ', I5,' IB: ', I5,' INFO: ', - $ I5,' JA: ', I5,' LAF: ', I5,' LWORK: ', I5,' N - $ : ', I5,' NRHS: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * * Test the input parameters @@ -494,6 +490,21 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TRANS, BWL, BWU, IB, INFO, JA, + $ LAF, LWORK, N, NRHS, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDGBTRS inputs:,TRANS:',A5,',BWL:',I5, + $ ',BWU:',I5,',IB:',I5,',INFO:',I5,',JA:',I5, + $ ',LAF:',I5,',LWORK:',I5,',N:',I5, + $ ',NRHS:',I5,',NPROW:',I5,',NPCOL:',I5 , + $ ',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -570,6 +581,9 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDGBTRS, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -577,6 +591,9 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.( BWL+BWU+1 ) ) ) THEN INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PDGBTRS, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -593,6 +610,9 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, INFO = -16 CALL PXERBLA( ICTXT, 'PDGBTRS: worksize error ', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -665,6 +685,9 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDGBTRS', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -672,11 +695,17 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, * Quick return if possible * IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -1199,6 +1228,9 @@ SUBROUTINE PDGBTRS( TRANS, N, BWL, BWU, NRHS, A, JA, DESCA, IPIV, * Output worksize * WORK( 1 ) = WORK_SIZE_MIN +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgebal.f b/SRC/pdgebal.f index e33f8456..6e603a5b 100644 --- a/SRC/pdgebal.f +++ b/SRC/pdgebal.f @@ -213,29 +213,39 @@ SUBROUTINE PDGEBAL( JOB, N, A, DESCA, ILO, IHI, SCALE, INFO ) * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgebal.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) JOB, IHI, ILO, INFO, N - 102 FORMAT('PDGEBAL inputs: - $ JOB: ', A5,' - $ IHI: ', I5,' ILO: ', I5,' INFO: ', I5,' N: ', I - $ 5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F INFO = 0 ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) JOB, IHI, ILO, INFO, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGEBAL inputs:,JOB:',A5,',IHI:',I5,',ILO:',I5, + $ ',INFO:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. * IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. @@ -248,6 +258,9 @@ SUBROUTINE PDGEBAL( JOB, N, A, DESCA, ILO, IHI, SCALE, INFO ) END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEBAL', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -410,6 +423,9 @@ SUBROUTINE PDGEBAL( JOB, N, A, DESCA, ILO, IHI, SCALE, INFO ) * INFO = -3 CALL PXERBLA( ICTXT, 'PDGEBAL', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -462,6 +478,9 @@ SUBROUTINE PDGEBAL( JOB, N, A, DESCA, ILO, IHI, SCALE, INFO ) 210 CONTINUE ILO = K IHI = L +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgebd2.f b/SRC/pdgebd2.f index e159d71e..cfd89173 100644 --- a/SRC/pdgebd2.f +++ b/SRC/pdgebd2.f @@ -272,31 +272,42 @@ SUBROUTINE PDGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * .. 
Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgebd2.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, LWORK, - $ M, N - 102 FORMAT('PDGEBD2 inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', - $ I5,' M: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Test the input parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGEBD2 inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -330,9 +341,15 @@ SUBROUTINE PDGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEBD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -359,6 +376,9 @@ SUBROUTINE PDGEBD2( M, N, A, IA, JA, DESCA, D, 
E, TAUQ, TAUP, END IF IF( MYROW.EQ.IAROW ) $ TAUP( II ) = ZERO +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -464,6 +484,9 @@ SUBROUTINE PDGEBD2( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, END IF * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgebrd.f b/SRC/pdgebrd.f index 5709bf44..8837fffe 100644 --- a/SRC/pdgebrd.f +++ b/SRC/pdgebrd.f @@ -274,31 +274,42 @@ SUBROUTINE PDGEBRD( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgebrd.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, LWORK, - $ M, N - 102 FORMAT('PDGEBRD inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', - $ I5,' M: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGEBRD inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -337,9 +348,15 @@ SUBROUTINE PDGEBRD( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEBRD', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -348,6 +365,9 @@ SUBROUTINE PDGEBRD( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * MN = MIN( M, N ) IF( MN.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -433,6 +453,9 @@ SUBROUTINE PDGEBRD( M, N, A, IA, JA, DESCA, D, E, TAUQ, TAUP, CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgecon.f b/SRC/pdgecon.f index 619404f3..3610d3db 100644 --- a/SRC/pdgecon.f +++ b/SRC/pdgecon.f @@ -215,33 +215,44 @@ SUBROUTINE PDGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, * .. Intrinsic Functions .. 
INTRINSIC ABS, DBLE, ICHAR, MAX, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgecon.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) NORM, IA, INFO, JA, - $ LIWORK, LWORK, N, ANORM, RCOND - 102 FORMAT('PDGECON inputs: - $ NORM: ', A5,' - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LIWORK: ' - $ , I5,' LWORK: ', I5,' N: ', I5,' - $ ANORM: ', F9.4,' RCOND: ', F9.4) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) NORM, IA, INFO, JA, LIWORK, + $ LWORK, N, ANORM, RCOND, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDGECON inputs:,NORM:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',ANORM:',F9.4,',RCOND:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5 , + $ ',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -304,9 +315,15 @@ SUBROUTINE PDGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGECON', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit 
in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -316,13 +333,22 @@ SUBROUTINE PDGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, RCOND = ZERO IF( N.EQ.0 ) THEN RCOND = ONE +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( ANORM.EQ.ZERO ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( N.EQ.1 ) THEN RCOND = ONE +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -437,6 +463,9 @@ SUBROUTINE PDGECON( NORM, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, * CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgeequ.f b/SRC/pdgeequ.f index 9a430785..69adbbea 100644 --- a/SRC/pdgeequ.f +++ b/SRC/pdgeequ.f @@ -192,33 +192,44 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgeequ.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, M, N, - $ AMAX, COLCND, ROWCND - 102 FORMAT('PDGEEQU inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' M: ', I5, - $ ' N: ', I5,' - $ AMAX: ', F9.4,' COLCND: ', F9.4,' ROWCND: ', F9. 
- $ 4) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, M, N, AMAX, COLCND, + $ ROWCND, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDGEEQU inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',M:',I5,',N:',I5,',AMAX:',F9.4, + $ ',COLCND:',F9.4,',ROWCND:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. * INFO = 0 @@ -232,6 +243,9 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEEQU', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -242,6 +256,9 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, ROWCND = ONE COLCND = ONE AMAX = ZERO +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -315,6 +332,9 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1, $ IDUMM, IDUMM, -1, -1, MYCOL ) IF( INFO.NE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -376,6 +396,9 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, CALL IGAMX2D( ICTXT, 'Columnwise', COLCTOP, 1, 1, INFO, 1, $ IDUMM, IDUMM, -1, -1, MYCOL ) IF( INFO.NE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -392,6 +415,9 @@ SUBROUTINE PDGEEQU( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) * END IF +* +* +* Capture the 
subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgehd2.f b/SRC/pdgehd2.f index 30270a83..9be4d5e8 100644 --- a/SRC/pdgehd2.f +++ b/SRC/pdgehd2.f @@ -221,31 +221,42 @@ SUBROUTINE PDGEHD2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgehd2.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, IHI, ILO, INFO, - $ JA, LWORK, N - 102 FORMAT('PDGEHD2 inputs: - $ IA: ', I5,' IHI: ', I5,' ILO: ', I5,' INFO: ', - $ I5,' JA: ', I5,' LWORK: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IHI, ILO, INFO, JA, LWORK, + $ N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGEHD2 inputs:,IA:',I5,',IHI:',I5,',ILO:',I5, + $ ',INFO:',I5,',JA:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5 , + $ ',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -281,9 +292,15 @@ SUBROUTINE PDGEHD2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 
'PDGEHD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -313,6 +330,9 @@ SUBROUTINE PDGEHD2( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, 10 CONTINUE * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgehrd.f b/SRC/pdgehrd.f index b59d0604..7edfcc01 100644 --- a/SRC/pdgehrd.f +++ b/SRC/pdgehrd.f @@ -234,31 +234,42 @@ SUBROUTINE PDGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgehrd.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, IHI, ILO, INFO, - $ JA, LWORK, N - 102 FORMAT('PDGEHRD inputs: - $ IA: ', I5,' IHI: ', I5,' ILO: ', I5,' INFO: ', - $ I5,' JA: ', I5,' LWORK: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IHI, ILO, INFO, JA, LWORK, + $ N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGEHRD inputs:,IA:',I5,',IHI:',I5,',ILO:',I5, + $ ',INFO:',I5,',JA:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5 , + $ ',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -313,9 +324,15 @@ SUBROUTINE PDGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEHRD', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -338,6 +355,9 @@ SUBROUTINE PDGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, * Quick return if possible * IF( IHI-ILO.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -403,6 +423,9 @@ SUBROUTINE PDGEHRD( N, ILO, IHI, A, IA, JA, DESCA, TAU, WORK, CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgelq2.f b/SRC/pdgelq2.f index 5bcf44ae..2f2fd433 100644 --- a/SRC/pdgelq2.f +++ b/SRC/pdgelq2.f @@ -194,31 +194,42 @@ SUBROUTINE PDGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. 
Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgelq2.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, LWORK, - $ M, N - 102 FORMAT('PDGELQ2 inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', - $ I5,' M: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGELQ2 inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -247,9 +258,15 @@ SUBROUTINE PDGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGELQ2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -257,6 +274,9 @@ SUBROUTINE PDGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, 
LWORK, * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -292,6 +312,9 @@ SUBROUTINE PDGELQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgelqf.f b/SRC/pdgelqf.f index 72a5d3ff..8cf57730 100644 --- a/SRC/pdgelqf.f +++ b/SRC/pdgelqf.f @@ -195,31 +195,42 @@ SUBROUTINE PDGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgelqf.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, LWORK, - $ M, N - 102 FORMAT('PDGELQF inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', - $ I5,' M: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGELQF inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -255,9 +266,15 @@ SUBROUTINE PDGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGELQF', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -265,6 +282,9 @@ SUBROUTINE PDGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -333,6 +353,9 @@ SUBROUTINE PDGELQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgels.f b/SRC/pdgels.f index ad90abd8..41111b45 100644 --- a/SRC/pdgels.f +++ b/SRC/pdgels.f @@ -270,33 +270,44 @@ SUBROUTINE PDGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. 
INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgels.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) TRANS, IA, IB, INFO, - $ JA, JB, LWORK, M, N, NRHS - 102 FORMAT('PDGELS inputs: - $ TRANS: ', A5,' - $ IA: ', I5,' IB: ', I5,' INFO: ', I5,' JA: ', I5 - $ ,' JB: ', I5,' LWORK: ', I5,' M: ', I5,' N: ', - $ I5,' NRHS: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TRANS, IA, IB, INFO, JA, JB, + $ LWORK, M, N, NRHS, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDGELS inputs:,TRANS:',A5,',IA:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',JB:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5,',NRHS:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 , + $ ',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -402,9 +413,15 @@ SUBROUTINE PDGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGELS', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace 
file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -414,6 +431,9 @@ SUBROUTINE PDGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, IF( MIN( M, N, NRHS ).EQ.0 ) THEN CALL PDLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, $ IB, JB, DESCB ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -612,6 +632,9 @@ SUBROUTINE PDGELS( TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, 10 CONTINUE * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgeql2.f b/SRC/pdgeql2.f index e25576ce..84a8786d 100644 --- a/SRC/pdgeql2.f +++ b/SRC/pdgeql2.f @@ -197,31 +197,42 @@ SUBROUTINE PDGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgeql2.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, LWORK, - $ M, N - 102 FORMAT('PDGEQL2 inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', - $ I5,' M: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGEQL2 inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -250,9 +261,15 @@ SUBROUTINE PDGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQL2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -260,6 +277,9 @@ SUBROUTINE PDGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -330,6 +350,9 @@ SUBROUTINE PDGEQL2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgeqlf.f b/SRC/pdgeqlf.f index 259a22c8..0804f900 100644 --- a/SRC/pdgeqlf.f +++ b/SRC/pdgeqlf.f @@ -197,31 +197,42 @@ SUBROUTINE PDGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. 
Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgeqlf.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, LWORK, - $ M, N - 102 FORMAT('PDGEQLF inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', - $ I5,' M: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGEQLF inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -257,9 +268,15 @@ SUBROUTINE PDGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQLF', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -267,6 +284,9 @@ SUBROUTINE PDGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * Quick return if 
possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -332,6 +352,9 @@ SUBROUTINE PDGEQLF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgeqpf.f b/SRC/pdgeqpf.f index 2a6444e1..608473b2 100644 --- a/SRC/pdgeqpf.f +++ b/SRC/pdgeqpf.f @@ -226,31 +226,42 @@ SUBROUTINE PDGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, IDINT, MAX, MIN, MOD, SQRT * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgeqpf.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, JA, INFO, LWORK, - $ M, N - 102 FORMAT('PDGEQPF inputs: - $ IA: ', I5,' JA: ', I5,' INFO: ', I5,' LWORK: ', - $ I5,' M: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, JA, INFO, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGEQPF inputs:,IA:',I5,',JA:',I5,',INFO:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -288,9 +299,15 @@ SUBROUTINE PDGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQPF', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -298,6 +315,9 @@ SUBROUTINE PDGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -572,6 +592,9 @@ SUBROUTINE PDGEQPF( M, N, A, IA, JA, DESCA, IPIV, TAU, WORK, 120 CONTINUE * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgeqr2.f b/SRC/pdgeqr2.f index 7b42b25d..cf57407c 100644 --- a/SRC/pdgeqr2.f +++ b/SRC/pdgeqr2.f @@ -196,31 +196,42 @@ SUBROUTINE PDGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. -* .. 
DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgeqr2.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, LWORK, - $ M, N - 102 FORMAT('PDGEQR2 inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', - $ I5,' M: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGEQR2 inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -249,9 +260,15 @@ SUBROUTINE PDGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -259,6 +276,9 @@ SUBROUTINE PDGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * Quick return if possible * IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -329,6 +349,9 @@ SUBROUTINE PDGEQR2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgeqrf.f b/SRC/pdgeqrf.f index d46d9d57..69c0b959 100644 --- a/SRC/pdgeqrf.f +++ b/SRC/pdgeqrf.f @@ -202,31 +202,42 @@ SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgeqrf.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, LWORK, - $ M, N - 102 FORMAT('PDGEQRF inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', - $ I5,' M: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGEQRF inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -262,9 +273,15 @@ SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGEQRF', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -272,6 +289,9 @@ SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -353,6 +373,9 @@ SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgerfs.f b/SRC/pdgerfs.f index df92cb3b..a7406e25 100644 --- a/SRC/pdgerfs.f +++ b/SRC/pdgerfs.f @@ -307,29 +307,23 @@ SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, * .. Intrinsic Functions .. 
INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgerfs.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) TRANS, IA, IAF, IB, - $ IX, INFO, JA, JAF, JB, JX, - $ LIWORK, LWORK, N, NRHS - 102 FORMAT('PDGERFS inputs: - $ TRANS: ', A5,' - $ IA: ', I5,' IAF: ', I5,' IB: ', I5,' IX: ', I5, - $ ' INFO: ', I5,' JA: ', I5,' JAF: ', I5,' JB: ' - $ , I5,' JX: ', I5,' LIWORK: ', I5,' LWORK: ', I5 - $ ,' N: ', I5,' NRHS: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F EST = 0.0 * * Get grid parameters @@ -337,6 +331,23 @@ SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TRANS, IA, IAF, IB, IX, INFO, + $ JA, JAF, JB, JX, LIWORK, + $ LWORK, N, NRHS, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDGERFS inputs:,TRANS:',A5,',IA:',I5,',IAF:',I5, + $ ',IB:',I5,',IX:',I5,',INFO:',I5, + $ ',JA:',I5,',JAF:',I5,',JB:',I5, + $ ',JX:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. 
* NOTRAN = LSAME( TRANS, 'N' ) @@ -457,9 +468,15 @@ SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGERFS', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -475,6 +492,9 @@ SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -911,6 +931,9 @@ SUBROUTINE PDGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgerq2.f b/SRC/pdgerq2.f index c623aa26..f4184b11 100644 --- a/SRC/pdgerq2.f +++ b/SRC/pdgerq2.f @@ -195,31 +195,42 @@ SUBROUTINE PDGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgerq2.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, LWORK, - $ M, N - 102 FORMAT('PDGERQ2 inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', - $ I5,' M: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGERQ2 inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -248,9 +259,15 @@ SUBROUTINE PDGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGERQ2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -258,6 +275,9 @@ SUBROUTINE PDGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -290,6 +310,9 @@ SUBROUTINE PDGERQ2( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgerqf.f b/SRC/pdgerqf.f index 8b430f89..e5f52876 100644 --- a/SRC/pdgerqf.f +++ b/SRC/pdgerqf.f @@ -195,31 +195,42 @@ SUBROUTINE PDGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. 
Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgerqf.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, LWORK, - $ M, N - 102 FORMAT('PDGERQF inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LWORK: ', - $ I5,' M: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGERQF inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -255,9 +266,15 @@ SUBROUTINE PDGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGERQF', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -265,6 +282,9 @@ SUBROUTINE PDGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * Quick return if 
possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -330,6 +350,9 @@ SUBROUTINE PDGERQF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * WORK( 1 ) = DBLE( LWMIN ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgesv.f b/SRC/pdgesv.f index 65126d89..3a4d5b2d 100644 --- a/SRC/pdgesv.f +++ b/SRC/pdgesv.f @@ -184,31 +184,42 @@ SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgesv.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, IB, INFO, JA, JB, - $ N, NRHS - 102 FORMAT('PDGESV inputs: - $ IA: ', I5,' IB: ', I5,' INFO: ', I5,' JA: ', I5 - $ ,' JB: ', I5,' N: ', I5,' NRHS: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IB, INFO, JA, JB, N, NRHS, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGESV inputs:,IA:',I5,',IB:',I5,',INFO:',I5, + $ ',JA:',I5,',JB:',I5,',N:',I5, + $ ',NRHS:',I5,',NPROW:',I5,',NPCOL:',I5 , + $ ',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -245,6 +256,9 @@ SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGESV', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -262,6 +276,9 @@ SUBROUTINE PDGESV( N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, $ B, IB, JB, DESCB, INFO ) * END IF +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgesvd.f b/SRC/pdgesvd.f index 99a025e2..06ed7446 100644 --- a/SRC/pdgesvd.f +++ b/SRC/pdgesvd.f @@ -327,32 +327,47 @@ SUBROUTINE PDGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, * .. Intrinsic Functions .. INTRINSIC MAX,MIN,SQRT,DBLE * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgesvd.f' +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) JOBU,JOBVT, - $ IA,INFO,IU,IVT,JA,JU,JVT,LWORK,M,N - 102 FORMAT('PDGESVDJOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, inputs: - $ JOBU: ', A5,' JOBVT: ', A5,' - $ IA: ', I5,' INFO: ', I5,' IU: ', I5,' IVT: ', I - $ 5,' JA: ', I5,' JU: ', I5,' JVT: ', I5,' LWORK - $ : ', I5,' M: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F IF (BLOCK_CYCLIC_2D*DTYPE_*LLD_*MB_*M_*NB_*N_.LT.0) RETURN * CALL BLACS_GRIDINFO(DESCA(CTXT_),NPROW,NPCOL,MYPROW,MYPCOL) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) JOBU,JOBVT, IA,INFO,IU,IVT, + $ JA,JU,JVT,LWORK,M,N, NPROW, NPCOL, MYPROW, + $ MYPCOL, eos_str + 102 FORMAT('PDGESVD inputs:,JOBU:',A5,',JOBVT:',A5, + $ ',IA:',I5,',INFO:',I5,',IU:',I5,',IVT:',I5, + $ ',JA:',I5,',JU:',I5,',JVT:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF ISCALE = 0 INFO = 0 * @@ -500,6 +515,9 @@ SUBROUTINE PDGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, * IF (INFO.NE.0) THEN CALL PXERBLA(DESCA(CTXT_),'PDGESVD',-INFO) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF (LWORK.EQ.-1) THEN @@ -664,6 +682,9 @@
SUBROUTINE PDGESVD(JOBU,JOBVT,M,N,A,IA,JA,DESCA,S,U,IU,JU,DESCU, CALL BLACS_GRIDEXIT(CONTEXTR) * * End of PDGESVD +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgesvx.f b/SRC/pdgesvx.f index c1df72cf..669a8819 100644 --- a/SRC/pdgesvx.f +++ b/SRC/pdgesvx.f @@ -453,36 +453,48 @@ SUBROUTINE PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. -* .. DTL variables declaration .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgesvx.f' + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) EQUED, FACT, TRANS, - $ IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LIWORK, - $ LWORK, N, NRHS, RCOND - 102 FORMAT('PDGESVX inputs: - $ EQUED: ', A5,' FACT: ', A5,' TRANS: ', A5,' - $ IA: ', I5,' IAF: ', I5,' IB: ', I5,' INFO: ', I - $ 5,' IX: ', I5,' JA: ', I5,' JAF: ', I5,' JB: ' - $ , I5,' JX: ', I5,' LIWORK: ', I5,' LWORK: ', I5 - $ ,' N: ', I5,' NRHS: ', I5,' - $ RCOND: ', F9.4) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) EQUED, FACT, TRANS, IA, IAF, + $ IB, INFO, IX, JA, JAF, JB, JX, LIWORK, + $ LWORK, N, NRHS, + $ RCOND, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDGESVX inputs:,EQUED:',A5,',FACT:',A5, + $ ',TRANS:',A5,',IA:',I5,',IAF:',I5,',IB:',I5, + $ ',INFO:',I5,',IX:',I5,',JA:',I5, + $ ',JAF:',I5,',JB:',I5,',JX:',I5, + $ ',LIWORK:',I5,',LWORK:',I5,',N:',I5, + $ ',NRHS:',I5,',RCOND:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -681,9 +693,15 @@ SUBROUTINE PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGESVX', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -761,6 +779,9 @@ SUBROUTINE PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, IF( INFO.NE.0 ) THEN IF( INFO.GT.0 ) $ RCOND = ZERO +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -784,6 +805,9 @@ SUBROUTINE 
PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, * IF( RCOND.LT.PDLAMCH( ICTXT, 'Epsilon' ) ) THEN INFO = IA + N +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -855,6 +879,9 @@ SUBROUTINE PDGESVX( FACT, TRANS, N, NRHS, A, IA, JA, DESCA, AF, * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgetf2.f b/SRC/pdgetf2.f index ff8c0195..0feff12b 100644 --- a/SRC/pdgetf2.f +++ b/SRC/pdgetf2.f @@ -164,23 +164,23 @@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetf2.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, M, N - 102 FORMAT('PDGETF2 inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' M: ', I5, - $ ' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * * Get grid parameters. 
@@ -188,6 +188,18 @@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDGETF2 inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. * INFO = 0 @@ -213,6 +225,9 @@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN @@ -221,6 +236,9 @@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * Quick return if possible * IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -273,6 +291,9 @@ SUBROUTINE PDGETF2( M, N, A, IA, JA, DESCA, IPIV, INFO ) * END IF * +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgetf2K.f b/SRC/pdgetf2K.f index 5dbb7f41..221745d5 100644 --- a/SRC/pdgetf2K.f +++ b/SRC/pdgetf2K.f @@ -192,24 +192,23 @@ SUBROUTINE PDGETF2K( M, N, A, IA, JA, DESCA, IPIV, PANEL, INFO ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetf2K.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, M, N - 102 FORMAT('PDGETF2K inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' M: ', I5, - $ ' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters. * @@ -217,6 +216,18 @@ SUBROUTINE PDGETF2K( M, N, A, IA, JA, DESCA, IPIV, PANEL, INFO ) LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDGETF2K inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. * INFO = 0 @@ -242,6 +253,9 @@ SUBROUTINE PDGETF2K( M, N, A, IA, JA, DESCA, IPIV, PANEL, INFO ) IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -249,6 +263,9 @@ SUBROUTINE PDGETF2K( M, N, A, IA, JA, DESCA, IPIV, PANEL, INFO ) * Quick return if possible * IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -303,6 +320,9 @@ SUBROUTINE PDGETF2K( M, N, A, IA, JA, DESCA, IPIV, PANEL, INFO ) CALL PDPANEL_LCAST( PANEL, N ) * END IF +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgetf2_comm.f b/SRC/pdgetf2_comm.f index cc2f466d..db1dcc1f 100644 --- a/SRC/pdgetf2_comm.f +++ b/SRC/pdgetf2_comm.f @@ -36,29 +36,40 @@ SUBROUTINE PDGETF2_COMM( M, N, A, IA, JA, DESCA, IPIV, INFO ) $ PDSCAL, PDSWAP, PB_TOPGET, PXERBLA * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetf2_comm.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, M, N - 102 FORMAT('PDGETF2_COMM inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' M: ', I5, - $ ' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters. 
* ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDGETF2_COMM inputs:,IA:',I5,',INFO:',I5, + $ ',JA:',I5,',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF MN = MIN( M, N ) @@ -77,6 +88,9 @@ SUBROUTINE PDGETF2_COMM( M, N, A, IA, JA, DESCA, IPIV, INFO ) $ MN, MYROW, IACOL ) * END IF +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdgetrf.f b/SRC/pdgetrf.f index 7bdd985c..e7437b8f 100644 --- a/SRC/pdgetrf.f +++ b/SRC/pdgetrf.f @@ -1,5 +1,5 @@ * -- ScaLAPACK routine -- -* Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +* Copyright (c) 2020-2023 Advanced Micro Devices, Inc. All rights reserved. * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -141,16 +141,19 @@ SUBROUTINE PDGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) * * ===================================================================== * - CHARACTER BUFFER*450 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetrf.f' +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR CALL AOCL_SCALAPACK_INIT( ) AOCL_DTL_TRACE_ENTRY_F * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN - WRITE(BUFFER,101) M, N, IA, JA - 101 FORMAT('pdgetrf inputs: M: ', I2, ' N: ', I2 ,' - $ IA: ', I2,' JA: ', I2 ) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,101) M, N, IA, JA, eos_str + 101 FORMAT('pdgetrf inputs:,M:',I9,',N:',I9, + $ ',IA:',I5,',JA:',I5,A5 ) + AOCL_DTL_LOG_ENTRY_F END IF * diff --git a/SRC/pdgetrf0.f b/SRC/pdgetrf0.f index 6ab7e5c6..d9ce0e99 100644 --- a/SRC/pdgetrf0.f +++ b/SRC/pdgetrf0.f @@ -176,7 +176,6 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) INTRINSIC MIN, MOD * .. * .. Executable Statements .. - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetrf0.f' // C_NULL_CHAR * * Get grid parameters * diff --git a/SRC/pdgetri.f b/SRC/pdgetri.f index b8520574..b7c1efd6 100644 --- a/SRC/pdgetri.f +++ b/SRC/pdgetri.f @@ -196,31 +196,42 @@ SUBROUTINE PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetri.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, INFO, JA, LIWORK, - $ LWORK, N - 102 FORMAT('PDGETRI inputs: - $ IA: ', I5,' INFO: ', I5,' JA: ', I5,' LIWORK: ' - $ , I5,' LWORK: ', I5,' N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LIWORK, LWORK, + $ N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDGETRI inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LIWORK:',I5,',LWORK:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -310,9 +321,15 @@ SUBROUTINE PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETRI', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -320,6 +337,9 @@ SUBROUTINE PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, * Quick return if possible * IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -329,6 +349,9 @@ SUBROUTINE PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, * CALL PDTRTRI( 'Upper', 'Non-unit', N, A, IA, JA, DESCA, INFO ) IF( INFO.GT.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -401,6 +424,9 @@ SUBROUTINE PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git 
a/SRC/pdgetrs.f b/SRC/pdgetrs.f index cb0ad6f5..f30796a1 100644 --- a/SRC/pdgetrs.f +++ b/SRC/pdgetrs.f @@ -184,32 +184,43 @@ SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdgetrs.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) TRANS, IA, IB, INFO, - $ JA, JB, N, NRHS - 102 FORMAT('PDGETRS inputs: - $ TRANS: ', A5,' - $ IA: ', I5,' IB: ', I5,' INFO: ', I5,' JA: ', I5 - $ ,' JB: ', I5,' N: ', I5,' NRHS: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TRANS, IA, IB, INFO, JA, JB, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDGETRS inputs:,TRANS:',A5,',IA:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',JB:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -258,6 +269,9 @@ SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETRS', -INFO ) +* +* Capture the subroutine exit 
in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -265,6 +279,9 @@ SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, * Quick return if possible * IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -312,6 +329,9 @@ SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, $ DESCB, IPIV, IA, 1, DESCIP, IDUM1 ) * END IF +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdggqrf.f b/SRC/pdggqrf.f index be4265c6..49c9b6bf 100644 --- a/SRC/pdggqrf.f +++ b/SRC/pdggqrf.f @@ -287,32 +287,44 @@ SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, MIN, MOD * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdggqrf.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, IB, INFO, JA, JB, - $ LWORK, M, N, P - 102 FORMAT('PDGGQRF inputs: - $ IA: ', I5,' IB: ', I5,' INFO: ', I5,' JA: ', I5 - $ ,' JB: ', I5,' LWORK: ', I5,' M: ', I5,' N: ', - $ I5,' P: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IB, INFO, JA, JB, LWORK, + $ M, N, P, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDGGQRF inputs:,IA:',I5,',IB:',I5,',INFO:',I5, + $ ',JA:',I5,',JB:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',P:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -368,9 +380,15 @@ SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGGQRF', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -390,6 +408,9 @@ SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, * CALL PDGERQF( N, P, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = DBLE( MAX( LWMIN, INT( WORK( 1 ) ) ) ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdggrqf.f b/SRC/pdggrqf.f index 59a21059..d042a07b 100644 --- a/SRC/pdggrqf.f +++ b/SRC/pdggrqf.f @@ -287,32 +287,44 @@ SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, MIN, MOD * .. -* .. DTL variables declaration .. 
- CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdggrqf.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. - BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) IA, IB, INFO, JA, JB, - $ LWORK, M, N, P - 102 FORMAT('PDGGRQF inputs: - $ IA: ', I5,' IB: ', I5,' INFO: ', I5,' JA: ', I5 - $ ,' JB: ', I5,' LWORK: ', I5,' M: ', I5,' N: ', - $ I5,' P: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF * +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IB, INFO, JA, JB, LWORK, + $ M, N, P, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDGGRQF inputs:,IA:',I5,',IB:',I5,',INFO:',I5, + $ ',JA:',I5,',JB:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',P:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -368,9 +380,15 @@ SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGGRQF', -INFO ) +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -391,6 +409,9 @@ SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, * CALL 
PDGEQRF( P, N, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = DBLE( MAX( LWMIN, INT( WORK( 1 ) ) ) ) +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN diff --git a/SRC/pdlaswp.f b/SRC/pdlaswp.f index bec0fe91..519bfd8e 100644 --- a/SRC/pdlaswp.f +++ b/SRC/pdlaswp.f @@ -1,5 +1,5 @@ * -* Copyright (c) 2022-23 Advanced Micro Devices, Inc.  All rights reserved. +* Copyright (c) 2020-23 Advanced Micro Devices, Inc.  All rights reserved. * * -- ScaLAPACK routine -- * @@ -159,34 +159,48 @@ SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, LOGICAL LSAME EXTERNAL LSAME * .. -* .. DTL variables declaration .. - CHARACTER BUFFER*512 - CHARACTER*15, PARAMETER :: FILE_NAME = 'pdlaswp.f' +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* * CALL AOCL_SCALAPACK_INIT( ) * - IF( SCALAPACK_CONTEXT%IS_DTL_ENABLED.EQ.1 ) THEN -* .. Init DTL log Buffer to zero .. 
- BUFFER='0' - AOCL_DTL_TRACE_ENTRY_F - WRITE(BUFFER,102) DIREC, ROWCOL, IA, JA, - $ K1, K2, N - 102 FORMAT('PDLASWP inputs: - $ DIREC: ', A5,' ROWCOL: ', A5,' - $ IA: ', I5,' JA: ', I5,' K1: ', I5,' K2: ', I5,' - $ N: ', I5) - CALL AOCL_SL_DTL_LOG_ENTRY( BUFFER ) - END IF +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * * Quick return if possible * IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* AOCL_DTL_TRACE_EXIT_F RETURN END IF * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIREC, ROWCOL, IA, JA, K1, K2, + $ N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLASWP inputs:,DIREC:',A5,',ROWCOL:',A5, + $ ',IA:',I5,',JA:',I5,',K1:',I5,',K2:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5 , + $ ',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF * IF( LSAME( ROWCOL, 'R' ) ) THEN IF( LSAME( DIREC, 'F' ) ) THEN @@ -229,6 +243,9 @@ SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, 40 CONTINUE END IF END IF +* +* +* Capture the subroutine exit in the trace file * AOCL_DTL_TRACE_EXIT_F RETURN From fca64cb7888ddc4686ee4bcf5da232eaf448e778 Mon Sep 17 00:00:00 2001 From: nprasadm Date: Thu, 20 Apr 2023 16:25:48 +0530 Subject: [PATCH 19/30] Build option added to enable 'address sanitizer'(ASAN) tests. 1) The configure option '-DENABLE_ASAN_TESTS=ON' builds the scalapack test-suite with compile option '-fsanitize=address'. 2) ASAN testing detects memory related bugs. 
Signed-off-by: Nagendra AMD-Internal: [CPUPL-2465] Change-Id: I0e0893c445a05901384bf1f7f8cc9c259f035ea0 --- CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 03e3f5c7..31fcce7f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,6 +29,9 @@ option(ENABLE_AOCL_PROGRESS "Enable progress feature " OFF) # DTL option option(ENABLE_DTL "Enable DTL feature " OFF) +# ASAN testing option +option(ENABLE_ASAN_TESTS "Enable Address sanitizer tests " OFF) + # Option: Include build number in the version string. option (ENABLE_SET_LIB_VERSION "Set library version" OFF) @@ -46,6 +49,13 @@ endif() set(CMAKE_ICC_FLAGS " ") set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp" ) +# Append -fsanitize=address to the Fortran and C compile flags when AddressSanitizer (ASAN) testing is enabled +if(ENABLE_ASAN_TESTS) + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fsanitize=address " ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address ") + message(STATUS " ASAN testing enabled for the scaLAPACK build ") +endif(ENABLE_ASAN_TESTS) + # Option to enable the scaLAPACK test-suite with "Dynamic work buffer memory allocation". if(ENABLE_LARGE_MATRIX_TESTING) From b89dd5f3ca73de3b5defba1c2cea10231039307c Mon Sep 17 00:00:00 2001 From: nprasadm Date: Thu, 20 Apr 2023 21:15:35 +0530 Subject: [PATCH 20/30] Trace and Logging feature enabled for 55 double data type APIs.
Signed-off-by: Nagendra AMD-Internal: [CPUPL-2700] Change-Id: Ie37ebfa7297ad92dc3464a71222e40d5b1924e5d --- SRC/pdhseqr.f | 58 +++++++++++++++++++++++++++++++ SRC/pdlabad.f | 27 +++++++++++++++ SRC/pdlabrd.f | 49 ++++++++++++++++++++++++-- SRC/pdlacon.f | 69 +++++++++++++++++++++++++++++++++++-- SRC/pdlaconsb.f | 44 ++++++++++++++++++++++++ SRC/pdlacp2.f | 67 ++++++++++++++++++++++++++++++++---- SRC/pdlacp3.f | 53 +++++++++++++++++++++++++--- SRC/pdlacpy.f | 36 +++++++++++++++++-- SRC/pdlaed0.f | 55 +++++++++++++++++++++++++++++- SRC/pdlaed1.f | 61 +++++++++++++++++++++++++++++++-- SRC/pdlaed2.f | 53 ++++++++++++++++++++++++++-- SRC/pdlaed3.f | 51 +++++++++++++++++++++++++-- SRC/pdlaedz.f | 48 +++++++++++++++++++++++++- SRC/pdlaevswp.f | 47 ++++++++++++++++++++++++- SRC/pdlahqr.f | 63 ++++++++++++++++++++++++++++++++-- SRC/pdlahrd.f | 49 ++++++++++++++++++++++++-- SRC/pdlamch.f | 23 +++++++++++++ SRC/pdlamr1d.f | 56 ++++++++++++++++++++++++++++-- SRC/pdlamve.f | 40 ++++++++++++++++++++++ SRC/pdlange.f | 39 +++++++++++++++++++++ SRC/pdlanhs.f | 39 +++++++++++++++++++++ SRC/pdlansy.f | 39 +++++++++++++++++++++ SRC/pdlantr.f | 40 ++++++++++++++++++++++ SRC/pdlapiv.f | 60 +++++++++++++++++++++++++++++--- SRC/pdlapv2.f | 59 +++++++++++++++++++++++++++++--- SRC/pdlaqge.f | 46 +++++++++++++++++++++++++ SRC/pdlaqr0.f | 52 ++++++++++++++++++++++++++++ SRC/pdlaqr1.f | 91 +++++++++++++++++++++++++++++++++++++++++++------ SRC/pdlaqr2.f | 49 ++++++++++++++++++++++++-- SRC/pdlaqr3.f | 83 ++++++++++++++++++++++++++++++++++++++------ SRC/pdlaqr4.f | 47 +++++++++++++++++++++++-- SRC/pdlaqr5.f | 76 ++++++++++++++++++++++++++++++++++++++--- SRC/pdlaqsy.f | 45 ++++++++++++++++++++++++ SRC/pdlared1d.f | 47 ++++++++++++++++++++++++- SRC/pdlared2d.f | 47 ++++++++++++++++++++++++- SRC/pdlarf.f | 49 ++++++++++++++++++++++++-- SRC/pdlarfb.f | 51 +++++++++++++++++++++++++-- SRC/pdlarfg.f | 62 ++++++++++++++++++++++++++++++--- SRC/pdlarft.f | 49 ++++++++++++++++++++++++-- SRC/pdlarz.f | 50 
+++++++++++++++++++++++++-- SRC/pdlarzb.f | 55 ++++++++++++++++++++++++++++-- SRC/pdlarzt.f | 44 ++++++++++++++++++++++++ SRC/pdlascl.f | 54 +++++++++++++++++++++++++++-- SRC/pdlase2.f | 67 ++++++++++++++++++++++++++++++++---- SRC/pdlaset.f | 36 +++++++++++++++++-- SRC/pdlasmsub.f | 44 ++++++++++++++++++++++++ SRC/pdlasrt.f | 60 +++++++++++++++++++++++++++++--- SRC/pdlassq.f | 58 ++++++++++++++++++++++++++++--- SRC/pdlatra.f | 43 +++++++++++++++++++++++ SRC/pdlatrd.f | 49 ++++++++++++++++++++++++-- SRC/pdlatrs.f | 51 +++++++++++++++++++++++++-- SRC/pdlatrz.f | 48 ++++++++++++++++++++++++-- SRC/pdlauu2.f | 48 ++++++++++++++++++++++++-- SRC/pdlauum.f | 36 +++++++++++++++++-- SRC/pdlawil.f | 49 ++++++++++++++++++++++++-- 55 files changed, 2695 insertions(+), 116 deletions(-) diff --git a/SRC/pdhseqr.f b/SRC/pdhseqr.f index 6e0f7510..d7e8cb3b 100644 --- a/SRC/pdhseqr.f +++ b/SRC/pdhseqr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDHSEQR( JOB, COMPZ, N, ILO, IHI, H, DESCH, WR, WI, Z, $ DESCZ, WORK, LWORK, IWORK, LIWORK, INFO ) * @@ -9,6 +15,7 @@ SUBROUTINE PDHSEQR( JOB, COMPZ, N, ILO, IHI, H, DESCH, WR, WI, Z, * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -277,13 +284,44 @@ SUBROUTINE PDHSEQR( JOB, COMPZ, N, ILO, IHI, H, DESCH, WR, WI, Z, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Decode and check the input parameters. * INFO = 0 ICTXT = DESCH( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IHI, ILO, INFO, LWORK, LIWORK, + $ N, COMPZ, JOB, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDHSEQR inputs:,IHI:',I5,',ILO:',I5,',INFO:',I5, + $ ',LWORK:',I5,',LIWORK:',I5, + $ ',N:',I5,',COMPZ:',A5,',JOB:',A5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL IF( NPROW.EQ.-1 ) INFO = -(600+CTXT_) IF( INFO.EQ.0 ) THEN @@ -356,18 +394,30 @@ SUBROUTINE PDHSEQR( JOB, COMPZ, N, ILO, IHI, H, DESCH, WR, WI, Z, * Quick return in case of invalid argument. * CALL PXERBLA( ICTXT, 'PDHSEQR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * ELSE IF( N.EQ.0 ) THEN * * Quick return in case N = 0; nothing to do. * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * ELSE IF( LQUERY ) THEN * * Quick return in case of a workspace query. 
* +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * ELSE @@ -422,6 +472,10 @@ SUBROUTINE PDHSEQR( JOB, COMPZ, N, ILO, IHI, H, DESCH, WR, WI, Z, $ 1, HRSRC, HCSRC ) END IF WI( ILO ) = ZERO +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -677,6 +731,10 @@ SUBROUTINE PDHSEQR( JOB, COMPZ, N, ILO, IHI, H, DESCH, WR, WI, Z, * WORK(1) = LWKOPT IWORK(1) = LIWKOPT +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDHSEQR diff --git a/SRC/pdlabad.f b/SRC/pdlabad.f index 4fd00d82..9834d5b7 100644 --- a/SRC/pdlabad.f +++ b/SRC/pdlabad.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLABAD( ICTXT, SMALL, LARGE ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLABAD( ICTXT, SMALL, LARGE ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER ICTXT DOUBLE PRECISION LARGE, SMALL @@ -53,8 +60,24 @@ SUBROUTINE PDLABAD( ICTXT, SMALL, LARGE ) * .. Intrinsic Functions .. INTRINSIC LOG10, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * If it looks like we're on a Cray, take the square root of * SMALL and LARGE to avoid overflow and underflow problems. 
* @@ -69,6 +92,10 @@ SUBROUTINE PDLABAD( ICTXT, SMALL, LARGE ) CALL DGAMN2D( ICTXT, 'All', ' ', 1, 1, LARGE, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLABAD diff --git a/SRC/pdlabrd.f b/SRC/pdlabrd.f index c647f7c4..9acdcd19 100644 --- a/SRC/pdlabrd.f +++ b/SRC/pdlabrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLABRD( M, N, NB, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ X, IX, JX, DESCX, Y, IY, JY, DESCY, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLABRD( M, N, NB, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IX, IY, JA, JX, JY, M, N, NB * .. @@ -269,15 +276,49 @@ SUBROUTINE PDLABRD( M, N, NB, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( M.LE.0 .OR. N.LE.0 ) - $ RETURN + IF( M.LE.0 .OR. 
N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IX, IY, JA, JX, JY, M, N, + $ NB, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLABRD inputs:,IA:',I5,',IX:',I5,',IY:',I5, + $ ',JA:',I5,',JX:',I5,',JY:',I5, + $ ',M:',I5,',N:',I5,',NB:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) IPY = DESCA( MB_ ) + 1 @@ -487,6 +528,10 @@ SUBROUTINE PDLABRD( M, N, NB, A, IA, JA, DESCA, D, E, TAUQ, TAUP, 20 CONTINUE END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLABRD diff --git a/SRC/pdlacon.f b/SRC/pdlacon.f index 74b9eabd..88fa8cee 100644 --- a/SRC/pdlacon.f +++ b/SRC/pdlacon.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, $ EST, KASE ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IV, IX, JV, JX, KASE, N DOUBLE PRECISION EST @@ -180,18 +187,52 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, * .. Save statement .. SAVE * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. * ESTWORK( 1 ) = EST ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IV, IX, JV, JX, KASE, N, EST, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLACON inputs:,IV:',I5,',IX:',I5,',JV:',I5, + $ ',JX:',I5,',KASE:',I5,',N:',I5, + $ ',EST:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIVX, JJVX, IVXROW, IVXCOL ) - IF( MYCOL.NE.IVXCOL ) - $ RETURN + IF( MYCOL.NE.IVXCOL ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF IROFF = MOD( IX-1, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IVXROW, NPROW ) IF( MYROW.EQ.IVXROW ) @@ -204,6 +245,10 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, 10 CONTINUE KASE = 1 JUMP = 1 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -241,6 +286,10 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, 30 CONTINUE KASE = 2 JUMP = 2 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * ................ ENTRY (JUMP = 2) @@ -275,6 +324,10 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, END IF KASE = 1 JUMP = 3 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * ................ 
ENTRY (JUMP = 3) @@ -315,6 +368,10 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, 100 CONTINUE KASE = 2 JUMP = 4 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * ................ ENTRY (JUMP = 4) @@ -356,6 +413,10 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, 130 CONTINUE KASE = 1 JUMP = 5 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * ................ ENTRY (JUMP = 5) @@ -381,6 +442,10 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, KASE = 0 * EST = ESTWORK( 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLACON diff --git a/SRC/pdlaconsb.f b/SRC/pdlaconsb.f index 1323a205..87403f5d 100644 --- a/SRC/pdlaconsb.f +++ b/SRC/pdlaconsb.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, $ LWORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER I, L, LWORK, M DOUBLE PRECISION H33, H43H34, H44 @@ -181,13 +188,42 @@ SUBROUTINE PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) ULP = PDLAMCH( CONTXT, 'PRECISION' ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) I, L, LWORK, M, H33, H43H34, + $ H44, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLACONSB inputs:,I:',I5,',L:',I5,',LWORK:',I5, + $ ',M:',I5,',H33:',F9.4,',H43H34:',F9.4, + $ ',H44:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) @@ -212,6 +248,10 @@ SUBROUTINE PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, END IF IF( LWORK.LT.7*ISTR2 ) THEN CALL PXERBLA( CONTXT, 'PDLACONSB', 10 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF ISTR3 = 3*ISTR2 @@ -567,6 +607,10 @@ SUBROUTINE PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, * CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, M, 1, L, L, -1, -1, -1 ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLACONSB diff --git a/SRC/pdlacp2.f b/SRC/pdlacp2.f index dc7b44c9..4b555358 100644 --- a/SRC/pdlacp2.f +++ b/SRC/pdlacp2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * @@ -5,6 +11,7 @@ SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. 
Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, JA, JB, M, N @@ -166,14 +173,48 @@ SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, JA, JB, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLACP2 inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',JA:',I5,',JB:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) @@ -228,8 +269,13 @@ SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, IF( MYCOL.EQ.IACOL ) THEN * MP = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW ) - IF( MP.LE.0 ) - $ RETURN + IF( MP.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA MYDIST = MOD( MYROW-IAROW+NPROW, NPROW ) @@ -326,8 +372,13 @@ SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, IF( MYROW.EQ.IAROW ) THEN * NQ = NUMROC( N+ICOFFA, NBA, MYCOL, 
IACOL, NPCOL ) - IF( NQ.LE.0 ) - $ RETURN + IF( NQ.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL ) @@ -398,6 +449,10 @@ SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLACP2 diff --git a/SRC/pdlacp3.f b/SRC/pdlacp3.f index 3b1aa0ed..6e31f36e 100644 --- a/SRC/pdlacp3.f +++ b/SRC/pdlacp3.f @@ -1,4 +1,11 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * -- ScaLAPACK routine (version 1.7) -- @@ -109,7 +116,7 @@ SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) * II (global input) INTEGER * By using REV 0 & 1, data can be sent out and returned again. * If REV=0, then II is destination row index for the node(s) -* receiving the replicated B. +* receiving the replicated B. * If II>=0,JJ>=0, then node (II,JJ) receives the data * If II=-1,JJ>=0, then all rows in column JJ receive the * data @@ -122,7 +129,7 @@ SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) * Similar description as II above * * REV (global input) INTEGER -* Use REV = 0 to send global A into locally replicated B +* Use REV = 0 to send global A into locally replicated B * (on node (II,JJ)). * Use REV <> 0 to send locally replicated B from node (II,JJ) * to its owner (which changes depending on its location in @@ -158,10 +165,31 @@ SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * - IF( M.LE.0 ) - $ RETURN +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* + IF( M.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) @@ -170,6 +198,19 @@ SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) JAFIRST = DESCA( CSRC_ ) * CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) I, II, JJ, LDB, M, REV, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLACP3 inputs:,I:',I5,',II:',I5,',JJ:',I5, + $ ',LDB:',I5,',M:',I5,',REV:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * IF( REV.EQ.0 ) THEN DO 20 IDI = 1, M @@ -306,6 +347,10 @@ SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) IF( IDJ.LE.IFIN ) $ GO TO 30 END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLACP3 diff --git a/SRC/pdlacpy.f b/SRC/pdlacpy.f index 741847b7..5b59733f 100644 --- a/SRC/pdlacpy.f +++ b/SRC/pdlacpy.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER UPLO INTEGER IA, IB, JA, JB, M, N @@ -160,10 +167,31 @@ SUBROUTINE PDLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) @@ -224,6 +252,10 @@ SUBROUTINE PDLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLACPY diff --git a/SRC/pdlaed0.f b/SRC/pdlaed0.f index a6368e1c..a01f1b09 100644 --- a/SRC/pdlaed0.f +++ b/SRC/pdlaed0.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER INFO, IQ, JQ, N * .. @@ -99,20 +106,58 @@ SUBROUTINE PDLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Test the input parameters. * CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) INFO, IQ, JQ, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAED0 inputs:,INFO:',I5,',IQ:',I5,',JQ:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF INFO = 0 IF( DESCQ( NB_ ).GT.N .OR. N.LT.2 ) $ INFO = -1 IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'PDLAED0', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -162,6 +207,10 @@ SUBROUTINE PDLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) $ WORK, INFO ) IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'DSTEQR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF IF( MYROW.NE.IQROW .OR. 
MYCOL.NE.IQCOL ) THEN @@ -228,6 +277,10 @@ SUBROUTINE PDLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) * end while * 90 CONTINUE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAED0 diff --git a/SRC/pdlaed1.f b/SRC/pdlaed1.f index 413d149b..69b9b446 100644 --- a/SRC/pdlaed1.f +++ b/SRC/pdlaed1.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAED1( N, N1, D, ID, Q, IQ, JQ, DESCQ, RHO, WORK, $ IWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLAED1( N, N1, D, ID, Q, IQ, JQ, DESCQ, RHO, WORK, * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER ID, INFO, IQ, JQ, N, N1 DOUBLE PRECISION RHO @@ -136,16 +143,51 @@ SUBROUTINE PDLAED1( N, N1, D, ID, Q, IQ, JQ, DESCQ, RHO, WORK, * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Test the input parameters. 
* CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) ID, INFO, IQ, JQ, N, N1, RHO, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAED1 inputs:,ID:',I5,',INFO:',I5,',IQ:',I5, + $ ',JQ:',I5,',N:',I5,',N1:',I5, + $ ',RHO:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) @@ -158,13 +200,22 @@ SUBROUTINE PDLAED1( N, N1, D, ID, Q, IQ, JQ, DESCQ, RHO, WORK, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'PDLAED1', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * The following values are integer pointers which indicate * the portion of the workspace used by a particular array @@ -265,6 +316,10 @@ SUBROUTINE PDLAED1( N, N1, D, ID, Q, IQ, JQ, DESCQ, RHO, WORK, END IF * 20 CONTINUE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAED1 diff --git a/SRC/pdlaed2.f b/SRC/pdlaed2.f index 72d43515..5ed50eee 100644 --- a/SRC/pdlaed2.f +++ b/SRC/pdlaed2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAED2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ, $ RHO, Z, W, DLAMDA, Q2, LDQ2, QBUF, CTOT, PSM, $ NPCOL, INDX, INDXC, INDXP, INDCOL, COLTYP, NN, @@ -8,6 +14,7 @@ SUBROUTINE PDLAED2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ, * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
INTEGER DCOL, DROW, IB1, IB2, ICTXT, K, LDQ, LDQ2, N, $ N1, NB, NN, NN1, NN2, NPCOL @@ -177,15 +184,53 @@ SUBROUTINE PDLAED2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ, * .. Local Arrays .. INTEGER PTT( 4 ) * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL BLACS_PINFO( IAM, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DCOL, DROW, IB1, IB2, ICTXT, + $ K, LDQ, LDQ2, N, N1, + $ NB, NN, NN1, NN2, NPCOL, RHO, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAED2 inputs:,DCOL:',I5,',DROW:',I5, + $ ',IB1:',I5,',IB2:',I5,',ICTXT:',I5,',K:',I5, + $ ',LDQ:',I5,',LDQ2:',I5,',N:',I5, + $ ',N1:',I5,',NB:',I5,',NN:',I5,',NN1:',I5, + $ ',NN2:',I5,',NPCOL:',I5,',RHO:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NUMROC( N, NB, MYROW, DROW, NPROW ) * N2 = N - N1 @@ -448,6 +493,10 @@ SUBROUTINE PDLAED2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ, NN2 = IE2 - IB2 + 1 NN = MAX( IE1, IE2 ) - MIN( IB1, IB2 ) + 1 220 CONTINUE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAED2 diff --git a/SRC/pdlaed3.f b/SRC/pdlaed3.f index 9f1f6305..6e43c7dd 100644 --- a/SRC/pdlaed3.f +++ 
b/SRC/pdlaed3.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, $ W, Z, U, LDU, BUF, INDX, INDCOL, INDROW, $ INDXR, INDXC, CTOT, NPCOL, INFO ) @@ -7,6 +13,7 @@ SUBROUTINE PDLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER DCOL, DROW, ICTXT, INFO, K, LDU, N, NB, NPCOL DOUBLE PRECISION RHO @@ -148,18 +155,54 @@ SUBROUTINE PDLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, * .. Intrinsic Functions .. INTRINSIC MOD, SIGN, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters. 
* INFO = 0 * * Quick return if possible * - IF( K.EQ.0 ) - $ RETURN + IF( K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DCOL, DROW, ICTXT, INFO, K, + $ LDU, N, NB, NPCOL, RHO, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAED3 inputs:,DCOL:',I5,',DROW:',I5, + $ ',ICTXT:',I5,',INFO:',I5,',K:',I5,',LDU:',I5, + $ ',N:',I5,',NB:',I5,',NPCOL:',I5, + $ ',RHO:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * ROW = DROW COL = DCOL @@ -344,6 +387,10 @@ SUBROUTINE PDLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, * 190 CONTINUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAED3 diff --git a/SRC/pdlaedz.f b/SRC/pdlaedz.f index df903c34..41df3748 100644 --- a/SRC/pdlaedz.f +++ b/SRC/pdlaedz.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAEDZ( N, N1, ID, Q, IQ, JQ, LDQ, DESCQ, Z, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLAEDZ( N, N1, ID, Q, IQ, JQ, LDQ, DESCQ, Z, WORK ) * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER ID, IQ, JQ, LDQ, N, N1 * .. @@ -46,15 +53,50 @@ SUBROUTINE PDLAEDZ( N, N1, ID, Q, IQ, JQ, LDQ, DESCQ, Z, WORK ) INTEGER NUMROC EXTERNAL NUMROC * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. 
Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ICTXT = DESCQ( CTXT_ ) NB = DESCQ( NB_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) ID, IQ, JQ, LDQ, N, N1, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAEDZ inputs:,ID:',I5,',IQ:',I5,',JQ:',I5, + $ ',LDQ:',I5,',N:',I5,',N1:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF CALL INFOG2L( ID, ID, DESCQ, NPROW, NPCOL, MYROW, MYCOL, IIQ, JJQ, $ IQROW, IQCOL ) N2 = N - N1 @@ -145,6 +187,10 @@ SUBROUTINE PDLAEDZ( N, N1, ID, Q, IQ, JQ, LDQ, DESCQ, Z, WORK ) CALL DGEBR2D( ICTXT, 'All', ' ', N, 1, Z, N, IQROW, IQCOL ) END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAEDZ diff --git a/SRC/pdlaevswp.f b/SRC/pdlaevswp.f index 7e6ed409..ba9cd9ac 100644 --- a/SRC/pdlaevswp.f +++ b/SRC/pdlaevswp.f @@ -1,4 +1,10 @@ * +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +* +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, $ WORK, LWORK ) @@ -8,6 +14,7 @@ SUBROUTINE PDLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, * and University of California, Berkeley. * April 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IZ, JZ, LDZI, LWORK, N * .. 
@@ -152,11 +159,45 @@ SUBROUTINE PDLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IZ, JZ, LDZI, LWORK, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAEVSWP inputs:,IZ:',I5,',JZ:',I5,',LDZI:',I5, + $ ',LWORK:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF IAM = MYROW + MYCOL*NPROW IAM = MYROW*NPCOL + MYCOL * @@ -279,6 +320,10 @@ SUBROUTINE PDLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, 100 CONTINUE * 110 CONTINUE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAEVSWP diff --git a/SRC/pdlahqr.f b/SRC/pdlahqr.f index 7d857dd1..6f6fb7cd 100644 --- a/SRC/pdlahqr.f +++ b/SRC/pdlahqr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, $ ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK, $ ILWORK, INFO ) @@ -6,6 +12,7 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. LOGICAL WANTT, WANTZ INTEGER IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N @@ -279,14 +286,35 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SIGN, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * INFO = 0 * ITERMAX = 30*( IHI-ILO+1 ) * ITERMAX = 0 - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * NODE (IAFIRST,JAFIRST) OWNS A(1,1) * @@ -297,6 +325,21 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, JAFIRST = DESCA( CSRC_ ) LDZ = DESCZ( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) WANTT, WANTZ, IHI, IHIZ, ILO, + $ ILOZ, ILWORK, INFO, LWORK, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAHQR inputs:,WANTT:',L2,',WANTZ:',L2, + $ ',IHI:',I5,',IHIZ:',I5,',ILO:',I5, + $ ',ILOZ:',I5,',ILWORK:',I5,',INFO:',I5, + $ ',LWORK:',I5,',N:',I5,',NPROW:',I5, + $ 
',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NODE = MYROW*NPCOL + MYCOL NUM = NPROW*NPCOL LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) @@ -346,6 +389,10 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, $ -1, -1 ) IF( INFO.LT.0 ) THEN CALL PXERBLA( CONTXT, 'PDLAHQR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -373,6 +420,10 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, WR( ILO ) = ZERO END IF WI( ILO ) = ZERO +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -2002,6 +2053,10 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, * Failure to converge in remaining number of iterations * INFO = I +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * 430 CONTINUE @@ -2070,6 +2125,10 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, 450 CONTINUE CALL DGSUM2D( CONTXT, 'All', ' ', N, 1, WR, N, -1, -1 ) CALL DGSUM2D( CONTXT, 'All', ' ', N, 1, WI, N, -1, -1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * END OF PDLAHQR diff --git a/SRC/pdlahrd.f b/SRC/pdlahrd.f index da6f7267..30b9c536 100644 --- a/SRC/pdlahrd.f +++ b/SRC/pdlahrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, $ DESCY, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IY, JA, JY, K, N, NB * .. @@ -158,15 +165,49 @@ SUBROUTINE PDLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( N.LE.1 ) - $ RETURN + IF( N.LE.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IY, JA, JY, K, N, NB, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAHRD inputs:,IA:',I5,',IY:',I5,',JA:',I5, + $ ',JY:',I5,',K:',I5,',N:',I5,',NB:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * IOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA+K, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, @@ -280,6 +321,10 @@ SUBROUTINE PDLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, * CALL PDELSET( A, K+NB+IA-1, J, DESCA, EI ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAHRD diff --git a/SRC/pdlamch.f b/SRC/pdlamch.f index 9a3ebb6c..99efe269 100644 --- a/SRC/pdlamch.f +++ b/SRC/pdlamch.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* DOUBLE PRECISION FUNCTION PDLAMCH( ICTXT, CMACH ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ DOUBLE PRECISION FUNCTION PDLAMCH( ICTXT, CMACH ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER CMACH INTEGER ICTXT @@ -62,7 +69,23 @@ DOUBLE PRECISION FUNCTION PDLAMCH( ICTXT, CMACH ) DOUBLE PRECISION DLAMCH EXTERNAL DLAMCH, LSAME * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * TEMP = DLAMCH( CMACH ) IDUMM = 0 diff --git a/SRC/pdlamr1d.f b/SRC/pdlamr1d.f index 08db3c6f..6f9d7d14 100644 --- a/SRC/pdlamr1d.f +++ b/SRC/pdlamr1d.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) * and University of California, Berkeley. * October 15, 1999 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IB, JA, JB, N * .. @@ -105,15 +112,42 @@ SUBROUTINE PDLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) INTEGER NUMROC EXTERNAL NUMROC * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * DO 10 I = 1, DLEN_ DESCAA( I ) = DESCA( I ) @@ -129,6 +163,18 @@ SUBROUTINE PDLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) CALL PDGEMR2D( 1, N, A, IA, JA, DESCAA, B, IB, JB, DESCBB, ICTXT ) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IB, JA, JB, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAMR1D inputs:,IA:',I5,',IB:',I5,',JA:',I5, + $ ',JB:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NQ = NUMROC( N, DESCB( NB_ ), MYCOL, 0, NPCOL ) * IF( MYROW.EQ.0 ) THEN @@ -137,6 +183,10 @@ SUBROUTINE PDLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) CALL DGEBR2D( ICTXT, 'C', ' ', NQ, 1, B, NQ, 0, MYCOL ) END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAMR1D diff --git a/SRC/pdlamve.f b/SRC/pdlamve.f index 7686cc55..711db2e5 100644 --- a/SRC/pdlamve.f +++ b/SRC/pdlamve.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAMVE( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, DWORK ) * @@ -8,6 +14,7 @@ SUBROUTINE PDLAMVE( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -165,13 +172,42 @@ SUBROUTINE PDLAMVE( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Find underlying mesh properties. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, JA, JB, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAMVE inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',JA:',I5,',JB:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Decode input parameters. * UPPER = LSAME( UPLO, 'U' ) @@ -198,6 +234,10 @@ SUBROUTINE PDLAMVE( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAMVE diff --git a/SRC/pdlange.f b/SRC/pdlange.f index 98be0941..60310cc8 100644 --- a/SRC/pdlange.f +++ b/SRC/pdlange.f @@ -1,5 +1,12 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  
All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* DOUBLE PRECISION FUNCTION PDLANGE( NORM, M, N, A, IA, JA, DESCA, $ WORK ) + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -175,12 +182,40 @@ DOUBLE PRECISION FUNCTION PDLANGE( NORM, M, N, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) NORM, IA, JA, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLANGE inputs:,NORM:',A5,',IA:',I5,',JA:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) @@ -332,6 +367,10 @@ DOUBLE PRECISION FUNCTION PDLANGE( NORM, M, N, A, IA, JA, DESCA, * PDLANGE = VALUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLANGE diff --git a/SRC/pdlanhs.f b/SRC/pdlanhs.f index 2efe6a6f..6fedf905 100644 --- a/SRC/pdlanhs.f +++ b/SRC/pdlanhs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* DOUBLE PRECISION FUNCTION PDLANHS( NORM, N, A, IA, JA, DESCA, $ WORK ) * @@ -6,6 +12,7 @@ DOUBLE PRECISION FUNCTION PDLANHS( NORM, N, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, JA, N @@ -169,12 +176,40 @@ DOUBLE PRECISION FUNCTION PDLANHS( NORM, N, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) NORM, IA, JA, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLANHS inputs:,NORM:',A5,',IA:',I5,',JA:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) @@ -733,6 +768,10 @@ DOUBLE PRECISION FUNCTION PDLANHS( NORM, N, A, IA, JA, DESCA, * PDLANHS = VALUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLANHS diff --git a/SRC/pdlansy.f b/SRC/pdlansy.f index 9d100b22..54063997 100644 --- a/SRC/pdlansy.f +++ b/SRC/pdlansy.f @@ -1,5 +1,12 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* DOUBLE PRECISION FUNCTION PDLANSY( NORM, UPLO, N, A, IA, JA, $ DESCA, WORK ) + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -193,12 +200,40 @@ DOUBLE PRECISION FUNCTION PDLANSY( NORM, UPLO, N, A, IA, JA, * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters and local indexes. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) NORM, UPLO, IA, JA, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLANSY inputs:,NORM:',A5,',UPLO:',A5,',IA:',I5, + $ ',JA:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) * @@ -857,6 +892,10 @@ DOUBLE PRECISION FUNCTION PDLANSY( NORM, UPLO, N, A, IA, JA, * PDLANSY = VALUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLANSY diff --git a/SRC/pdlantr.f b/SRC/pdlantr.f index 9484dfbd..ab476569 100644 --- a/SRC/pdlantr.f +++ b/SRC/pdlantr.f @@ -1,5 +1,12 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  
+* +* +#include "SL_Context_fortran_include.h" +* DOUBLE PRECISION FUNCTION PDLANTR( NORM, UPLO, DIAG, M, N, A, $ IA, JA, DESCA, WORK ) + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -191,12 +198,41 @@ DOUBLE PRECISION FUNCTION PDLANTR( NORM, UPLO, DIAG, M, N, A, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, NORM, UPLO, IA, JA, M, + $ N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLANTR inputs:,DIAG:',A5,',NORM:',A5,',UPLO:',A5, + $ ',IA:',I5,',JA:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * UDIAG = LSAME( DIAG, 'U' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, @@ -1097,6 +1133,10 @@ DOUBLE PRECISION FUNCTION PDLANTR( NORM, UPLO, DIAG, M, N, A, * PDLANTR = VALUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLANTR diff --git a/SRC/pdlapiv.f b/SRC/pdlapiv.f index 50613de0..535b3d9d 100644 --- a/SRC/pdlapiv.f +++ b/SRC/pdlapiv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, $ DESCA, IPIV, IP, JP, DESCIP, IWORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, * and University of California, Berkeley. * November 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER*1 DIREC, PIVROC, ROWCOL INTEGER IA, IP, JA, JP, M, N @@ -222,19 +229,55 @@ SUBROUTINE PDLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, * .. Intrinsic Functions .. INTRINSIC MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIREC, PIVROC, ROWCOL, IA, IP, + $ JA, JP, M, N, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDLAPIV inputs:,DIREC:',A5,',PIVROC:',A5, + $ ',ROWCOL:',A5,',IA:',I5,',IP:',I5, + $ ',JA:',I5,',JP:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF ROWPVT = LSAME( ROWCOL, 'R' ) * * If we're pivoting the rows of sub( A ) * IF( ROWPVT ) THEN - IF( M.LE.1 .OR. N.LT.1 ) - $ RETURN + IF( M.LE.1 .OR. 
N.LT.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * If the pivot vector is already distributed correctly * @@ -293,8 +336,13 @@ SUBROUTINE PDLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, * Otherwise, we're pivoting the columns of sub( A ) * ELSE - IF( M.LT.1 .OR. N.LE.1 ) - $ RETURN + IF( M.LT.1 .OR. N.LE.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * If the pivot vector is already distributed correctly * @@ -349,6 +397,10 @@ SUBROUTINE PDLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, END IF END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAPIV diff --git a/SRC/pdlapv2.f b/SRC/pdlapv2.f index a8e48204..cc137bf1 100644 --- a/SRC/pdlapv2.f +++ b/SRC/pdlapv2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIREC, ROWCOL INTEGER IA, IP, JA, JP, M, N @@ -167,15 +174,41 @@ SUBROUTINE PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * ROWPVT = LSAME( ROWCOL, 'R' ) IF( ROWPVT ) THEN - IF( M.LE.1 .OR. 
N.LT.1 ) - $ RETURN + IF( M.LE.1 .OR. N.LT.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF ELSE - IF( M.LT.1 .OR. N.LE.1 ) - $ RETURN + IF( M.LT.1 .OR. N.LE.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF END IF FORWRD = LSAME( DIREC, 'F' ) * @@ -188,6 +221,20 @@ SUBROUTINE PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIREC, ROWCOL, IA, IP, JA, JP, + $ M, N, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDLAPV2 inputs:,DIREC:',A5,',ROWCOL:',A5, + $ ',IA:',I5,',IP:',I5,',JA:',I5,',JP:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * If I'm applying pivots from beginning to end (e.g., repeating * pivoting done earlier). Thus this section computes P * sub( A ). * @@ -406,6 +453,10 @@ SUBROUTINE PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End PDLAPV2 diff --git a/SRC/pdlaqge.f b/SRC/pdlaqge.f index 9203c371..fb95e914 100644 --- a/SRC/pdlaqge.f +++ b/SRC/pdlaqge.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, EQUED ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER EQUED INTEGER IA, JA, M, N @@ -177,12 +184,32 @@ SUBROUTINE PDLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * .. Intrinsic Functions .. INTRINSIC MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) THEN EQUED = 'N' +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -190,6 +217,21 @@ SUBROUTINE PDLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) EQUED, IA, JA, M, N, AMAX, COLCND, + $ ROWCND, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDLAQGE inputs:,EQUED:',A5,',IA:',I5,',JA:',I5, + $ ',M:',I5,',N:',I5,',AMAX:',F9.4, + $ ',COLCND:',F9.4,',ROWCND:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) @@ -263,6 +305,10 @@ SUBROUTINE PDLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAQGE diff --git a/SRC/pdlaqr0.f b/SRC/pdlaqr0.f index 9153500d..0f34df04 100644 --- a/SRC/pdlaqr0.f +++ b/SRC/pdlaqr0.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  
All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* RECURSIVE SUBROUTINE PDLAQR0( WANTT, WANTZ, N, ILO, IHI, H, $ DESCH, WR, WI, ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, $ IWORK, LIWORK, INFO, RECLEVEL ) @@ -10,6 +16,7 @@ RECURSIVE SUBROUTINE PDLAQR0( WANTT, WANTZ, N, ILO, IHI, H, * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -290,10 +297,43 @@ RECURSIVE SUBROUTINE PDLAQR0( WANTT, WANTZ, N, ILO, IHI, H, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, INT, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F INFO = 0 ICTXT = DESCH( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IHI, IHIZ, ILO, ILOZ, INFO, + $ LIWORK, LWORK, N, RECLEVEL, + $ WANTT, WANTZ, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAQR0 inputs:,IHI:',I5,',IHIZ:',I5,',ILO:',I5, + $ ',ILOZ:',I5,',INFO:',I5,',LIWORK:',I5, + $ ',LWORK:',I5,',N:',I5,',RECLEVEL:',I5, + $ ',WANTT:',L2,',WANTZ:',L2, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL RECURSION = RECLEVEL .LT. 
RECMAX * @@ -302,6 +342,10 @@ RECURSIVE SUBROUTINE PDLAQR0( WANTT, WANTZ, N, ILO, IHI, H, IF( N.EQ.0 ) THEN WORK( 1 ) = ONE IWORK( 1 ) = 1 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -425,6 +469,10 @@ RECURSIVE SUBROUTINE PDLAQR0( WANTT, WANTZ, N, ILO, IHI, H, IF( LQUERY ) THEN WORK( 1 ) = DBLE( LWKOPT ) IWORK( 1 ) = LIWKOPT +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -922,6 +970,10 @@ RECURSIVE SUBROUTINE PDLAQR0( WANTT, WANTZ, N, ILO, IHI, H, IWORK( 2 ) = SWEEP IWORK( 3 ) = TOTNS END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAQR0 diff --git a/SRC/pdlaqr1.f b/SRC/pdlaqr1.f index 2226c757..09cfffec 100644 --- a/SRC/pdlaqr1.f +++ b/SRC/pdlaqr1.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, $ DESCA, WR, WI, ILOZ, IHIZ, Z, $ DESCZ, WORK, LWORK, IWORK, @@ -11,6 +17,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -301,13 +308,34 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD, SIGN, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * INFO = 0 * ITERMAX = 30*( IHI-ILO+1 ) - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * NODE (IAFIRST,JAFIRST) OWNS A(1,1) * @@ -318,6 +346,21 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, JAFIRST = DESCA( CSRC_ ) LDZ = DESCZ( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) WANTT, WANTZ, IHI, IHIZ, ILO, + $ ILOZ, ILWORK, INFO, LWORK, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAQR1 inputs:,WANTT:',L2,',WANTZ:',L2, + $ ',IHI:',I5,',IHIZ:',I5,',ILO:',I5, + $ ',ILOZ:',I5,',ILWORK:',I5,',INFO:',I5, + $ ',LWORK:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NODE = MYROW*NPCOL + MYCOL NUM = NPROW*NPCOL LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) @@ -341,6 +384,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, IF( LWORK.EQ.-1 .OR. 
ILWORK.EQ.-1 ) THEN WORK( 1 ) = DBLE( LWKOPT ) IWORK( 1 ) = 3 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSEIF( LWORK.LT.LWKOPT ) THEN INFO = -15 @@ -371,6 +418,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, IF( INFO.LT.0 ) THEN CALL PXERBLA( CONTXT, 'PDLAQR1', -INFO ) WORK( 1 ) = DBLE( LWKOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -400,6 +451,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, END IF WI( ILO ) = ZERO WORK( 1 ) = DBLE( LWKOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -415,6 +470,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, $ WORK( S2+1 ), NH, WORK( S3+1 ), 4*LDS*LDS, $ INFO ) WORK( 1 ) = DBLE( LWKOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -1326,7 +1385,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, $ ( MOD( ISTART-1, HBL ).LT.HBL-2 ) .AND. $ ( ICURROW( KI ).EQ.MYROW ) ) THEN IROW1 = MIN( K2( KI )+1, I-1 ) + 1 - CALL INFOG1L( IROW1, HBL, NPCOL, MYCOL, DESCA(CSRC_), + CALL INFOG1L( IROW1, HBL, NPCOL, MYCOL, DESCA(CSRC_), $ ITMP1, ITMP2 ) ITMP2 = NUMROC( I2, HBL, MYCOL, DESCA(CSRC_), NPCOL ) II = KROW( KI ) @@ -1373,7 +1432,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, * IROW1 = KROW( KI ) IROW2 = KP2ROW( KI ) - CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, + CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, $ DESCA(CSRC_), ICOL1, ICOL2 ) ICOL2 = NUMROC(I2,HBL,MYCOL,DESCA(CSRC_),NPCOL ) IF( ( MOD( K-1, HBL ).LT.HBL-2 ) .OR. 
@@ -1441,7 +1500,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, * IROW1 = KROW( KI ) + K - ISTART IROW2 = KP2ROW( KI ) + K - ISTART - CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, + CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, $ DESCA(CSRC_),ICOL1, ICOL2 ) ICOL2 = NUMROC(I2,HBL,MYCOL,DESCA(CSRC_),NPCOL ) IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND. @@ -1531,7 +1590,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, * IROW1 = KROW( KI ) + K - ISTART IROW2 = KP2ROW( KI ) + K - ISTART - CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, + CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, $ DESCA(CSRC_), ICOL1, ICOL2 ) ICOL2 = NUMROC(I2,HBL,MYCOL,DESCA(CSRC_),NPCOL ) IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND. @@ -1972,9 +2031,9 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, CALL DGESD2D( CONTXT, LIHIH-LILOH+1, 1, $ A( ( ITMP1-1 )*LDA+LILOH ), $ LDA, MYROW, RIGHT ) - CALL INFOG1L( K, HBL, NPCOL, MYCOL, + CALL INFOG1L( K, HBL, NPCOL, MYCOL, $ DESCA(CSRC_), ITMP1, ITMP2 ) - ITMP2 = NUMROC( K+1, HBL, MYCOL, + ITMP2 = NUMROC( K+1, HBL, MYCOL, $ DESCA(CSRC_), NPCOL ) CALL DGERV2D( CONTXT, LIHIH-LILOH+1, 1, $ A( ( ITMP1-1 )*LDA+LILOH ), @@ -2090,7 +2149,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, IF( ( MOD( K1( KI )-1, HBL ).GE.HBL-2 ) .AND. $ ( ( MYCOL.EQ.ICURCOL( KI ) ) .OR. ( RIGHT.EQ. $ ICURCOL( KI ) ) ) .AND. ( NPCOL.GT.1 ) ) THEN - CALL INFOG1L( K2( KI )+1, HBL, NPCOL, MYCOL, + CALL INFOG1L( K2( KI )+1, HBL, NPCOL, MYCOL, $ DESCA(CSRC_), KCOL( KI ), ITMP2 ) ITMP2 = NUMROC( N, HBL, MYCOL, DESCA(CSRC_), NPCOL ) END IF @@ -2099,7 +2158,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, $ ICURCOL( KI ) ) ) .AND. 
( NPCOL.GT.1 ) ) THEN CALL INFOG1L( 1, HBL, NPCOL, MYCOL,DESCA(CSRC_),ITMP2, $ KP2COL( KI ) ) - KP2COL( KI ) = NUMROC( K2( KI )+3, HBL, MYCOL, + KP2COL( KI ) = NUMROC( K2( KI )+3, HBL, MYCOL, $ DESCA(CSRC_), NPCOL ) END IF K1( KI ) = K2( KI ) + 1 @@ -2131,6 +2190,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, * INFO = I WORK( 1 ) = DBLE( LWKOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * 430 CONTINUE @@ -2200,6 +2263,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, $ INFO ) IF( INFO.NE.0 ) THEN WORK( 1 ) = DBLE( LWKOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF IF( NODE.NE.0 ) THEN @@ -2239,6 +2306,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, IWORK( 1 ) = TOTIT IWORK( 2 ) = TOTSW IWORK( 3 ) = TOTNS +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * END OF PDLAQR1 diff --git a/SRC/pdlaqr2.f b/SRC/pdlaqr2.f index bae5fb2b..c910d523 100644 --- a/SRC/pdlaqr2.f +++ b/SRC/pdlaqr2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA, $ ILOZ, IHIZ, Z, DESCZ, NS, ND, SR, SI, T, LDT, $ V, LDV, WR, WI, WORK, LWORK ) @@ -9,6 +15,7 @@ SUBROUTINE PDLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -241,12 +248,33 @@ SUBROUTINE PDLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. 
Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * INFO = 0 * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * NODE (IAFIRST,JAFIRST) OWNS A(1,1) * @@ -257,6 +285,23 @@ SUBROUTINE PDLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA, JAFIRST = DESCA( CSRC_ ) LDZ = DESCZ( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IHIZ, ILOZ, KBOT, KTOP, LDT, + $ LDV, LWORK, N, ND, NS, + $ NW, WANTT, WANTZ, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAQR2 inputs:,IHIZ:',I5,',ILOZ:',I5, + $ ',KBOT:',I5,',KTOP:',I5,',LDT:',I5,',LDV:',I5, + $ ',LWORK:',I5,',N:',I5,',ND:',I5, + $ ',NS:',I5,',NW:',I5,',WANTT:',L2, + $ ',WANTZ:',L2,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NODE = MYROW*NPCOL + MYCOL LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) diff --git a/SRC/pdlaqr3.f b/SRC/pdlaqr3.f index caa09756..0581919d 100644 --- a/SRC/pdlaqr3.f +++ b/SRC/pdlaqr3.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, $ DESCH, ILOZ, IHIZ, Z, DESCZ, NS, ND, $ SR, SI, V, DESCV, NH, T, DESCT, NV, @@ -12,6 +18,7 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. 
@@ -174,7 +181,7 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, * are stored in SR(KBOT-ND+1) through SR(KBOT) and * SI(KBOT-ND+1) through SI(KBOT), respectively. * -* V (global workspace) DOUBLE PRECISION array, dimension +* V (global workspace) DOUBLE PRECISION array, dimension * (DESCV(LLD_),*) * An NW-by-NW distributed work array. * @@ -184,7 +191,7 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, * NH (input) INTEGER scalar * The number of columns of T. NH.GE.NW. * -* T (global workspace) DOUBLE PRECISION array, dimension +* T (global workspace) DOUBLE PRECISION array, dimension * (DESCV(LLD_),*) * * DESCT (global and local input) INTEGER array of dimension DLEN_. @@ -194,7 +201,7 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, * The number of rows of work array WV available for * workspace. NV.GE.NW. * -* WV (global workspace) DOUBLE PRECISION array, dimension +* WV (global workspace) DOUBLE PRECISION array, dimension * (DESCW(LLD_),*) * * DESCW (global and local input) INTEGER array of dimension DLEN_. @@ -282,9 +289,42 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F ICTXT = DESCH( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IHIZ, ILOZ, KBOT, KTOP, LWORK, + $ N, ND, NH, NS, NV, + $ NW, LIWORK, RECLEVEL, WANTT, WANTZ, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAQR3 inputs:,IHIZ:',I5,',ILOZ:',I5, + $ ',KBOT:',I5,',KTOP:',I5,',LWORK:',I5, + $ ',N:',I5,',ND:',I5,',NH:',I5,',NS:',I5, + $ ',NV:',I5,',NW:',I5,',LIWORK:',I5, + $ ',RECLEVEL:',I5,',WANTT:',L2,',WANTZ:',L2, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL * * Extract local leading dimensions, blockfactors, offset for @@ -342,7 +382,7 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, $ .AND. RECLEVEL.LT.RECMAX ) THEN CALL PDLAQR0( .TRUE., .TRUE., JW+IROFFH, 1+IROFFH, $ JW+IROFFH, T, DESCT, SR, SI, 1, JW, V, DESCV, - $ WORK, -1, IWORK, LIWORK-NSEL, INFQR, + $ WORK, -1, IWORK, LIWORK-NSEL, INFQR, $ RECLEVEL+1 ) LWK3 = INT( WORK( 1 ) ) IWRK1 = IWORK( 1 ) @@ -410,18 +450,33 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, * IWORK(1:NSEL) is used as the array SELECT for PDTRORD. * IWORK( 1 ) = ILWKOPT + NSEL - IF( LQUERY ) - $ RETURN + IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Nothing to do for an empty active block ... NS = 0 ND = 0 - IF( KTOP.GT.KBOT ) - $ RETURN + IF( KTOP.GT.KBOT ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ... nor for an empty deflation window. 
* - IF( NW.LT.1 ) - $ RETURN + IF( NW.LT.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Machine constants. * @@ -457,6 +512,10 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, IF( KWTOP.GT.KTOP ) $ CALL PDELSET( H, KWTOP, KWTOP-1 , DESCH, ZERO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -489,6 +548,10 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, CALL PDELSET( H, I+1, I+1, DESCH, DD ) END IF WORK( 1 ) = DBLE( LWKOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * diff --git a/SRC/pdlaqr4.f b/SRC/pdlaqr4.f index 105deaa7..c626b396 100644 --- a/SRC/pdlaqr4.f +++ b/SRC/pdlaqr4.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAQR4( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, $ ILOZ, IHIZ, Z, DESCZ, T, LDT, V, LDV, WORK, $ LWORK, INFO ) @@ -9,6 +15,7 @@ SUBROUTINE PDLAQR4( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -226,14 +233,35 @@ SUBROUTINE PDLAQR4( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * INFO = 0 * NH = IHI - ILO + 1 NZ = IHIZ - ILOZ + 1 - IF( N.EQ.0 .OR. 
NH.EQ.0 ) - $ RETURN + IF( N.EQ.0 .OR. NH.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * NODE (IAFIRST,JAFIRST) OWNS A(1,1) * @@ -244,6 +272,21 @@ SUBROUTINE PDLAQR4( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, JAFIRST = DESCA( CSRC_ ) LDZ = DESCZ( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) WANTT, WANTZ, IHI, IHIZ, ILO, + $ ILOZ, INFO, LDT, LDV, LWORK, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAQR4 inputs:,WANTT:',L2,',WANTZ:',L2, + $ ',IHI:',I5,',IHIZ:',I5,',ILO:',I5, + $ ',ILOZ:',I5,',INFO:',I5,',LDT:',I5,',LDV:',I5, + $ ',LWORK:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NODE = MYROW*NPCOL + MYCOL LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) diff --git a/SRC/pdlaqr5.f b/SRC/pdlaqr5.f index 490c42fb..11d6a788 100644 --- a/SRC/pdlaqr5.f +++ b/SRC/pdlaqr5.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, $ SR, SI, H, DESCH, ILOZ, IHIZ, Z, DESCZ, WORK, $ LWORK, IWORK, LIWORK ) @@ -9,6 +15,7 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -69,7 +76,7 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * parts of the NSHFTS shifts of origin that define the * multi-shift QR sweep. 
* -* H (local input/output) DOUBLE PRECISION array of size +* H (local input/output) DOUBLE PRECISION array of size * (DESCH(LLD_),*) * On input H contains a Hessenberg matrix. On output a * multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied @@ -179,11 +186,44 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, EXTERNAL DGEMM, DLABAD, DLAMOV, DLAQR1, DLARFG, DLASET, $ DTRMM, DLAQR6 * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * INFO = 0 ICTXT = DESCH( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IHIZ, ILOZ, KACC22, KBOT, KTOP, + $ N, NSHFTS, LWORK, + $ LIWORK, WANTT, WANTZ, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAQR5 inputs:,IHIZ:',I5,',ILOZ:',I5, + $ ',KACC22:',I5,',KBOT:',I5,',KTOP:',I5, + $ ',N:',I5,',NSHFTS:',I5,',LWORK:',I5, + $ ',LIWORK:',I5,',WANTT:',L2,',WANTZ:',L2, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL LLDH = DESCH( LLD_ ) LLDZ = DESCZ( LLD_ ) @@ -193,14 +233,24 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * * If there are no shifts, then there is nothing to do. * - IF( .NOT. LQUERY .AND. NSHFTS.LT.2 ) - $ RETURN + IF( .NOT. LQUERY .AND. 
NSHFTS.LT.2 ) THEN
+*
+*        Capture the subroutine exit in the trace file
+*
+         AOCL_DTL_TRACE_EXIT_F
+         RETURN
+      END IF
 *
 *     If the active block is empty or 1-by-1, then there
 *     is nothing to do.
 *
-      IF( .NOT. LQUERY .AND. KTOP.GE.KBOT )
-     $   RETURN
+      IF( .NOT. LQUERY .AND. KTOP.GE.KBOT ) THEN
+*
+*        Capture the subroutine exit in the trace file
+*
+         AOCL_DTL_TRACE_EXIT_F
+         RETURN
+      END IF
 *
 *     Shuffle shifts into pairs of real shifts and pairs of
 *     complex conjugate shifts assuming complex conjugate
@@ -307,11 +357,21 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
      $        MAX( HROWS*NB, HCOLS*NB )
          WORK(1) = DBLE(LWKOPT)
          IWORK(1) = 5*NUMWIN
+*
+*        Capture the subroutine exit in the trace file
+*
+         AOCL_DTL_TRACE_EXIT_F
          RETURN
       END IF
 *
 *     Check if KTOP and KBOT are valid.
 *
-      IF( KTOP.LT.1 .OR. KBOT.GT.N ) RETURN
+      IF( KTOP.LT.1 .OR. KBOT.GT.N ) THEN
+*
+*        Capture the subroutine exit in the trace file
+*
+         AOCL_DTL_TRACE_EXIT_F
+         RETURN
+      END IF
 *
 *     Create and chase NUMWIN chains of NBMPS bulges.
@@ -941,6 +1001,12 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
 *
 *           If we have no more windows, return.
 *
-            IF( ANMWIN.LT.1 ) RETURN
+            IF( ANMWIN.LT.1 ) THEN
+*
+*              Capture the subroutine exit in the trace file
+*
+               AOCL_DTL_TRACE_EXIT_F
+               RETURN
+            END IF
 *
          ELSE
@@ -2247,6 +2313,12 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
 *
 *        If we have no more windows, return.
 *
-         IF( ANMWIN.LT.1 ) RETURN
+         IF( ANMWIN.LT.1 ) THEN
+*
+*           Capture the subroutine exit in the trace file
+*
+            AOCL_DTL_TRACE_EXIT_F
+            RETURN
+         END IF
 *
 *        Check for any more windows to bring over the border.
diff --git a/SRC/pdlaqsy.f b/SRC/pdlaqsy.f
index c2763ee9..58bfd7fc 100644
--- a/SRC/pdlaqsy.f
+++ b/SRC/pdlaqsy.f
@@ -1,3 +1,9 @@
+*
+* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved.
+*
+*
+#include "SL_Context_fortran_include.h"
+*
       SUBROUTINE PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND,
      $                    AMAX, EQUED )
 *
@@ -6,6 +12,7 @@ SUBROUTINE PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND,
 *     and University of California, Berkeley.
 *     May 1, 1997
 *
+      USE LINK_TO_C_GLOBALS
 *     ..
Scalar Arguments .. CHARACTER EQUED, UPLO INTEGER IA, JA, N @@ -179,12 +186,32 @@ SUBROUTINE PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * IF( N.LE.0 ) THEN EQUED = 'N' +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -192,6 +219,20 @@ SUBROUTINE PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) EQUED, UPLO, IA, JA, N, AMAX, + $ SCOND, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDLAQSY inputs:,EQUED:',A5,',UPLO:',A5, + $ ',IA:',I5,',JA:',I5,',N:',I5,',AMAX:',F9.4, + $ ',SCOND:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) LDA = DESCA( LLD_ ) @@ -352,6 +393,10 @@ SUBROUTINE PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAQSY diff --git a/SRC/pdlared1d.f b/SRC/pdlared1d.f index 41a5fa5f..bd94f746 100644 --- a/SRC/pdlared1d.f +++ b/SRC/pdlared1d.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARED1D( N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLARED1D( N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK ) * and University of California, Berkeley. * December 12, 2005 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, JA, LWORK, N * .. @@ -138,12 +145,46 @@ SUBROUTINE PDLARED1D( N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK ) * .. Intrinsic Functions .. INTRINSIC MIN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL BLACS_GRIDINFO( DESC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, JA, LWORK, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLARED1D inputs:,IA:',I5,',JA:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NB = DESC( MB_ ) * * @@ -166,6 +207,10 @@ SUBROUTINE PDLARED1D( N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK ) 20 CONTINUE 30 CONTINUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARED1D diff --git a/SRC/pdlared2d.f b/SRC/pdlared2d.f index 
43099014..8d1042c3 100644 --- a/SRC/pdlared2d.f +++ b/SRC/pdlared2d.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARED2D( N, IA, JA, DESC, BYROW, BYALL, WORK, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLARED2D( N, IA, JA, DESC, BYROW, BYALL, WORK, LWORK ) * and University of California, Berkeley. * December 12, 2005 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, JA, LWORK, N * .. @@ -135,12 +142,46 @@ SUBROUTINE PDLARED2D( N, IA, JA, DESC, BYROW, BYALL, WORK, LWORK ) * .. Intrinsic Functions .. INTRINSIC MIN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL BLACS_GRIDINFO( DESC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, JA, LWORK, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLARED2D inputs:,IA:',I5,',JA:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF MB = DESC( MB_ ) * DO 30 PROW = 0, NPROW - 1 @@ -163,6 +204,10 @@ SUBROUTINE PDLARED2D( N, IA, JA, DESC, BYROW, BYALL, WORK, LWORK ) 
20 CONTINUE 30 CONTINUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PSLARED2D diff --git a/SRC/pdlarf.f b/SRC/pdlarf.f index 41368d6d..bd3a4104 100644 --- a/SRC/pdlarf.f +++ b/SRC/pdlarf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, $ C, IC, JC, DESCC, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, M, N @@ -257,18 +264,52 @@ SUBROUTINE PDLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( M.LE.0 .OR. N.LE.0 ) - $ RETURN + IF( M.LE.0 .OR. N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters. 
* ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, IC, INCV, IV, JC, JV, + $ M, N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLARF inputs:,SIDE:',A5,',IC:',I5,',INCV:',I5, + $ ',IV:',I5,',JC:',I5,',JV:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Figure local indexes * CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC, @@ -805,6 +846,10 @@ SUBROUTINE PDLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARF diff --git a/SRC/pdlarfb.f b/SRC/pdlarfb.f index 2c3b426f..94887287 100644 --- a/SRC/pdlarfb.f +++ b/SRC/pdlarfb.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, $ JV, DESCV, T, C, IC, JC, DESCC, WORK ) * @@ -5,6 +11,7 @@ SUBROUTINE PDLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS, DIRECT, STOREV INTEGER IC, IV, JC, JV, K, M, N @@ -247,17 +254,53 @@ SUBROUTINE PDLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) - $ RETURN + IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, DIRECT, STOREV, + $ IC, IV, JC, JV, K, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLARFB inputs:,SIDE:',A5,',TRANS:',A5, + $ ',DIRECT:',A5,',STOREV:',A5,',IC:',I5, + $ ',IV:',I5,',JC:',I5,',JV:',I5,',K:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * IF( LSAME( TRANS, 'N' ) ) THEN TRANST = 'T' @@ -878,6 +921,10 @@ SUBROUTINE PDLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARFB diff --git a/SRC/pdlarfg.f b/SRC/pdlarfg.f index 653dfa5d..a7567557 100644 --- a/SRC/pdlarfg.f +++ b/SRC/pdlarfg.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, $ TAU ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IAX, INCX, IX, JAX, JX, N DOUBLE PRECISION ALPHA @@ -166,12 +173,41 @@ SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, * .. Intrinsic Functions .. 
INTRINSIC ABS, SIGN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IAX, INCX, IX, JAX, JX, N, ALPHA, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLARFG inputs:,IAX:',I5,',INCX:',I5,',IX:',I5, + $ ',JAX:',I5,',JX:',I5,',N:',I5, + $ ',ALPHA:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * IF( INCX.EQ.DESCX( M_ ) ) THEN * @@ -180,8 +216,13 @@ SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, CALL INFOG2L( IX, JAX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIAX, JJAX, IXROW, IXCOL ) * - IF( MYROW.NE.IXROW ) - $ RETURN + IF( MYROW.NE.IXROW ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Broadcast X(IAX,JAX) across the process row. * @@ -203,8 +244,13 @@ SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, CALL INFOG2L( IAX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIAX, JJAX, IXROW, IXCOL ) * - IF( MYCOL.NE.IXCOL ) - $ RETURN + IF( MYCOL.NE.IXCOL ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Broadcast X(IAX,JAX) across the process column. 
* @@ -223,6 +269,10 @@ SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, * IF( N.LE.0 ) THEN TAU( INDXTAU ) = ZERO +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -274,6 +324,10 @@ SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, END IF END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARFG diff --git a/SRC/pdlarft.f b/SRC/pdlarft.f index 9a3ae474..4980f230 100644 --- a/SRC/pdlarft.f +++ b/SRC/pdlarft.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARFT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, $ T, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLARFT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIRECT, STOREV INTEGER IV, JV, K, N @@ -197,15 +204,49 @@ SUBROUTINE PDLARFT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, * .. Intrinsic Functions .. INTRINSIC MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( N.LE.0 .OR. K.LE.0 ) - $ RETURN + IF( N.LE.0 .OR. 
K.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ICTXT = DESCV( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIRECT, STOREV, IV, JV, K, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLARFT inputs:,DIRECT:',A5,',STOREV:',A5, + $ ',IV:',I5,',JV:',I5,',K:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * FORWARD = LSAME( DIRECT, 'F' ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, @@ -531,6 +572,10 @@ SUBROUTINE PDLARFT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARFT diff --git a/SRC/pdlarz.f b/SRC/pdlarz.f index f45c1372..d0798568 100644 --- a/SRC/pdlarz.f +++ b/SRC/pdlarz.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, $ IC, JC, DESCC, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, L, M, N @@ -266,18 +273,53 @@ SUBROUTINE PDLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( M.LE.0 .OR. N.LE.0 ) - $ RETURN + IF( M.LE.0 .OR. N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, IC, INCV, IV, JC, JV, + $ L, M, N, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDLARZ inputs:,SIDE:',A5,',IC:',I5,',INCV:',I5, + $ ',IV:',I5,',JC:',I5,',JV:',I5, + $ ',L:',I5,',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Figure local indexes * LEFT = LSAME( SIDE, 'L' ) @@ -907,6 +949,10 @@ SUBROUTINE PDLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARZ diff --git a/SRC/pdlarzb.f b/SRC/pdlarzb.f index 096fba5e..b6736725 100644 --- a/SRC/pdlarzb.f +++ b/SRC/pdlarzb.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, $ IV, JV, DESCV, T, C, IC, JC, DESCC, WORK ) * @@ -5,6 +11,7 @@ SUBROUTINE PDLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER DIRECT, SIDE, STOREV, TRANS INTEGER IC, IV, JC, JV, K, L, M, N @@ -252,18 +259,54 @@ SUBROUTINE PDLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) - $ RETURN + IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIRECT, SIDE, STOREV, TRANS, + $ IC, IV, JC, JV, K, L, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLARZB inputs:,DIRECT:',A5,',SIDE:',A5, + $ ',STOREV:',A5,',TRANS:',A5,',IC:',I5, + $ ',IV:',I5,',JC:',I5,',JV:',I5,',K:',I5, + $ ',L:',I5,',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Check for currently supported options * INFO = 0 @@ -275,6 +318,10 @@ SUBROUTINE PDLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLARZB', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -604,6 +651,10 @@ SUBROUTINE PDLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, * END IF * +* +* Capture the 
subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARZB diff --git a/SRC/pdlarzt.f b/SRC/pdlarzt.f index 5baac6cd..affd629f 100644 --- a/SRC/pdlarzt.f +++ b/SRC/pdlarzt.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, $ T, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIRECT, STOREV INTEGER IV, JV, K, N @@ -209,13 +216,42 @@ SUBROUTINE PDLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, * .. Intrinsic Functions .. INTRINSIC MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCV( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIRECT, STOREV, IV, JV, K, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLARZT inputs:,DIRECT:',A5,',STOREV:',A5, + $ ',IV:',I5,',JV:',I5,',K:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Check for currently supported options * INFO = 0 @@ -227,6 +263,10 @@ SUBROUTINE PDLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLARZT', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -292,6 +332,10 @@ SUBROUTINE PDLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARZT diff --git a/SRC/pdlascl.f b/SRC/pdlascl.f index 3925ed45..3f9d37fa 100644 --- a/SRC/pdlascl.f +++ b/SRC/pdlascl.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER TYPE INTEGER IA, INFO, JA, M, N @@ -161,13 +168,43 @@ SUBROUTINE PDLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ABS, MIN, MOD * .. +* .. 
LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TYPE, IA, INFO, JA, M, N, CFROM, + $ CTO, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDLASCL inputs:,TYPE:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',M:',I5,',N:',I5, + $ ',CFROM:',F9.4,',CTO:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * IF( NPROW.EQ.-1 ) THEN @@ -199,13 +236,22 @@ SUBROUTINE PDLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLASCL', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 .OR. M.EQ.0 ) - $ RETURN + IF( N.EQ.0 .OR. M.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get machine parameters * @@ -521,6 +567,10 @@ SUBROUTINE PDLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, IF( .NOT.DONE ) $ GO TO 10 * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLASCL diff --git a/SRC/pdlase2.f b/SRC/pdlase2.f index a23e640b..ac5c292c 100644 --- a/SRC/pdlase2.f +++ b/SRC/pdlase2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, M, N @@ -154,14 +161,48 @@ SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* + IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, JA, M, N, ALPHA, BETA, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLASE2 inputs:,UPLO:',A5,',IA:',I5,',JA:',I5, + $ ',M:',I5,',N:',I5,',ALPHA:',F9.4, + $ ',BETA:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) @@ -212,8 +253,13 @@ SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) IF( MYCOL.EQ.IACOL ) THEN * MPA = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW ) - IF( MPA.LE.0 ) - $ RETURN + IF( MPA.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF IF( MYROW.EQ.IAROW ) $ MPA = MPA - IROFFA MYDIST = MOD( MYROW-IAROW+NPROW, NPROW ) @@ -321,8 +367,13 @@ SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) IF( MYROW.EQ.IAROW ) THEN * NQA = NUMROC( N+ICOFFA, NBA, MYCOL, IACOL, NPCOL ) - IF( NQA.LE.0 ) - $ RETURN + IF( NQA.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF IF( MYCOL.EQ.IACOL ) $ NQA = NQA - ICOFFA MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL ) @@ -404,6 +455,10 @@ SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLASE2 diff --git a/SRC/pdlaset.f b/SRC/pdlaset.f index f297b82e..5b8d94b0 100644 --- a/SRC/pdlaset.f +++ b/SRC/pdlaset.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, M, N @@ -149,10 +156,31 @@ SUBROUTINE PDLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( M.LE.( DESCA( MB_ ) - MOD( IA-1, DESCA( MB_ ) ) ) .OR. $ N.LE.( DESCA( NB_ ) - MOD( JA-1, DESCA( NB_ ) ) ) ) THEN @@ -213,6 +241,10 @@ SUBROUTINE PDLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLASET diff --git a/SRC/pdlasmsub.f b/SRC/pdlasmsub.f index 1feda040..5c19aa7b 100644 --- a/SRC/pdlasmsub.f +++ b/SRC/pdlasmsub.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) * and University of California, Berkeley. 
* May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER I, K, L, LWORK DOUBLE PRECISION SMLNUM @@ -165,7 +172,23 @@ SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) @@ -174,6 +197,19 @@ SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) JAFIRST = DESCA( CSRC_ ) ULP = PDLAMCH( CONTXT, 'PRECISION' ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) I, K, L, LWORK, SMLNUM, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLASMSUB inputs:,I:',I5,',K:',I5,',L:',I5, + $ ',LWORK:',I5,',SMLNUM:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) @@ -197,6 +233,10 @@ SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) * * Error! 
* +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, @@ -363,6 +403,10 @@ SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) 50 CONTINUE CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, K, 1, ITMP1, ITMP2, -1, $ -1, -1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLASMSUB diff --git a/SRC/pdlasrt.f b/SRC/pdlasrt.f index 79cf342d..5c66662c 100644 --- a/SRC/pdlasrt.f +++ b/SRC/pdlasrt.f @@ -1,10 +1,17 @@ - SUBROUTINE PDLASRT( ID, N, D, Q, IQ, JQ, DESCQ, WORK, LWORK, +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* + SUBROUTINE PDLASRT( ID, N, D, Q, IQ, JQ, DESCQ, WORK, LWORK, $ IWORK, LIWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 2.0.2) -- * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER ID INTEGER INFO, IQ, JQ, LIWORK, LWORK, N @@ -101,18 +108,59 @@ SUBROUTINE PDLASRT( ID, N, D, Q, IQ, JQ, DESCQ, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN * - IF( N.EQ.0 ) - $ RETURN +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF +* + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ICTXT = DESCQ( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) ID, INFO, IQ, JQ, LIWORK, LWORK, + $ N, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDLASRT inputs:,ID:',A5,',INFO:',I5,',IQ:',I5, + $ ',JQ:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -141,6 +189,10 @@ SUBROUTINE PDLASRT( ID, N, D, Q, IQ, JQ, DESCQ, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLASRT', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * diff --git a/SRC/pdlassq.f b/SRC/pdlassq.f index a6826a66..9b2aa4ab 100644 --- a/SRC/pdlassq.f +++ b/SRC/pdlassq.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
INTEGER IX, INCX, JX, N DOUBLE PRECISION SCALE, SUMSQ @@ -161,13 +168,42 @@ SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IX, INCX, JX, N, SCALE, SUMSQ, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLASSQ inputs:,IX:',I5,',INCX:',I5,',JX:',I5, + $ ',N:',I5,',SCALE:',F9.4,',SUMSQ:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Figure local indexes * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, @@ -178,8 +214,13 @@ SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * * X is rowwise distributed. * - IF( MYROW.NE.IXROW ) - $ RETURN + IF( MYROW.NE.IXROW ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF ICOFF = MOD( JX, DESCX( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYCOL.EQ.IXCOL ) @@ -218,8 +259,13 @@ SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * * X is columnwise distributed. 
* - IF( MYCOL.NE.IXCOL ) - $ RETURN + IF( MYCOL.NE.IXCOL ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF IROFF = MOD( IX, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) @@ -256,6 +302,10 @@ SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLASSQ diff --git a/SRC/pdlatra.f b/SRC/pdlatra.f index 577eaae2..5df19afb 100644 --- a/SRC/pdlatra.f +++ b/SRC/pdlatra.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* DOUBLE PRECISION FUNCTION PDLATRA( N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ DOUBLE PRECISION FUNCTION PDLATRA( N, A, IA, JA, DESCA ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, JA, N * .. @@ -123,15 +130,47 @@ DOUBLE PRECISION FUNCTION PDLATRA( N, A, IA, JA, DESCA ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, JA, N, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDLATRA inputs:,IA:',I5,',JA:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * TRACE = ZERO IF( N.EQ.0 ) THEN PDLATRA = TRACE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -180,6 +219,10 @@ DOUBLE PRECISION FUNCTION PDLATRA( N, A, IA, JA, DESCA ) * PDLATRA = TRACE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLATRA diff --git a/SRC/pdlatrd.f b/SRC/pdlatrd.f index 5903bc99..1964e782 100644 --- a/SRC/pdlatrd.f +++ b/SRC/pdlatrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, $ IW, JW, DESCW, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IW, JA, JW, N, NB @@ -249,15 +256,49 @@ SUBROUTINE PDLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, * .. Intrinsic Functions .. INTRINSIC MIN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IW, JA, JW, N, NB, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLATRD inputs:,UPLO:',A5,',IA:',I5,',IW:',I5, + $ ',JA:',I5,',JW:',I5,',N:',I5, + $ ',NB:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NQ = MAX( 1, NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) CALL DESCSET( DESCD, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, @@ -408,6 +449,10 @@ SUBROUTINE PDLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, END IF END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLATRD diff --git a/SRC/pdlatrs.f b/SRC/pdlatrs.f index e1f2755e..fc4f4ffb 100644 --- a/SRC/pdlatrs.f +++ b/SRC/pdlatrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, $ JA, DESCA, X, IX, JX, DESCX, SCALE, CNORM, $ WORK ) @@ -7,6 +13,7 @@ SUBROUTINE PDLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER DIAG, NORMIN, TRANS, UPLO INTEGER IA, IX, JA, JX, N @@ -47,17 +54,53 @@ SUBROUTINE PDLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, INFOG2L, $ PDTRSV * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, NORMIN, TRANS, UPLO, IA, + $ IX, JA, JX, N, SCALE, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLATRS inputs:,DIAG:',A5,',NORMIN:',A5, + $ ',TRANS:',A5,',UPLO:',A5,',IA:',I5, + $ ',IX:',I5,',JA:',I5,',JX:',I5,',N:',I5, + $ ',SCALE:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * ***** NO SCALING ***** Call PDTRSV for all cases ***** * @@ -80,6 +123,10 @@ SUBROUTINE PDLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, $ LDX, MYROW, IXCOL ) END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLATRS diff --git a/SRC/pdlatrz.f b/SRC/pdlatrz.f index e4952f3c..c3180a9f 100644 --- a/SRC/pdlatrz.f +++ b/SRC/pdlatrz.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLATRZ( M, N, L, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLATRZ( M, N, L, A, IA, JA, DESCA, TAU, WORK ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, JA, L, M, N * .. @@ -185,17 +192,50 @@ SUBROUTINE PDLATRZ( M, N, L, A, IA, JA, DESCA, TAU, WORK ) INTEGER NUMROC EXTERNAL NUMROC * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, JA, L, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLATRZ inputs:,IA:',I5,',JA:',I5,',L:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * MP = NUMROC( IA+M-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) @@ -230,6 +270,10 @@ SUBROUTINE PDLATRZ( M, N, L, A, IA, JA, DESCA, TAU, WORK ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLATRZ diff --git a/SRC/pdlauu2.f b/SRC/pdlauu2.f index 4eb40902..a82aace1 100644 --- a/SRC/pdlauu2.f +++ b/SRC/pdlauu2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAUU2( UPLO, N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLAUU2( UPLO, N, A, IA, JA, DESCA ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N @@ -140,16 +147,49 @@ SUBROUTINE PDLAUU2( UPLO, N, A, IA, JA, DESCA ) DOUBLE PRECISION DDOT EXTERNAL DDOT, LSAME * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters and compute local indexes * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, JA, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAUU2 inputs:,UPLO:',A5,',IA:',I5,',JA:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * @@ -198,6 +238,10 @@ SUBROUTINE PDLAUU2( UPLO, N, A, IA, JA, DESCA ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAUU2 diff --git a/SRC/pdlauum.f b/SRC/pdlauum.f index d0d1db54..0825cc3d 100644 --- a/SRC/pdlauum.f +++ b/SRC/pdlauum.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAUUM( UPLO, N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLAUUM( UPLO, N, A, IA, JA, DESCA ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N @@ -139,12 +146,33 @@ SUBROUTINE PDLAUUM( UPLO, N, A, IA, JA, DESCA ) * .. Intrinsic Functions .. INTRINSIC MIN * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) IF( LSAME( UPLO, 'U' ) ) THEN @@ -208,6 +236,10 @@ SUBROUTINE PDLAUUM( UPLO, N, A, IA, JA, DESCA ) 20 CONTINUE END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAUUM diff --git a/SRC/pdlawil.f b/SRC/pdlawil.f index e8bc3a08..dd2920cc 100644 --- a/SRC/pdlawil.f +++ b/SRC/pdlawil.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER II, JJ, M DOUBLE PRECISION H33, H43H34, H44 @@ -131,12 +138,41 @@ SUBROUTINE PDLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) II, JJ, M, H33, H43H34, H44, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAWIL inputs:,II:',I5,',JJ:',I5,',M:',I5, + $ ',H33:',F9.4,',H43H34:',F9.4,',H44:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) @@ -230,8 +266,13 @@ SUBROUTINE PDLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) V3( 1 ) = A( ( ICOL-2 )*LDA+IROW ) END IF END IF - IF( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) - $ RETURN + IF( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( MODKM1.GT.1 ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, @@ -255,6 +296,10 @@ SUBROUTINE PDLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) V( 2 ) = V2 V( 3 ) = V3( 1 ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAWIL From f72c7e88da1fed2b904b18e2fdbe97ee6ba402ab Mon Sep 17 00:00:00 2001 From: nprasadm Date: Fri, 21 Apr 2023 15:27:36 +0530 Subject: [PATCH 21/30] Trace and Logging feature enabled for 63 double data type APIs. 
Signed-off-by: Nagendra AMD-Internal: [CPUPL-2700] Change-Id: I35fee30aada42c58471ee2aa7452bd843adaca77 --- SRC/pdorg2l.f | 57 +++++++++- SRC/pdorg2r.f | 57 +++++++++- SRC/pdorgl2.f | 57 +++++++++- SRC/pdorglq.f | 57 +++++++++- SRC/pdorgql.f | 57 +++++++++- SRC/pdorgqr.f | 57 +++++++++- SRC/pdorgr2.f | 57 +++++++++- SRC/pdorgrq.f | 57 +++++++++- SRC/pdorm2l.f | 59 ++++++++++- SRC/pdorm2r.f | 59 ++++++++++- SRC/pdormbr.f | 59 ++++++++++- SRC/pdormhr.f | 59 ++++++++++- SRC/pdorml2.f | 59 ++++++++++- SRC/pdormlq.f | 59 ++++++++++- SRC/pdormql.f | 59 ++++++++++- SRC/pdormqr.f | 59 ++++++++++- SRC/pdormr2.f | 59 ++++++++++- SRC/pdormr3.f | 59 ++++++++++- SRC/pdormrq.f | 59 ++++++++++- SRC/pdormrz.f | 59 ++++++++++- SRC/pdormtr.f | 59 ++++++++++- SRC/pdpbsv.f | 53 ++++++++++ SRC/pdpbtrf.f | 69 ++++++++++++- SRC/pdpbtrs.f | 76 +++++++++++++- SRC/pdpbtrsv.f | 76 +++++++++++++- SRC/pdpocon.f | 62 +++++++++++ SRC/pdpoequ.f | 52 ++++++++++ SRC/pdporfs.f | 56 ++++++++++ SRC/pdposv.f | 45 ++++++++ SRC/pdposvx.f | 62 +++++++++++ SRC/pdpotf2.f | 52 +++++++++- SRC/pdpotrf.f | 53 +++++++++- SRC/pdpotri.f | 61 ++++++++++- SRC/pdpotrs.f | 54 +++++++++- SRC/pdptsv.f | 52 ++++++++++ SRC/pdpttrf.f | 68 +++++++++++- SRC/pdpttrs.f | 74 ++++++++++++- SRC/pdpttrsv.f | 75 +++++++++++++- SRC/pdrot.f | 62 ++++++++++- SRC/pdrscl.f | 48 ++++++++- SRC/pdstedc.f | 57 +++++++++- SRC/pdstein.f | 62 ++++++++++- SRC/pdsyev.f | 82 +++++++++++++-- SRC/pdsyevd.f | 67 +++++++++++- SRC/pdsyevr.f | 275 ++++++++++++++++++++++++++++++++----------------- SRC/pdsyevx.f | 76 +++++++++++++- SRC/pdsygs2.f | 62 ++++++++++- SRC/pdsygst.f | 63 ++++++++++- SRC/pdsygvx.f | 67 +++++++++++- SRC/pdsyngst.f | 63 ++++++++++- SRC/pdsyntrd.f | 65 +++++++++++- SRC/pdsytd2.f | 57 +++++++++- SRC/pdsytrd.f | 57 +++++++++- SRC/pdsyttrd.f | 65 +++++++++++- SRC/pdtrcon.f | 54 ++++++++++ SRC/pdtrord.f | 132 ++++++++++++++++-------- SRC/pdtrrfs.f | 56 ++++++++++ SRC/pdtrsen.f | 52 ++++++++++ SRC/pdtrti2.f | 40 +++++++ SRC/pdtrtri.f | 
62 ++++++++++- SRC/pdtrtrs.f | 64 +++++++++++- SRC/pdtzrzf.f | 57 +++++++++- SRC/pdzsum1.f | 52 +++++++++- 63 files changed, 3798 insertions(+), 262 deletions(-) diff --git a/SRC/pdorg2l.f b/SRC/pdorg2l.f index 889ff58a..aedd4a33 100644 --- a/SRC/pdorg2l.f +++ b/SRC/pdorg2l.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -181,13 +188,42 @@ SUBROUTINE PDORG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORG2L inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -220,15 +256,28 @@ SUBROUTINE PDORG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORG2L', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -271,6 +320,10 @@ SUBROUTINE PDORG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORG2L diff --git a/SRC/pdorg2r.f b/SRC/pdorg2r.f index fbcb7b54..6d2d7a21 100644 --- a/SRC/pdorg2r.f +++ b/SRC/pdorg2r.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -182,13 +189,42 @@ SUBROUTINE PDORG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORG2R inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -221,15 +257,28 @@ SUBROUTINE PDORG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORG2R', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.LE.0 ) - 
$ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -274,6 +323,10 @@ SUBROUTINE PDORG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORG2R diff --git a/SRC/pdorgl2.f b/SRC/pdorgl2.f index ec30d99b..ca797180 100644 --- a/SRC/pdorgl2.f +++ b/SRC/pdorgl2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -181,13 +188,42 @@ SUBROUTINE PDORGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORGL2 inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -220,15 +256,28 @@ SUBROUTINE PDORGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGL2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.LE.0 ) - $ RETURN + IF( M.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -280,6 +329,10 @@ SUBROUTINE PDORGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORGL2 diff --git a/SRC/pdorglq.f b/SRC/pdorglq.f index 8cb7ab73..02017a97 100644 --- a/SRC/pdorglq.f +++ b/SRC/pdorglq.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -184,13 +191,42 @@ SUBROUTINE PDORGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORGLQ inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -233,15 +269,28 @@ SUBROUTINE PDORGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGLQ', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.LE.0 ) - $ RETURN + IF( M.LE.0 ) 
THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) @@ -326,6 +375,10 @@ SUBROUTINE PDORGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORGLQ diff --git a/SRC/pdorgql.f b/SRC/pdorgql.f index 978972f2..f423748a 100644 --- a/SRC/pdorgql.f +++ b/SRC/pdorgql.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -183,13 +190,42 @@ SUBROUTINE PDORGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORGQL inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -231,15 +267,28 @@ SUBROUTINE PDORGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGQL', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IPW = DESCA( NB_ )*DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) @@ -293,6 +342,10 @@ SUBROUTINE PDORGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORGQL diff --git a/SRC/pdorgqr.f b/SRC/pdorgqr.f index 5040bbd8..1f03c13c 100644 --- a/SRC/pdorgqr.f +++ b/SRC/pdorgqr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -185,13 +192,42 @@ SUBROUTINE PDORGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORGQR inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -234,15 +270,28 @@ SUBROUTINE PDORGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGQR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) 
THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IPW = DESCA( NB_ )*DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) @@ -329,6 +378,10 @@ SUBROUTINE PDORGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORGQR diff --git a/SRC/pdorgr2.f b/SRC/pdorgr2.f index d8dca581..499c0201 100644 --- a/SRC/pdorgr2.f +++ b/SRC/pdorgr2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -181,13 +188,42 @@ SUBROUTINE PDORGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORGR2 inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -220,15 +256,28 @@ SUBROUTINE PDORGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.LE.0 ) - $ RETURN + IF( M.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -274,6 +323,10 @@ SUBROUTINE PDORGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORGR2 diff --git a/SRC/pdorgrq.f b/SRC/pdorgrq.f index 1e493c54..5e6fb586 100644 --- a/SRC/pdorgrq.f +++ b/SRC/pdorgrq.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -184,13 +191,42 @@ SUBROUTINE PDORGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORGRQ inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -233,15 +269,28 @@ SUBROUTINE PDORGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGRQ', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.LE.0 ) - $ RETURN + IF( M.LE.0 ) THEN +* 
+* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) )*DESCA( MB_ ), IA+M-1 ) @@ -295,6 +344,10 @@ SUBROUTINE PDORGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORGRQ diff --git a/SRC/pdorm2l.f b/SRC/pdorm2l.f index ede69c4d..fd1d189c 100644 --- a/SRC/pdorm2l.f +++ b/SRC/pdorm2l.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -237,13 +244,44 @@ SUBROUTINE PDORM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORM2L inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -314,15 +352,28 @@ SUBROUTINE PDORM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORM2L', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, @@ -425,6 +476,10 @@ SUBROUTINE PDORM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORM2L diff --git a/SRC/pdorm2r.f b/SRC/pdorm2r.f index 93a92968..889d36bb 100644 --- a/SRC/pdorm2r.f +++ b/SRC/pdorm2r.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -237,13 +244,44 @@ SUBROUTINE PDORM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORM2R inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -315,15 +353,28 @@ SUBROUTINE PDORM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORM2R', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture 
the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, @@ -429,6 +480,10 @@ SUBROUTINE PDORM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORM2R diff --git a/SRC/pdormbr.f b/SRC/pdormbr.f index e58eb853..94e0b828 100644 --- a/SRC/pdormbr.f +++ b/SRC/pdormbr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS, VECT INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -309,13 +316,44 @@ SUBROUTINE PDORMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, VECT, IA, IC, INFO, + $ JA, JC, K, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORMBR inputs:,SIDE:',A5,',TRANS:',A5, + $ ',VECT:',A5,',IA:',I5,',IC:',I5,',INFO:',I5, + $ ',JA:',I5,',JC:',I5,',K:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -530,15 +568,28 @@ SUBROUTINE PDORMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMBR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( APPLYQ ) THEN * @@ -584,6 +635,10 @@ SUBROUTINE PDORMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMBR diff --git a/SRC/pdormhr.f b/SRC/pdormhr.f index ca94dc44..efa53e69 100644 --- a/SRC/pdormhr.f +++ b/SRC/pdormhr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMHR( SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMHR( SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, IHI, ILO, INFO, JA, JC, LWORK, M, N @@ -246,13 +253,44 @@ SUBROUTINE PDORMHR( SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, IHI, ILO, + $ INFO, JA, JC, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORMHR inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',IHI:',I5,',ILO:',I5, + $ ',INFO:',I5,',JA:',I5,',JC:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -375,21 +413,38 @@ SUBROUTINE PDORMHR( SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMHR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( 
LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PDORMQR( SIDE, TRANS, MI, NI, NH, A, IAA, JAA, DESCA, TAU, $ C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMHR diff --git a/SRC/pdorml2.f b/SRC/pdorml2.f index 424b18b6..518c6a14 100644 --- a/SRC/pdorml2.f +++ b/SRC/pdorml2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -235,13 +242,44 @@ SUBROUTINE PDORML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORML2 inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -317,15 +355,28 @@ SUBROUTINE PDORML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORML2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -383,6 +434,10 @@ SUBROUTINE PDORML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORML2 diff --git a/SRC/pdormlq.f b/SRC/pdormlq.f index 69f4fee1..bc445bad 100644 --- a/SRC/pdormlq.f +++ b/SRC/pdormlq.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc. 
 All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -245,13 +252,44 @@ SUBROUTINE PDORMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMLQ inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -355,15 +393,28 @@ SUBROUTINE PDORMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMLQ', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* 
Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -444,6 +495,10 @@ SUBROUTINE PDORMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMLQ diff --git a/SRC/pdormql.f b/SRC/pdormql.f index 24030893..24e90cba 100644 --- a/SRC/pdormql.f +++ b/SRC/pdormql.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -245,13 +252,44 @@ SUBROUTINE PDORMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMQL inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -356,15 +394,28 @@ SUBROUTINE PDORMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMQL', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -451,6 +502,10 @@ SUBROUTINE PDORMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMQL diff --git a/SRC/pdormqr.f b/SRC/pdormqr.f index 65e8cd05..d69246e9 100644 --- a/SRC/pdormqr.f +++ b/SRC/pdormqr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -245,13 +252,44 @@ SUBROUTINE PDORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMQR inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -356,15 +394,28 @@ SUBROUTINE PDORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMQR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine 
exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -443,6 +494,10 @@ SUBROUTINE PDORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMQR diff --git a/SRC/pdormr2.f b/SRC/pdormr2.f index b7fd1dcf..17d0aac3 100644 --- a/SRC/pdormr2.f +++ b/SRC/pdormr2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -234,13 +241,44 @@ SUBROUTINE PDORMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMR2 inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -312,15 +350,28 @@ SUBROUTINE PDORMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -374,6 +425,10 @@ SUBROUTINE PDORMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMR2 diff --git a/SRC/pdormr3.f b/SRC/pdormr3.f index ac77077f..bac4bb63 100644 --- a/SRC/pdormr3.f +++ b/SRC/pdormr3.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc. 
 All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, L, LWORK, M, N @@ -237,13 +244,44 @@ SUBROUTINE PDORMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, L, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMR3 inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',L:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -317,15 +355,28 @@ SUBROUTINE PDORMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMR3', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN 
ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -383,6 +434,10 @@ SUBROUTINE PDORMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMR3 diff --git a/SRC/pdormrq.f b/SRC/pdormrq.f index 1ea36953..2b425bf7 100644 --- a/SRC/pdormrq.f +++ b/SRC/pdormrq.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -245,13 +252,44 @@ SUBROUTINE PDORMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMRQ inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -367,15 +405,28 @@ SUBROUTINE PDORMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMRQ', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -468,6 +519,10 @@ SUBROUTINE PDORMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMRQ diff --git a/SRC/pdormrz.f b/SRC/pdormrz.f index 7bde2ab3..e21a91bc 100644 --- a/SRC/pdormrz.f +++ b/SRC/pdormrz.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, L, LWORK, M, N @@ -250,13 +257,44 @@ SUBROUTINE PDORMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, L, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMRZ inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',L:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -364,15 +402,28 @@ SUBROUTINE PDORMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMRZ', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the 
subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -471,6 +522,10 @@ SUBROUTINE PDORMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMRZ diff --git a/SRC/pdormtr.f b/SRC/pdormtr.f index 6898b6c7..04182aec 100644 --- a/SRC/pdormtr.f +++ b/SRC/pdormtr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS, UPLO INTEGER IA, IC, INFO, JA, JC, LWORK, M, N @@ -260,13 +267,44 @@ SUBROUTINE PDORMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, UPLO, IA, IC, INFO, + $ JA, JC, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMTR inputs:,SIDE:',A5,',TRANS:',A5, + $ ',UPLO:',A5,',IA:',I5,',IC:',I5,',INFO:',I5, + $ ',JA:',I5,',JC:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -402,15 +440,28 @@ SUBROUTINE PDORMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMTR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( UPPER ) THEN * @@ -430,6 +481,10 @@ SUBROUTINE PDORMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMTR diff --git a/SRC/pdpbsv.f b/SRC/pdpbsv.f index 68eacc84..577c6d3d 100644 --- a/SRC/pdpbsv.f +++ b/SRC/pdpbsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * @@ -8,6 +14,7 @@ SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * and University of California, Berkeley. * November 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LWORK, N, NRHS @@ -382,8 +389,24 @@ SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * .. External Subroutines .. EXTERNAL PDPBTRF, PDPBTRS, PXERBLA * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PDPBTRF and PDPBTRS. 
@@ -405,11 +428,29 @@ SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, CALL PXERBLA( ICTXT, $ 'PDPBSV', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, BW, IB, INFO, JA, LWORK, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDPBSV inputs:,UPLO:',A5,',BW:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * * Size needed for AF in factorization * @@ -427,6 +468,10 @@ SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PDPBSV', -INFO ) ENDIF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -440,9 +485,17 @@ SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPBSV diff --git a/SRC/pdpbtrf.f b/SRC/pdpbtrf.f index 6367cac6..131f4874 100644 --- a/SRC/pdpbtrf.f +++ b/SRC/pdpbtrf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, $ LWORK, INFO ) * @@ -5,6 +11,7 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER UPLO INTEGER BW, INFO, JA, LAF, LWORK, N @@ -388,8 +395,24 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters * INFO = 0 @@ -421,6 +444,19 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, MBW2 = BW*BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, BW, INFO, JA, LAF, LWORK, + $ N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPBTRF inputs:,UPLO:',A5,',BW:',I5,',INFO:',I5, + $ ',JA:',I5,',LAF:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -471,12 +507,20 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDPBTRF, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*BW ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PDPBTRF, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -491,6 +535,10 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDPBTRF: auxiliary storage error ', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -505,6 +553,10 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, INFO = -10 CALL PXERBLA( ICTXT, 'PDPBTRF: worksize error ', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -560,13 +612,22 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBTRF', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1467,6 +1528,10 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, END IF * * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPBTRF diff --git a/SRC/pdpbtrs.f b/SRC/pdpbtrs.f index a09a0b93..784dfaed 100644 --- a/SRC/pdpbtrs.f +++ b/SRC/pdpbtrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, $ AF, LAF, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * and University of California, Berkeley. * April 3, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER UPLO INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS @@ -391,8 +398,24 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters * INFO = 0 @@ -450,6 +473,21 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, BW, IB, INFO, JA, LAF, + $ LWORK, N, NRHS, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDPBTRS inputs:,UPLO:',A5,',BW:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',LAF:',I5, + $ ',LWORK:',I5,',N:',I5,',NRHS:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -518,12 +556,20 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDPBTRS, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*BW ) ) THEN INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, 'PDPBTRS, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -537,6 +583,10 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, INFO = -14 CALL PXERBLA( ICTXT, 'PDPBTRS: worksize error', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -606,16 +656,30 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBTRS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file * - IF( NRHS.EQ.0 ) - $ RETURN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF +* + IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -728,6 +792,10 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, WORK( 1 ) = WORK_SIZE_MIN * * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPBTRS diff --git a/SRC/pdpbtrsv.f b/SRC/pdpbtrsv.f index a4cdffc4..5ce87c46 100644 --- a/SRC/pdpbtrsv.f +++ b/SRC/pdpbtrsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, $ IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * @@ -5,6 +11,7 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER TRANS, UPLO INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS @@ -405,8 +412,24 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters * INFO = 0 @@ -468,6 +491,21 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, MBW2 = BW*BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TRANS, UPLO, BW, IB, INFO, JA, + $ LAF, LWORK, N, NRHS, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDPBTRSV inputs:,TRANS:',A5,',UPLO:',A5, + $ ',BW:',I5,',IB:',I5,',INFO:',I5,',JA:',I5, + $ ',LAF:',I5,',LWORK:',I5,',N:',I5, + $ ',NRHS:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -547,6 +585,10 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, CALL PXERBLA( ICTXT, $ 'PDPBTRSV, D&C alg.: only 1 block per proc', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -554,6 +596,10 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PDPBTRSV, D&C alg.: NB too small', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -567,6 +613,10 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, 
BW, NRHS, A, JA, DESCA, B, INFO = -14 CALL PXERBLA( ICTXT, 'PDPBTRSV: worksize error', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -638,16 +688,30 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBTRSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file * - IF( NRHS.EQ.0 ) - $ RETURN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF +* + IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1504,6 +1568,10 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, WORK( 1 ) = WORK_SIZE_MIN * * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPBTRSV diff --git a/SRC/pdpocon.f b/SRC/pdpocon.f index 9930469d..f338027a 100644 --- a/SRC/pdpocon.f +++ b/SRC/pdpocon.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, $ LWORK, IWORK, LIWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LIWORK, LWORK, N @@ -203,13 +210,44 @@ SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, LIWORK, + $ LWORK, N, ANORM, RCOND, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDPOCON inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',ANORM:',F9.4,',RCOND:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -273,8 +311,16 @@ SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOCON', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -283,11 +329,23 @@ SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, RCOND = ZERO IF( N.EQ.0 ) THEN RCOND = ONE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( ANORM.EQ.ZERO ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( N.EQ.1 ) THEN RCOND = ONE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -398,6 +456,10 @@ SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, 
CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOCON diff --git a/SRC/pdpoequ.f b/SRC/pdpoequ.f index 6254daea..0f394dbc 100644 --- a/SRC/pdpoequ.f +++ b/SRC/pdpoequ.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, N DOUBLE PRECISION AMAX, SCOND @@ -178,13 +185,42 @@ SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, N, AMAX, SCOND, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPOEQU inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',N:',I5,',AMAX:',F9.4,',SCOND:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. 
* INFO = 0 @@ -198,6 +234,10 @@ SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOEQU', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -206,6 +246,10 @@ SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, IF( N.EQ.0 ) THEN SCOND = ONE AMAX = ZERO +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -329,6 +373,10 @@ SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, * CALL IGAMN2D( ICTXT, 'All', ALLCTOP, 1, 1, INFO, 1, II, JJ, -1, $ -1, MYCOL ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * ELSE @@ -350,6 +398,10 @@ SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOEQU diff --git a/SRC/pdporfs.f b/SRC/pdporfs.f index 2fa87ee6..cb4600de 100644 --- a/SRC/pdporfs.f +++ b/SRC/pdporfs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX, $ FERR, BERR, WORK, LWORK, IWORK, LIWORK, INFO ) @@ -7,6 +13,7 @@ SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, @@ -297,7 +304,23 @@ SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * .. Initialize EST EST = 0.0 * @@ -306,6 +329,23 @@ SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IAF, IB, INFO, IX, + $ JA, JAF, JB, JX, LIWORK, + $ LWORK, N, NRHS, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDPORFS inputs:,UPLO:',A5,',IA:',I5,',IAF:',I5, + $ ',IB:',I5,',INFO:',I5,',IX:',I5, + $ ',JA:',I5,',JAF:',I5,',JB:',I5, + $ ',JX:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. 
* INFO = 0 @@ -422,8 +462,16 @@ SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPORFS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -438,6 +486,10 @@ SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -856,6 +908,10 @@ SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPORFS diff --git a/SRC/pdposv.f b/SRC/pdposv.f index 304e0617..25e140f9 100644 --- a/SRC/pdposv.f +++ b/SRC/pdposv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS @@ -190,13 +197,43 @@ SUBROUTINE PDPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, INFO, JA, JB, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDPOSV inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',JB:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -239,6 +276,10 @@ SUBROUTINE PDPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -256,6 +297,10 @@ SUBROUTINE PDPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOSV diff --git a/SRC/pdposvx.f b/SRC/pdposvx.f index 0e75f09a..4908f8ad 100644 --- a/SRC/pdposvx.f +++ b/SRC/pdposvx.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, $ IAF, JAF, DESCAF, EQUED, SR, SC, B, IB, JB, $ DESCB, X, IX, JX, DESCX, RCOND, FERR, BERR, @@ -8,6 +14,7 @@ SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER EQUED, FACT, UPLO INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LIWORK, @@ -387,13 +394,48 @@ SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*512 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) EQUED, FACT, UPLO, IA, IAF, + $ IB, INFO, IX, JA, JAF, JB, JX, LIWORK, + $ LWORK, N, NRHS, + $ RCOND, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDPOSVX inputs:,EQUED:',A5,',FACT:',A5, + $ ',UPLO:',A5,',IA:',I5,',IAF:',I5,',IB:',I5, + $ ',INFO:',I5,',IX:',I5,',JA:',I5, + $ ',JAF:',I5,',JB:',I5,',JX:',I5,',LIWORK:',I5, + $ ',LWORK:',I5,',N:',I5,',NRHS:',I5, + $ ',RCOND:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -546,8 +588,16 @@ SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOSVX', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -601,6 +651,10 @@ SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, IF( INFO.NE.0 ) THEN IF( INFO.GT.0 ) $ RCOND = ZERO +* +* 
Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF END IF @@ -618,6 +672,10 @@ SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, * IF( RCOND.LT.PDLAMCH( ICTXT, 'Epsilon' ) ) THEN INFO = IA + N +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -661,6 +719,10 @@ SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOSVX diff --git a/SRC/pdpotf2.f b/SRC/pdpotf2.f index fbd98551..324025c0 100644 --- a/SRC/pdpotf2.f +++ b/SRC/pdpotf2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N @@ -164,13 +171,41 @@ SUBROUTINE PDPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) DOUBLE PRECISION DDOT EXTERNAL LSAME, DDOT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPOTF2 inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. * INFO = 0 @@ -199,13 +234,22 @@ SUBROUTINE PDPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOTF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Compute local information * @@ -344,6 +388,10 @@ SUBROUTINE PDPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOTF2 diff --git a/SRC/pdpotrf.f b/SRC/pdpotrf.f index 8f9cd118..85ff50fd 100644 --- a/SRC/pdpotrf.f +++ b/SRC/pdpotrf.f @@ -1,6 +1,11 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* * -- ScaLAPACK routine -- -* Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. * June 20, 2022 +* +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * @@ -9,6 +14,7 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * and University of California, Berkeley. 
* May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N @@ -174,13 +180,41 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPOTRF inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -214,13 +248,22 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOTRF', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * #ifdef AOCL_PROGRESS LSTAGE = 7 @@ -378,6 +421,10 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * 
End of PDPOTRF diff --git a/SRC/pdpotri.f b/SRC/pdpotri.f index fd287cdd..54b2ebd0 100644 --- a/SRC/pdpotri.f +++ b/SRC/pdpotri.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOTRI( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDPOTRI( UPLO, N, A, IA, JA, DESCA, INFO ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N @@ -143,13 +150,41 @@ SUBROUTINE PDPOTRI( UPLO, N, A, IA, JA, DESCA, INFO ) * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPOTRI inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -182,25 +217,43 @@ SUBROUTINE PDPOTRI( UPLO, N, A, IA, JA, DESCA, INFO ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOTRI', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return 
if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Invert the triangular Cholesky factor U or L. * CALL PDTRTRI( UPLO, 'Non-unit', N, A, IA, JA, DESCA, INFO ) * - IF( INFO.GT.0 ) - $ RETURN + IF( INFO.GT.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Form inv(U)*inv(U)' or inv(L)'*inv(L). * CALL PDLAUUM( UPLO, N, A, IA, JA, DESCA ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOTRI diff --git a/SRC/pdpotrs.f b/SRC/pdpotrs.f index 36c82e76..ce75ea8b 100644 --- a/SRC/pdpotrs.f +++ b/SRC/pdpotrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS @@ -171,13 +178,43 @@ SUBROUTINE PDPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. 
* ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, INFO, JA, JB, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDPOTRS inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',JB:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. * INFO = 0 @@ -221,13 +258,22 @@ SUBROUTINE PDPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOTRS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 .OR. NRHS.EQ.0 ) - $ RETURN + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( UPPER ) THEN * @@ -257,6 +303,10 @@ SUBROUTINE PDPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOTRS diff --git a/SRC/pdptsv.f b/SRC/pdptsv.f index 25682bdd..8dc7be43 100644 --- a/SRC/pdptsv.f +++ b/SRC/pdptsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, $ LWORK, INFO ) * @@ -8,6 +14,7 @@ SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, * and University of California, Berkeley. * November 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IB, INFO, JA, LWORK, N, NRHS * .. @@ -383,8 +390,24 @@ SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, * .. 
External Subroutines .. EXTERNAL PDPTTRF, PDPTTRS, PXERBLA * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PDPTTRF and PDPTTRS. @@ -409,11 +432,28 @@ SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, CALL PXERBLA( ICTXT, $ 'PDPTSV', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IB, INFO, JA, LWORK, N, NRHS, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPTSV inputs:,IB:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',N:',I5,',NRHS:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * * Size needed for AF in factorization * @@ -430,6 +470,10 @@ SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, IF( INFO .LT. 
0 ) THEN CALL PXERBLA( ICTXT, 'PDPTSV', -INFO ) ENDIF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -443,9 +487,17 @@ SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPTSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPTSV diff --git a/SRC/pdpttrf.f b/SRC/pdpttrf.f index 3266faeb..09e99034 100644 --- a/SRC/pdpttrf.f +++ b/SRC/pdpttrf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, * and University of California, Berkeley. * April 3, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER INFO, JA, LAF, LWORK, N * .. @@ -384,8 +391,24 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters * INFO = 0 @@ -421,6 +444,18 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) INFO, JA, LAF, LWORK, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPTTRF inputs:,INFO:',I5,',JA:',I5,',LAF:',I5, + $ ',LWORK:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -451,12 +486,20 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDPTTRF, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*INT_ONE ) ) THEN INFO = -( 5*100+4 ) CALL PXERBLA( ICTXT, 'PDPTTRF, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -471,6 +514,10 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDPTTRF: auxiliary storage error ', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -485,6 +532,10 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, INFO = -9 CALL PXERBLA( ICTXT, 'PDPTTRF: worksize error ', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -536,13 +587,22 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPTTRF', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1010,6 +1070,10 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, END IF * * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPTTRF diff --git a/SRC/pdpttrs.f b/SRC/pdpttrs.f index 2e542331..9821b9c1 100644 --- a/SRC/pdpttrs.f +++ b/SRC/pdpttrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, $ LAF, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, * and University of California, Berkeley. * April 3, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. 
@@ -398,8 +405,24 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters * INFO = 0 @@ -465,6 +488,19 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IB, INFO, JA, LAF, LWORK, N, + $ NRHS, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPTTRS inputs:,IB:',I5,',INFO:',I5,',JA:',I5, + $ ',LAF:',I5,',LWORK:',I5,',N:',I5, + $ ',NRHS:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -513,12 +549,20 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDPTTRS, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*INT_ONE ) ) THEN INFO = -( 5*100+4 ) CALL PXERBLA( ICTXT, 'PDPTTRS, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -532,6 +576,10 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, INFO = -12 CALL PXERBLA( ICTXT, 'PDPTTRS: worksize error', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -597,16 +645,30 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPTTRS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file * - IF( NRHS.EQ.0 ) - $ RETURN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF +* + IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -741,6 +803,10 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, WORK( 1 ) = WORK_SIZE_MIN * * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPTTRS diff --git a/SRC/pdpttrsv.f b/SRC/pdpttrsv.f index 876e667c..d10d7610 100644 --- a/SRC/pdpttrsv.f +++ b/SRC/pdpttrsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, $ AF, LAF, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, * and University of California, Berkeley. * April 3, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER UPLO INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS @@ -409,8 +416,24 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters * INFO = 0 @@ -476,6 +499,20 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IB, INFO, JA, LAF, LWORK, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDPTTRSV inputs:,UPLO:',A5,',IB:',I5,',INFO:',I5, + $ ',JA:',I5,',LAF:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -533,6 +570,10 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, CALL PXERBLA( ICTXT, $ 'PDPTTRSV, D&C alg.: only 1 block per proc', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -540,6 +581,10 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, 'PDPTTRSV, D&C alg.: NB too small', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -553,6 +598,10 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, INFO = -14 CALL 
PXERBLA( ICTXT, 'PDPTTRSV: worksize error', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -620,16 +669,30 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPTTRSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file * - IF( NRHS.EQ.0 ) - $ RETURN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF +* + IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1081,6 +1144,10 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK( 1 ) = WORK_SIZE_MIN * * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPTTRSV diff --git a/SRC/pdrot.f b/SRC/pdrot.f index cbbfb3e8..35d715dd 100644 --- a/SRC/pdrot.f +++ b/SRC/pdrot.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, $ INCY, CS, SN, WORK, LWORK, INFO ) * @@ -9,6 +15,7 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -200,12 +207,43 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, * .. Local Functions .. INTEGER ICEIL * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) N, IX, JX, INCX, IY, JY, INCY, + $ LWORK, INFO, CS, SN, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDROT inputs:,N:',I5,',IX:',I5,',JX:',I5, + $ ',INCX:',I5,',IY:',I5,',JY:',I5,',INCY:',I5, + $ ',LWORK:',I5,',INFO:',I5, + $ ',CS:',F9.4,',SN:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL * * Test and decode parameters @@ -268,7 +306,7 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, IF( LEFT ) THEN RSRC1 = INDXG2P( IX, MB, MYROW, DESCX(RSRC_), NPROW ) RSRC2 = INDXG2P( IY, MB, MYROW, DESCY(RSRC_), NPROW ) - CSRC = INDXG2P( JX, NB, MYCOL, DESCX(CSRC_), NPCOL ) + CSRC = INDXG2P( JX, NB, MYCOL, DESCX(CSRC_), NPCOL ) ICOFFXY = MOD( JX - 1, NB ) XYCOLS = NUMROC( N+ICOFFXY, NB, MYCOL, CSRC, NPCOL ) IF( ( MYROW.EQ.RSRC1 .OR. MYROW.EQ.RSRC2 ) .AND. @@ -281,7 +319,7 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, ELSEIF( RIGHT ) THEN CSRC1 = INDXG2P( JX, NB, MYCOL, DESCX(CSRC_), NPCOL ) CSRC2 = INDXG2P( JY, NB, MYCOL, DESCY(CSRC_), NPCOL ) - RSRC = INDXG2P( IX, MB, MYROW, DESCX(RSRC_), NPROW ) + RSRC = INDXG2P( IX, MB, MYROW, DESCX(RSRC_), NPROW ) IROFFXY = MOD( IX - 1, MB ) XYROWS = NUMROC( N+IROFFXY, MB, MYROW, RSRC, NPROW ) IF( ( MYCOL.EQ.CSRC1 .OR. MYCOL.EQ.CSRC2 ) .AND. 
@@ -300,15 +338,27 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDROT', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSEIF( LQUERY ) THEN WORK( 1 ) = DBLE(MNWRK) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -328,6 +378,10 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, CALL DROT( N, X((JX-1)*LLDX+IX), 1, Y((JY-1)*LLDY+IY), $ 1, CS, SN ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -435,6 +489,10 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, * Store minimum workspace requirements in WORK-array and return * WORK( 1 ) = DBLE(MNWRK) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDROT diff --git a/SRC/pdrscl.f b/SRC/pdrscl.f index 4096d1e8..7ec8d109 100644 --- a/SRC/pdrscl.f +++ b/SRC/pdrscl.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDRSCL( N, SA, SX, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDRSCL( N, SA, SX, IX, JX, DESCX, INCX ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IX, INCX, JX, N DOUBLE PRECISION SA @@ -141,17 +148,50 @@ SUBROUTINE PDRSCL( N, SA, SX, IX, JX, DESCX, INCX ) * .. Intrinsic Functions .. INTRINSIC ABS * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. 
Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IX, INCX, JX, N, SA, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDRSCL inputs:,IX:',I5,',INCX:',I5,',JX:',I5, + $ ',N:',I5,',SA:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get machine parameters * @@ -198,6 +238,10 @@ SUBROUTINE PDRSCL( N, SA, SX, IX, JX, DESCX, INCX ) IF( .NOT.DONE ) $ GO TO 10 * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDRSCL diff --git a/SRC/pdstedc.f b/SRC/pdstedc.f index 1d0f49ec..3ad94f38 100644 --- a/SRC/pdstedc.f +++ b/SRC/pdstedc.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSTEDC( COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, $ IWORK, LIWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSTEDC( COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, * and University of California, Berkeley. * March 13, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER COMPZ INTEGER INFO, IQ, JQ, LIWORK, LWORK, N @@ -145,15 +152,51 @@ SUBROUTINE PDSTEDC( COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Test the input parameters. * CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) COMPZ, INFO, IQ, JQ, LIWORK, + $ LWORK, N, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDSTEDC inputs:,COMPZ:',A5,',INFO:',I5, + $ ',IQ:',I5,',JQ:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF LDQ = DESCQ( LLD_ ) NB = DESCQ( NB_ ) NP = NUMROC( N, NB, MYROW, DESCQ( RSRC_ ), NPROW ) @@ -192,8 +235,16 @@ SUBROUTINE PDSTEDC( COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'PDSTEDC', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -261,6 +312,10 @@ SUBROUTINE PDSTEDC( COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, $ WORK( 1 ) = DBLE( LWMIN ) IF( LIWORK.GT.0 ) $ IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSTEDC diff --git 
a/SRC/pdstein.f b/SRC/pdstein.f index 91d6e743..21024133 100644 --- a/SRC/pdstein.f +++ b/SRC/pdstein.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, $ ICLUSTR, GAP, INFO ) @@ -7,6 +13,7 @@ SUBROUTINE PDSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, * and University of California, Berkeley. * November 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER INFO, IZ, JZ, LIWORK, LWORK, M, N DOUBLE PRECISION ORFAC @@ -295,12 +302,48 @@ SUBROUTINE PDSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) INFO, IZ, JZ, LIWORK, LWORK, + $ M, N, ORFAC, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDSTEIN inputs:,INFO:',I5,',IZ:',I5,',JZ:',I5, + $ ',LIWORK:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',ORFAC:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF SELF = MYROW*NPCOL + MYCOL * * Make sure that we belong to this context (before calling PCHK1MAT) @@ -376,8 +419,16 @@ SUBROUTINE PDSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCZ( CTXT_ ), 'PDSTEIN', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -396,8 +447,13 @@ SUBROUTINE PDSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, * * Quick return if possible * - IF( N.EQ.0 .OR. M.EQ.0 ) - $ RETURN + IF( N.EQ.0 .OR. M.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( ORFAC.GE.ZERO ) THEN TMPFAC = ORFAC diff --git a/SRC/pdsyev.f b/SRC/pdsyev.f index 8ccf3ac0..f0ccd441 100644 --- a/SRC/pdsyev.f +++ b/SRC/pdsyev.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  
All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, $ Z, IZ, JZ, DESCZ, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER JOBZ, UPLO INTEGER IA, INFO, IZ, JA, JZ, LWORK, N @@ -248,15 +255,15 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, * .. * .. Local Scalars .. LOGICAL LOWER, WANTZ - INTEGER CONTEXTC, CSRC_A, I, IACOL, IAROW, ICOFFA, - $ IINFO, INDD, INDD2, INDE, INDE2, INDTAU, - $ INDWORK, INDWORK2, IROFFA, IROFFZ, ISCALE, - $ IZROW, J, K, LDC, LLWORK, LWMIN, MB_A, MB_Z, + INTEGER CONTEXTC, CSRC_A, I, IACOL, IAROW, ICOFFA, + $ IINFO, INDD, INDD2, INDE, INDE2, INDTAU, + $ INDWORK, INDWORK2, IROFFA, IROFFZ, ISCALE, + $ IZROW, J, K, LDC, LLWORK, LWMIN, MB_A, MB_Z, $ MYCOL, MYPCOLC, MYPROWC, MYROW, NB, NB_A, NB_Z, - $ NP, NPCOL, NPCOLC, NPROCS, NPROW, NPROWC, NQ, - $ NRC, QRMEM, RSRC_A, RSRC_Z, SIZEMQRLEFT, + $ NP, NPCOL, NPCOLC, NPROCS, NPROW, NPROWC, NQ, + $ NRC, QRMEM, RSRC_A, RSRC_Z, SIZEMQRLEFT, $ SIZESYTRD - DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, $ SMLNUM * .. * .. Local Arrays .. @@ -278,19 +285,60 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD, SQRT, INT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Quick return * - IF( N.EQ.0 ) RETURN + IF( N.EQ.0 ) THEN +* Log the trace exit only when actually taking the quick return + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Test the input arguments. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) JOBZ, UPLO, IA, INFO, IZ, JA, + $ JZ, LWORK, N, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDSYEV inputs:,JOBZ:',A5,',UPLO:',A5,',IA:',I5, + $ ',INFO:',I5,',IZ:',I5,',JA:',I5, + $ ',JZ:',I5,',LWORK:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * WANTZ = LSAME( JOBZ, 'V' ) IF( NPROW.EQ.-1 ) THEN @@ -452,9 +500,17 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PDSYEV', -INFO ) IF( WANTZ ) CALL BLACS_GRIDEXIT( CONTEXTC ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LWORK .EQ. 
-1 ) THEN IF( WANTZ ) CALL BLACS_GRIDEXIT( CONTEXTC ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -491,7 +547,7 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, 10 CONTINUE IF( LSAME( UPLO, 'U') ) THEN DO 20 I=1,N-1 - CALL PDELGET( 'A', ' ', WORK(INDE2+I-1), A, + CALL PDELGET( 'A', ' ', WORK(INDE2+I-1), A, $ I+IA-1, I+JA, DESCA ) 20 CONTINUE ELSE @@ -511,7 +567,7 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, * to matrix Q. * CALL DSTEQR2( 'I', N, WORK( INDD2 ), WORK( INDE2 ), - $ WORK( INDWORK ), LDC, NRC, WORK( INDWORK2 ), + $ WORK( INDWORK ), LDC, NRC, WORK( INDWORK2 ), $ INFO ) * CALL PDGEMR2D( N, N, WORK( INDWORK ), 1, 1, DESCQR, Z, IA, JA, @@ -567,11 +623,15 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, * DO 50 I = 1, J IF( INFO.EQ.0 .AND. ( WORK( I+INDTAU )-WORK( I+INDE ) - $ .NE. ZERO ) )THEN + $ .NE. ZERO ) )THEN INFO = N+1 END IF 50 CONTINUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYEV diff --git a/SRC/pdsyevd.f b/SRC/pdsyevd.f index 5d4463eb..120b7d36 100644 --- a/SRC/pdsyevd.f +++ b/SRC/pdsyevd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, $ DESCZ, WORK, LWORK, IWORK, LIWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSYEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, * and University of California, Berkeley. * March 14, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER JOBZ, UPLO INTEGER IA, INFO, IZ, JA, JZ, LIWORK, LWORK, N @@ -188,20 +195,62 @@ SUBROUTINE PDSYEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Quick return * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Test the input arguments. * ICTXT = DESCZ( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) JOBZ, UPLO, IA, INFO, IZ, JA, + $ JZ, LIWORK, LWORK, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDSYEVD inputs:,JOBZ:',A5,',UPLO:',A5, + $ ',IA:',I5,',INFO:',I5,',IZ:',I5,',JA:',I5, + $ ',JZ:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * INFO = 0 IF( NPROW.EQ.-1 ) THEN @@ -272,8 +321,16 @@ SUBROUTINE PDSYEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYEVD', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -346,6 +403,10 @@ SUBROUTINE PDSYEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, CALL DSCAL( N, ONE / SIGMA, W, 1 ) 
END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYEVD diff --git a/SRC/pdsyevr.f b/SRC/pdsyevr.f index 984b9904..9ad360d8 100644 --- a/SRC/pdsyevr.f +++ b/SRC/pdsyevr.f @@ -1,8 +1,15 @@ - SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* + SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, $ DESCA, VL, VU, IL, IU, M, NZ, W, Z, IZ, $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, $ INFO ) + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * -- ScaLAPACK routine (version 2.0.2) -- @@ -25,14 +32,14 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * * PDSYEVR computes selected eigenvalues and, optionally, eigenvectors * of a real symmetric matrix A distributed in 2D blockcyclic format -* by calling the recommended sequence of ScaLAPACK routines. +* by calling the recommended sequence of ScaLAPACK routines. * * First, the matrix A is reduced to real symmetric tridiagonal form. * Then, the eigenproblem is solved using the parallel MRRR algorithm. * Last, if eigenvectors have been computed, a backtransformation is done. * * Upon successful completion, each processor stores a copy of all computed -* eigenvalues in W. The eigenvector matrix Z is stored in +* eigenvalues in W. The eigenvector matrix Z is stored in * 2D blockcyclic format distributed over all processors. * * Note that subsets of eigenvalues/vectors can be selected by @@ -67,7 +74,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * A (local input/workspace) 2D block cyclic DOUBLE PRECISION array, * global dimension (N, N), * local dimension ( LLD_A, LOCc(JA+N-1) ), -* (see Notes below for more detailed explanation of 2d arrays) +* (see Notes below for more detailed explanation of 2d arrays) * * On entry, the symmetric matrix A. 
If UPLO = 'U', only the * upper triangular part of A is used to define the elements of @@ -81,7 +88,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * * IA (global input) INTEGER * A's global row index, which points to the beginning of the -* submatrix which is to be operated on. +* submatrix which is to be operated on. * It should be set to 1 when operating on a full matrix. * * JA (global input) INTEGER @@ -91,17 +98,17 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * * DESCA (global and local input) INTEGER array of dimension DLEN=9. * The array descriptor for the distributed matrix A. -* The descriptor stores details about the 2D block-cyclic +* The descriptor stores details about the 2D block-cyclic * storage, see the notes below. * If DESCA is incorrect, PDSYEVR cannot guarantee * correct error reporting. * Also note the array alignment requirements specified below. * -* VL (global input) DOUBLE PRECISION +* VL (global input) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * -* VU (global input) DOUBLE PRECISION +* VU (global input) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * @@ -122,7 +129,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * Total number of eigenvectors computed. 0 <= NZ <= M. * The number of columns of Z that are filled. * If JOBZ .NE. 'V', NZ is not referenced. -* If JOBZ .EQ. 'V', NZ = M +* If JOBZ .EQ. 
'V', NZ = M * * W (global output) DOUBLE PRECISION array, dimension (N) * Upon successful exit, the first M entries contain the selected @@ -131,7 +138,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * Z (local output) DOUBLE PRECISION array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) -* (see Notes below for more detailed explanation of 2d arrays) +* (see Notes below for more detailed explanation of 2d arrays) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. @@ -189,8 +196,8 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * these values is returned in the first entry of the * corresponding work arrays, and no error message is issued by * PXERBLA. -* Note that in a workspace query, for performance the optimal -* workspace LWOPT is returned rather than the minimum necessary +* Note that in a workspace query, for performance the optimal +* workspace LWOPT is returned rather than the minimum necessary * WORKSPACE LWMIN. For very small matrices, LWOPT >> LWMIN. * * IWORK (local workspace) INTEGER array @@ -203,7 +210,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * Let NNP = MAX( N, NPROW*NPCOL + 1, 4 ). Then: * LIWORK >= 12*NNP + 2*N when the eigenvectors are desired * LIWORK >= 10*NNP + 2*N when only the eigenvalues have to be computed -* +* * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these @@ -226,8 +233,8 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. -* Such a global array has an associated description vector DESCA, -* or DESCZ for the descriptor of Z, etc. +* Such a global array has an associated description vector DESCA, +* or DESCZ for the descriptor of Z, etc. 
* The length of a ScaLAPACK descriptor is nine. * In the following comments, the character _ should be read as * "of the global array". @@ -273,7 +280,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * -* PDSYEVR assumes IEEE 754 standard compliant arithmetic. +* PDSYEVR assumes IEEE 754 standard compliant arithmetic. * * Alignment requirements * ====================== @@ -281,9 +288,9 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * The distributed submatrices A(IA:*, JA:*) and Z(IZ:IZ+M-1,JZ:JZ+N-1) * must satisfy the following alignment properties: * -* 1.Identical (quadratic) dimension: +* 1.Identical (quadratic) dimension: * DESCA(M_) = DESCZ(M_) = DESCA(N_) = DESCZ(N_) -* 2.Quadratic conformal blocking: +* 2.Quadratic conformal blocking: * DESCA(MB_) = DESCA(NB_) = DESCZ(MB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCZ(RSRC_) * 3.MOD( IA-1, MB_A ) = MOD( IZ-1, MB_Z ) = 0 @@ -338,8 +345,24 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*512 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* INFO = 0 @@ -366,7 +389,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, *********************************************************************** * * Set up pointers into the WORK array -* +* *********************************************************************** INDTAU = 1 INDD = INDTAU + N @@ -382,6 +405,24 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * *********************************************************************** CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) JOBZ, RANGE, UPLO, IA, IL, INFO, + $ IU, IZ, JA, JZ, LIWORK, LWORK, + $ M, N, NZ, VL, VU, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDSYEVR inputs:,JOBZ:',A5,',RANGE:',A5, + $ ',UPLO:',A5,',IA:',I5,',IL:',I5,',INFO:',I5, + $ ',IU:',I5,',IZ:',I5,',JA:',I5, + $ ',JZ:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NZ:',I5,',VL:',F9.4, + $ ',VU:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW * NPCOL @@ -407,11 +448,11 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * Take upper bound for VALEIG case MZ = N END IF -* +* NB = DESCA( NB_ ) IF ( WANTZ ) THEN NP00 = NUMROC( N, NB, 0, 0, NPROW ) - MQ00 = NUMROC( MZ, NB, 0, 0, NPCOL ) + MQ00 = NUMROC( MZ, NB, 0, 0, NPCOL ) INDRW = INDWORK + MAX(18*N, NP00*MQ00 + 2*NB*NB) LWMIN = INDRW - 1 + (ICEIL(MZ, NPROCS) + 2)*N ELSE @@ -436,7 +477,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, *********************************************************************** NNP = MAX( N, NPROCS+1, 4 ) IF ( WANTZ ) THEN - LIWMIN = 12*NNP + 2*N + LIWMIN = 12*NNP + 2*N ELSE 
LIWMIN = 10*NNP + 2*N END IF @@ -444,12 +485,12 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, *********************************************************************** * * Set up pointers into the IWORK array -* +* *********************************************************************** * Pointer to eigenpair distribution over processors - INDILU = LIWMIN - 2*NPROCS + 1 - SIZE2 = INDILU - 2*N - + INDILU = LIWMIN - 2*NPROCS + 1 + SIZE2 = INDILU - 2*N + *********************************************************************** * @@ -486,9 +527,9 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, INFO = -( 800+NB_ ) END IF IF( WANTZ ) THEN - IAROW = INDXG2P( 1, DESCA( NB_ ), MYROW, + IAROW = INDXG2P( 1, DESCA( NB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) - IZROW = INDXG2P( 1, DESCA( NB_ ), MYROW, + IZROW = INDXG2P( 1, DESCA( NB_ ), MYROW, $ DESCZ( RSRC_ ), NPROW ) IF( IAROW.NE.IZROW ) THEN INFO = -19 @@ -548,8 +589,16 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYEVR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -565,6 +614,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, M = 0 WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -593,6 +646,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, IF (IINFO .NE. 0) THEN CALL PXERBLA( ICTXT, 'PDSYNTRD', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -602,7 +659,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * *********************************************************************** OFFSET = 0 - IF( IA.EQ.1 .AND. JA.EQ.1 .AND. + IF( IA.EQ.1 .AND. JA.EQ.1 .AND. $ DESCA( RSRC_ ).EQ.0 .AND. 
DESCA( CSRC_ ).EQ.0 ) $ THEN CALL PDLARED1D( N, IA, JA, DESCA, WORK( INDD ), WORK( INDD2 ), @@ -638,16 +695,16 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * SET IIL, IIU * *********************************************************************** - IF ( ALLEIG ) THEN + IF ( ALLEIG ) THEN IIL = 1 IIU = N ELSE IF ( INDEIG ) THEN IIL = IL IIU = IU ELSE IF ( VALEIG ) THEN - CALL DLARRC('T', N, VLL, VUU, WORK( INDD2 ), + CALL DLARRC('T', N, VLL, VUU, WORK( INDD2 ), $ WORK( INDE2 + OFFSET ), SAFMIN, EIGCNT, IIL, IIU, INFO) -* Refine upper bound N that was taken +* Refine upper bound N that was taken MZ = EIGCNT IIL = IIL + 1 ENDIF @@ -659,6 +716,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, END IF WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -684,7 +745,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, MYIU = IWORK(INDILU+NPROCS+MYPROC) - ZOFFSET = MAX(0, MYIL - IIL - 1) + ZOFFSET = MAX(0, MYIL - IIL - 1) FIRST = ( MYIL .EQ. IIL ) @@ -703,10 +764,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, DOU = MYIU - MYIL + 1 CALL DSTEGR2( JOBZ, 'I', N, WORK( INDD2 ), $ WORK( INDE2+OFFSET ), VLL, VUU, MYIL, MYIU, - $ IM, W( 1 ), WORK( INDRW ), N, + $ IM, W( 1 ), WORK( INDRW ), N, $ MYIU - MYIL + 1, - $ IWORK( 1 ), WORK( INDWORK ), SIZE1, - $ IWORK( 2*N+1 ), SIZE2, + $ IWORK( 1 ), WORK( INDWORK ), SIZE1, + $ IWORK( 2*N+1 ), SIZE2, $ DOL, DOU, ZOFFSET, IINFO ) * DSTEGR2 zeroes out the entire W array, so we can't just give * it the part of W we need. So here we copy the W entries into @@ -719,6 +780,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, END IF IF (IINFO .NE. 0) THEN CALL PXERBLA( ICTXT, 'DSTEGR2', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF ELSEIF ( WANTZ .AND. 
NPROCS.EQ.1 ) THEN @@ -731,21 +796,25 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, DOU = MYIU - IIL + 1 CALL DSTEGR2( JOBZ, 'I', N, WORK( INDD2 ), $ WORK( INDE2+OFFSET ), VLL, VUU, IIL, IIU, - $ IM, W( 1 ), WORK( INDRW ), N, + $ IM, W( 1 ), WORK( INDRW ), N, $ N, - $ IWORK( 1 ), WORK( INDWORK ), SIZE1, + $ IWORK( 1 ), WORK( INDWORK ), SIZE1, $ IWORK( 2*N+1 ), SIZE2, DOL, DOU, $ ZOFFSET, IINFO ) ENDIF IF (IINFO .NE. 0) THEN CALL PXERBLA( ICTXT, 'DSTEGR2', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF ELSEIF ( WANTZ ) THEN * * Compute representations in parallel. * Share eigenvalue computation for root between all processors -* Then compute the eigenvectors. +* Then compute the eigenvectors. * IINFO = 0 * Part 1. compute root representations and root eigenvalues @@ -754,20 +823,24 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, DOU = MYIU - IIL + 1 CALL DSTEGR2A( JOBZ, 'I', N, WORK( INDD2 ), $ WORK( INDE2+OFFSET ), VLL, VUU, IIL, IIU, - $ IM, W( 1 ), WORK( INDRW ), N, - $ N, WORK( INDWORK ), SIZE1, - $ IWORK( 2*N+1 ), SIZE2, DOL, + $ IM, W( 1 ), WORK( INDRW ), N, + $ N, WORK( INDWORK ), SIZE1, + $ IWORK( 2*N+1 ), SIZE2, DOL, $ DOU, NEEDIL, NEEDIU, $ INDERR, NSPLIT, PIVMIN, SCALE, WL, WU, $ IINFO ) ENDIF IF (IINFO .NE. 0) THEN CALL PXERBLA( ICTXT, 'DSTEGR2A', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * The second part of parallel MRRR, the representation tree -* construction begins. Upon successful completion, the +* construction begins. Upon successful completion, the * eigenvectors have been computed. This is indicated by * the flag FINISH. * @@ -780,17 +853,17 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * * There are currently two ways to communicate eigenvalue information * using the BLACS. -* 1.) BROADCAST +* 1.) BROADCAST * 2.) POINT2POINT between collaborators (those processors working * jointly on a cluster. 
* For efficiency, BROADCAST has been disabled. -* At a later stage, other more efficient communication algorithms +* At a later stage, other more efficient communication algorithms * might be implemented, e. g. group or tree-based communication. * DOBCST = .FALSE. IF(DOBCST) THEN * First gather everything on the first processor. -* Then use BROADCAST-based communication +* Then use BROADCAST-based communication DO 45 I = 2, NPROCS IF (MYPROC .EQ. (I - 1)) THEN DSTROW = 0 @@ -803,25 +876,25 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, LENGTHI = 0 ENDIF IWORK(2) = LENGTHI - CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, + CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, $ DSTROW, DSTCOL ) IF (( STARTI.GE.1 ) .AND. ( LENGTHI.GE.1 )) THEN LENGTHI2 = 2*LENGTHI * Copy eigenvalues into communication buffer CALL DCOPY(LENGTHI,W( STARTI ),1, - $ WORK( INDD ), 1) + $ WORK( INDD ), 1) * Copy uncertainties into communication buffer CALL DCOPY(LENGTHI,WORK( IINDERR+STARTI-1 ),1, - $ WORK( INDD+LENGTHI ), 1) + $ WORK( INDD+LENGTHI ), 1) * send buffer - CALL DGESD2D( ICTXT, LENGTHI2, + CALL DGESD2D( ICTXT, LENGTHI2, $ 1, WORK( INDD ), LENGTHI2, $ DSTROW, DSTCOL ) END IF ELSE IF (MYPROC .EQ. 0) THEN SRCROW = (I-1) / NPCOL SRCCOL = MOD(I-1, NPCOL) - CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, + CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, $ SRCROW, SRCCOL ) STARTI = IWORK(1) LENGTHI = IWORK(2) @@ -832,10 +905,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, $ WORK(INDD), LENGTHI2, SRCROW, SRCCOL ) * copy eigenvalues from communication buffer CALL DCOPY( LENGTHI, WORK(INDD), 1, - $ W( STARTI ), 1) + $ W( STARTI ), 1) * copy uncertainties (errors) from communication buffer CALL DCOPY(LENGTHI,WORK(INDD+LENGTHI),1, - $ WORK( IINDERR+STARTI-1 ), 1) + $ WORK( IINDERR+STARTI-1 ), 1) END IF END IF 45 CONTINUE @@ -843,10 +916,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, LENGTHI2 = LENGTHI * 2 IF (MYPROC .EQ. 
0) THEN * Broadcast eigenvalues and errors to all processors - CALL DCOPY(LENGTHI,W ,1, WORK( INDD ), 1) + CALL DCOPY(LENGTHI,W ,1, WORK( INDD ), 1) CALL DCOPY(LENGTHI,WORK( IINDERR ),1, - $ WORK( INDD+LENGTHI ), 1) - CALL DGEBS2D( ICTXT, 'A', ' ', LENGTHI2, 1, + $ WORK( INDD+LENGTHI ), 1) + CALL DGEBS2D( ICTXT, 'A', ' ', LENGTHI2, 1, $ WORK(INDD), LENGTHI2 ) ELSE SRCROW = 0 @@ -855,15 +928,15 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, $ WORK(INDD), LENGTHI2, SRCROW, SRCCOL ) CALL DCOPY( LENGTHI, WORK(INDD), 1, W, 1) CALL DCOPY(LENGTHI,WORK(INDD+LENGTHI),1, - $ WORK( IINDERR ), 1) + $ WORK( IINDERR ), 1) END IF ELSE * * Enable point2point communication between collaborators * -* Find collaborators of MYPROC +* Find collaborators of MYPROC IF( (NPROCS.GT.1).AND.(MYIL.GT.0) ) THEN - CALL PMPCOL( MYPROC, NPROCS, IIL, NEEDIL, NEEDIU, + CALL PMPCOL( MYPROC, NPROCS, IIL, NEEDIL, NEEDIU, $ IWORK(INDILU), IWORK(INDILU+NPROCS), $ COLBRT, FRSTCL, LASTCL ) ELSE @@ -872,34 +945,34 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, IF(COLBRT) THEN * If the processor collaborates with others, -* communicate information. +* communicate information. DO 47 IPROC = FRSTCL, LASTCL IF (MYPROC .EQ. IPROC) THEN STARTI = DOL IWORK(1) = STARTI LENGTHI = MYIU - MYIL + 1 IWORK(2) = LENGTHI - + IF ((STARTI.GE.1) .AND. (LENGTHI.GE.1)) THEN * Copy eigenvalues into communication buffer CALL DCOPY(LENGTHI,W( STARTI ),1, - $ WORK(INDD), 1) + $ WORK(INDD), 1) * Copy uncertainties into communication buffer CALL DCOPY(LENGTHI, $ WORK( IINDERR+STARTI-1 ),1, - $ WORK(INDD+LENGTHI), 1) + $ WORK(INDD+LENGTHI), 1) ENDIF - DO 46 I = FRSTCL, LASTCL + DO 46 I = FRSTCL, LASTCL IF(I.EQ.MYPROC) GOTO 46 DSTROW = I/ NPCOL DSTCOL = MOD(I, NPCOL) - CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, + CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, $ DSTROW, DSTCOL ) IF ((STARTI.GE.1) .AND. 
(LENGTHI.GE.1)) THEN LENGTHI2 = 2*LENGTHI * send buffer - CALL DGESD2D( ICTXT, LENGTHI2, + CALL DGESD2D( ICTXT, LENGTHI2, $ 1, WORK(INDD), LENGTHI2, $ DSTROW, DSTCOL ) END IF @@ -907,7 +980,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, ELSE SRCROW = IPROC / NPCOL SRCCOL = MOD(IPROC, NPCOL) - CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, + CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, $ SRCROW, SRCCOL ) RSTARTI = IWORK(1) RLENGTHI = IWORK(2) @@ -918,10 +991,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, $ SRCROW, SRCCOL ) * copy eigenvalues from communication buffer CALL DCOPY( RLENGTHI, WORK(INDE), 1, - $ W( RSTARTI ), 1) + $ W( RSTARTI ), 1) * copy uncertainties (errors) from communication buffer CALL DCOPY(RLENGTHI,WORK(INDE+RLENGTHI),1, - $ WORK( IINDERR+RSTARTI-1 ), 1) + $ WORK( IINDERR+RSTARTI-1 ), 1) END IF END IF 47 CONTINUE @@ -934,17 +1007,17 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * is constructed in parallel from top to bottom, * on level at a time, until all eigenvectors * have been computed. -* +* 100 CONTINUE IF ( MYIL.GT.0 ) THEN CALL DSTEGR2B( JOBZ, N, WORK( INDD2 ), - $ WORK( INDE2+OFFSET ), + $ WORK( INDE2+OFFSET ), $ IM, W( 1 ), WORK( INDRW ), N, N, - $ IWORK( 1 ), WORK( INDWORK ), SIZE1, - $ IWORK( 2*N+1 ), SIZE2, DOL, + $ IWORK( 1 ), WORK( INDWORK ), SIZE1, + $ IWORK( 2*N+1 ), SIZE2, DOL, $ DOU, NEEDIL, NEEDIU, INDWLC, $ PIVMIN, SCALE, WL, WU, - $ VSTART, FINISH, + $ VSTART, FINISH, $ MAXCLS, NDEPTH, PARITY, ZOFFSET, IINFO ) IINDWLC = INDWORK + INDWLC - 1 IF(.NOT.FINISH) THEN @@ -958,7 +1031,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, LASTCL = MYPROC ENDIF * -* Check if this processor collaborates, i.e. +* Check if this processor collaborates, i.e. * communication is needed. 
* IF(COLBRT) THEN @@ -976,23 +1049,23 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * Copy eigenvalues into communication buffer CALL DCOPY(LENGTHI, $ WORK( IINDWLC+STARTI-1 ),1, - $ WORK(INDD), 1) + $ WORK(INDD), 1) * Copy uncertainties into communication buffer CALL DCOPY(LENGTHI, $ WORK( IINDERR+STARTI-1 ),1, - $ WORK(INDD+LENGTHI), 1) + $ WORK(INDD+LENGTHI), 1) ENDIF - - DO 146 I = FRSTCL, LASTCL + + DO 146 I = FRSTCL, LASTCL IF(I.EQ.MYPROC) GOTO 146 DSTROW = I/ NPCOL DSTCOL = MOD(I, NPCOL) - CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, + CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, $ DSTROW, DSTCOL ) IF ((STARTI.GE.1).AND.(LENGTHI.GE.1)) THEN LENGTHI2 = 2*LENGTHI * send buffer - CALL DGESD2D( ICTXT, LENGTHI2, + CALL DGESD2D( ICTXT, LENGTHI2, $ 1, WORK(INDD), LENGTHI2, $ DSTROW, DSTCOL ) END IF @@ -1000,7 +1073,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, ELSE SRCROW = IPROC / NPCOL SRCCOL = MOD(IPROC, NPCOL) - CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, + CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, $ SRCROW, SRCCOL ) RSTARTI = IWORK(1) RLENGTHI = IWORK(2) @@ -1011,19 +1084,23 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, $ SRCROW, SRCCOL ) * copy eigenvalues from communication buffer CALL DCOPY(RLENGTHI, WORK(INDE), 1, - $ WORK( IINDWLC+RSTARTI-1 ), 1) + $ WORK( IINDWLC+RSTARTI-1 ), 1) * copy uncertainties (errors) from communication buffer CALL DCOPY(RLENGTHI,WORK(INDE+RLENGTHI),1, - $ WORK( IINDERR+RSTARTI-1 ), 1) + $ WORK( IINDERR+RSTARTI-1 ), 1) END IF END IF 147 CONTINUE ENDIF - GOTO 100 + GOTO 100 ENDIF ENDIF IF (IINFO .NE. 
0) THEN CALL PXERBLA( ICTXT, 'DSTEGR2B', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -1055,17 +1132,17 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, LENGTHI = 0 ENDIF IWORK(2) = LENGTHI - CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, + CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, $ DSTROW, DSTCOL ) IF ((STARTI.GE.1).AND.(LENGTHI.GE.1)) THEN - CALL DGESD2D( ICTXT, LENGTHI, + CALL DGESD2D( ICTXT, LENGTHI, $ 1, W( STARTI ), LENGTHI, $ DSTROW, DSTCOL ) ENDIF ELSE IF (MYPROC .EQ. 0) THEN SRCROW = (I-1) / NPCOL SRCCOL = MOD(I-1, NPCOL) - CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, + CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, $ SRCROW, SRCCOL ) STARTI = IWORK(1) LENGTHI = IWORK(2) @@ -1100,12 +1177,16 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, CALL DLASRT2( 'I', M, W, IWORK( NPROCS+2 ), IINFO ) IF (IINFO.NE.0) THEN CALL PXERBLA( ICTXT, 'DLASRT2', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF *********************************************************************** * -* TRANSFORM Z FROM 1D WORKSPACE INTO 2D BLOCKCYCLIC STORAGE +* TRANSFORM Z FROM 1D WORKSPACE INTO 2D BLOCKCYCLIC STORAGE * *********************************************************************** IF ( WANTZ ) THEN @@ -1127,12 +1208,12 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, 180 CONTINUE IF ( FIRST ) THEN - CALL PDLAEVSWP(N, WORK( INDRW ), N, Z, IZ, JZ, - $ DESCZ, IWORK( 1 ), IWORK( NPROCS+M+2 ), WORK( INDWORK ), + CALL PDLAEVSWP(N, WORK( INDRW ), N, Z, IZ, JZ, + $ DESCZ, IWORK( 1 ), IWORK( NPROCS+M+2 ), WORK( INDWORK ), $ INDRW - INDWORK ) ELSE - CALL PDLAEVSWP(N, WORK( INDRW + N ), N, Z, IZ, JZ, - $ DESCZ, IWORK( 1 ), IWORK( NPROCS+M+2 ), WORK( INDWORK ), + CALL PDLAEVSWP(N, WORK( INDRW + N ), N, Z, IZ, JZ, + $ DESCZ, IWORK( 1 ), IWORK( NPROCS+M+2 ), WORK( INDWORK ), $ INDRW - INDWORK ) END IF * @@ -1151,6 +1232,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, END IF IF 
(IINFO.NE.0) THEN CALL PXERBLA( ICTXT, 'PDORMTR', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -1160,6 +1245,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYEVR diff --git a/SRC/pdsyevx.f b/SRC/pdsyevx.f index 708fa07c..471d895f 100644 --- a/SRC/pdsyevx.f +++ b/SRC/pdsyevx.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, $ VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, @@ -8,6 +14,7 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M, @@ -351,7 +358,7 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, * required for optimal performance for all work arrays. Each of * these values is returned in the first entry of the * corresponding work arrays, and no error message is issued by -* PXERBLA. +* PXERBLA. * * IWORK (local workspace) INTEGER array * On return, IWORK(1) contains the amount of integer workspace @@ -508,17 +515,62 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*512 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F QUICKRETURN = ( N.EQ.0 ) * * Test the input arguments. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) JOBZ, RANGE, UPLO, IA, IL, INFO, + $ IU, IZ, JA, JZ, LIWORK, LWORK, + $ M, N, NZ, ABSTOL, + $ ORFAC, VL, VU, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDSYEVX inputs:,JOBZ:',A5,',RANGE:',A5, + $ ',UPLO:',A5,',IA:',I5,',IL:',I5,',INFO:',I5, + $ ',IU:',I5,',IZ:',I5,',JA:',I5, + $ ',JZ:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NZ:',I5,',ABSTOL:',F9.4, + $ ',ORFAC:',F9.4,',VL:',F9.4, + $ ',VU:',F9.4,',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF INFO = 0 * WANTZ = LSAME( JOBZ, 'V' ) @@ -739,13 +791,25 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYEVX', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F IF( QUICKRETURN ) THEN IF( WANTZ ) THEN NZ = 0 @@ -754,6 +818,10 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, 
M = 0 WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -971,6 +1039,10 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYEVX diff --git a/SRC/pdsygs2.f b/SRC/pdsygs2.f index 31cef2ec..06ca0ca1 100644 --- a/SRC/pdsygs2.f +++ b/SRC/pdsygs2.f @@ -1,4 +1,10 @@ * +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +* +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) @@ -8,6 +14,7 @@ SUBROUTINE PDSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, N @@ -188,16 +195,52 @@ SUBROUTINE PDSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, INTEGER INDXG2P EXTERNAL LSAME, INDXG2P * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, IBTYPE, INFO, + $ JA, JB, N, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDSYGS2 inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',IBTYPE:',I5,',INFO:',I5,',JA:',I5, + $ ',JB:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. * INFO = 0 @@ -251,13 +294,22 @@ SUBROUTINE PDSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYGS2', -INFO ) CALL BLACS_EXIT( ICTXT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 .OR. ( MYROW.NE.IAROW .OR. MYCOL.NE.IACOL ) ) - $ RETURN + IF( N.EQ.0 .OR. ( MYROW.NE.IAROW .OR. 
MYCOL.NE.IACOL ) ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Compute local information * @@ -414,6 +466,10 @@ SUBROUTINE PDSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYGS2 diff --git a/SRC/pdsygst.f b/SRC/pdsygst.f index 85bb5cc4..abec3670 100644 --- a/SRC/pdsygst.f +++ b/SRC/pdsygst.f @@ -1,4 +1,10 @@ * +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +* +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, INFO ) @@ -8,6 +14,7 @@ SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, N @@ -196,10 +203,32 @@ SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, INTEGER ICEIL, INDXG2P EXTERNAL LSAME, ICEIL, INDXG2P * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * @@ -208,6 +237,21 @@ SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, IBTYPE, INFO, + $ JA, JB, N, SCALE, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDSYGST inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',IBTYPE:',I5,',INFO:',I5,',JA:',I5, + $ ',JB:',I5,',N:',I5,',SCALE:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -268,13 +312,22 @@ SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYGST', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( IBTYPE.EQ.1 ) THEN IF( UPPER ) THEN @@ -433,6 +486,10 @@ SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYGST diff --git a/SRC/pdsygvx.f b/SRC/pdsygvx.f index 5e898bb3..509ccdb8 100644 --- a/SRC/pdsygvx.f +++ b/SRC/pdsygvx.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 
2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, $ DESCA, B, IB, JB, DESCB, VL, VU, IL, IU, $ ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, @@ -9,6 +15,7 @@ SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, * and University of California, Berkeley. * October 15, 1999 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ, @@ -525,16 +532,58 @@ SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*576 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) JOBZ, RANGE, UPLO, IA, IB, IBTYPE, + $ IL, INFO, IU, IZ, JA, JB, JZ, + $ LIWORK, LWORK, M, + $ N, NZ, ABSTOL, ORFAC, VL, VU, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDSYGVX inputs:,JOBZ:',A5,',RANGE:',A5, + $ ',UPLO:',A5,',IA:',I5,',IB:',I5,',IBTYPE:',I5, + $ 
',IL:',I5,',INFO:',I5,',IU:',I5, + $ ',IZ:',I5,',JA:',I5,',JB:',I5, + $ ',JZ:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NZ:',I5,',ABSTOL:',F9.4, + $ ',ORFAC:',F9.4,',VL:',F9.4, + $ ',VU:',F9.4,',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -746,8 +795,16 @@ SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYGVX ', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -759,6 +816,10 @@ SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, WORK( 1 ) = DBLE( LWOPT ) IFAIL( 1 ) = INFO INFO = IERRNPD +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -812,6 +873,10 @@ SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, * IWORK( 1 ) = LIWMIN WORK( 1 ) = DBLE( LWOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYGVX diff --git a/SRC/pdsyngst.f b/SRC/pdsyngst.f index 65512785..43ca40c7 100644 --- a/SRC/pdsyngst.f +++ b/SRC/pdsyngst.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * October 15, 1999 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, LWORK, N @@ -236,9 +243,40 @@ SUBROUTINE PDSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, IBTYPE, INFO, + $ JA, JB, LWORK, N, SCALE, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDSYNGST inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',IBTYPE:',I5,',INFO:',I5,',JA:',I5, + $ ',JB:',I5,',LWORK:',I5,',N:',I5, + $ ',SCALE:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF SCALE = 1.0D0 * NB = DESCA( MB_ ) @@ -316,20 +354,37 @@ SUBROUTINE PDSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYNGST', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * IF( IBTYPE.NE.1 .OR. UPPER .OR. 
LWORK.LT.LWOPT ) THEN CALL PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -419,5 +474,9 @@ SUBROUTINE PDSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * WORK( 1 ) = DBLE( LWOPT ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END diff --git a/SRC/pdsyntrd.f b/SRC/pdsyntrd.f index 3ba66029..c143b613 100644 --- a/SRC/pdsyntrd.f +++ b/SRC/pdsyntrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYNTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSYNTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N @@ -286,16 +293,51 @@ SUBROUTINE PDSYNTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, INT, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, LWORK, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDSYNTRD inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',LWORK:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -354,15 +396,28 @@ SUBROUTINE PDSYNTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYNTRD', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * ONEPMIN = N*N + 3*N + 1 @@ -545,6 +600,10 @@ SUBROUTINE PDSYNTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * WORK( 1 ) = DBLE( TTLWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYNTRD diff --git a/SRC/pdsytd2.f b/SRC/pdsytd2.f index b316c7fe..5ae69ce1 100644 --- a/SRC/pdsytd2.f +++ b/SRC/pdsytd2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced 
Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYTD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSYTD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N @@ -240,13 +247,42 @@ SUBROUTINE PDSYTD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * .. Intrinsic Functions .. INTRINSIC DBLE * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, LWORK, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDSYTD2 inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',LWORK:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -277,15 +313,28 @@ SUBROUTINE PDSYTD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYTD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if 
possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Compute local information * @@ -457,6 +506,10 @@ SUBROUTINE PDSYTD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYTD2 diff --git a/SRC/pdsytrd.f b/SRC/pdsytrd.f index f1597cd3..0dc5a882 100644 --- a/SRC/pdsytrd.f +++ b/SRC/pdsytrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSYTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N @@ -251,13 +258,42 @@ SUBROUTINE PDSYTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, LWORK, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDSYTRD inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',LWORK:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -307,15 +343,28 @@ SUBROUTINE PDSYTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYTRD', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) @@ -418,6 +467,10 @@ SUBROUTINE PDSYTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYTRD diff --git a/SRC/pdsyttrd.f b/SRC/pdsyttrd.f index ac98ed6e..4d506ea0 100644 --- a/SRC/pdsyttrd.f +++ b/SRC/pdsyttrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * @@ -5,6 +11,7 @@ SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N @@ -456,10 +463,32 @@ SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * .. * * +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * @@ -483,6 +512,19 @@ SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, LWORK, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDSYTTRD inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',LWORK:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * SAFMAX = SQRT( PDLAMCH( ICTXT, 'O' ) ) / N SAFMIN = SQRT( PDLAMCH( ICTXT, 'S' ) ) @@ -566,13 +608,22 @@ SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, 
E, TAU, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYTTRD', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * @@ -656,6 +707,10 @@ SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYTTRD', -INFO ) WORK( 1 ) = DBLE( LWMIN ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -1190,6 +1245,10 @@ SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * * WORK( 1 ) = DBLE( LWMIN ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYTTRD diff --git a/SRC/pdtrcon.f b/SRC/pdtrcon.f index 28f253c3..21d1b3b1 100644 --- a/SRC/pdtrcon.f +++ b/SRC/pdtrcon.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, $ WORK, LWORK, IWORK, LIWORK, INFO ) * @@ -7,6 +13,7 @@ SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, * May 25, 2001 * * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIAG, NORM, UPLO INTEGER IA, JA, INFO, LIWORK, LWORK, N @@ -217,13 +224,44 @@ SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, NORM, UPLO, IA, JA, INFO, + $ LIWORK, LWORK, N, RCOND, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDTRCON inputs:,DIAG:',A5,',NORM:',A5, + $ ',UPLO:',A5,',IA:',I5,',JA:',I5,',INFO:',I5, + $ ',LIWORK:',I5,',LWORK:',I5,',N:',I5, + $ ',RCOND:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -302,8 +340,16 @@ SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRCON', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -311,6 +357,10 @@ SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, * IF( N.EQ.0 ) THEN RCOND = ONE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -419,6 +469,10 @@ SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDTRCON diff --git a/SRC/pdtrord.f b/SRC/pdtrord.f index 38705743..71cec517 100644 --- a/SRC/pdtrord.f +++ b/SRC/pdtrord.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, $ DESCT, Q, IQ, JQ, DESCQ, WR, WI, M, WORK, LWORK, $ IWORK, LIWORK, INFO ) @@ -9,6 +15,7 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -319,7 +326,7 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, $ PITRAF, PDW, WINEIG, WINSIZ, LLDQ, $ RSRC, CSRC, ILILO, ILIHI, ILSEL, IRSRC, $ ICSRC, IPIW, IPW1, IPW2, IPW3, TIHI, TILO, - $ LIHI, WINDOW, LILO, LSEL, BUFFER, + $ LIHI, WINDOW, LILO, LSEL, INT_BUFFER, $ NMWIN2, BUFFLEN, LROWS, LCOLS, ILOC2, JLOC2, $ WNEICR, WINDOW0, RSRC4, CSRC4, LIHI4, RSRC3, $ CSRC3, RSRC2, CSRC2, LIHIC, LIHI1, ILEN4, @@ -354,12 +361,43 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, * .. Local Functions .. INTEGER ICEIL * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. 
* ICTXT = DESCT( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) COMPQ, INFO, LIWORK, LWORK, + $ M, N, IT, JT, IQ, + $ JQ, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDTRORD inputs:,COMPQ:',A5,',INFO:',I5, + $ ',LIWORK:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',IT:',I5,',JT:',I5,',IQ:',I5, + $ ',JQ:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL * * Test if grid is O.K., i.e., the context is valid. @@ -532,10 +570,18 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, IF( INFO.NE.0 .AND. .NOT.LQUERY ) THEN M = 0 CALL PXERBLA( ICTXT, 'PDTRORD', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSEIF( LQUERY ) THEN WORK( 1 ) = DBLE(LWMIN) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -988,41 +1034,41 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, * BUFFLEN = 0. * IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN IF( BUFFLEN.NE.0 ) THEN DO 180 INDX = 1, ILEN - WORK( BUFFER+INDX-1 ) = + WORK( INT_BUFFER+INDX-1 ) = $ DBLE( IWORK(IPIW+INDX-1) ) 180 CONTINUE CALL DLAMOV( 'All', DLEN, 1, WORK( IPW2 ), - $ DLEN, WORK(BUFFER+ILEN), DLEN ) + $ DLEN, WORK(INT_BUFFER+ILEN), DLEN ) IF( NPCOL.GT.1 .AND. DIR.EQ.1 ) THEN CALL DGEBS2D( ICTXT, 'Row', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN ) + $ WORK(INT_BUFFER), BUFFLEN ) END IF IF( NPROW.GT.1 .AND. DIR.EQ.2 ) THEN CALL DGEBS2D( ICTXT, 'Col', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN ) + $ WORK(INT_BUFFER), BUFFLEN ) END IF END IF ELSEIF( MYROW.EQ.RSRC .OR. MYCOL.EQ.CSRC ) THEN IF( NPCOL.GT.1 .AND. DIR.EQ.1 .AND. 
MYROW.EQ.RSRC ) $ THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN IF( BUFFLEN.NE.0 ) THEN CALL DGEBR2D( ICTXT, 'Row', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN, RSRC, CSRC ) + $ WORK(INT_BUFFER), BUFFLEN, RSRC, CSRC ) END IF END IF IF( NPROW.GT.1 .AND. DIR.EQ.2 .AND. MYCOL.EQ.CSRC ) $ THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN IF( BUFFLEN.NE.0 ) THEN CALL DGEBR2D( ICTXT, 'Col', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN, RSRC, CSRC ) + $ WORK(INT_BUFFER), BUFFLEN, RSRC, CSRC ) END IF END IF IF((NPCOL.GT.1.AND.DIR.EQ.1.AND.MYROW.EQ.RSRC).OR. @@ -1031,10 +1077,10 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, IF( BUFFLEN.NE.0 ) THEN DO 190 INDX = 1, ILEN IWORK(IPIW+INDX-1) = - $ INT(WORK( BUFFER+INDX-1 )) + $ INT(WORK( INT_BUFFER+INDX-1 )) 190 CONTINUE CALL DLAMOV( 'All', DLEN, 1, - $ WORK( BUFFER+ILEN ), DLEN, + $ WORK( INT_BUFFER+ILEN ), DLEN, $ WORK( IPW2 ), DLEN ) END IF END IF @@ -1079,7 +1125,7 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, * Compute amount of work space necessary for performing * matrix-matrix multiplications. * - PDW = BUFFER + PDW = INT_BUFFER IPW3 = PDW + NWIN*NWIN ELSE FLOPS = 0 @@ -2259,107 +2305,107 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, * Broadcast the orthogonal transformations. * IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN IF( (NPROW.GT.1 .AND. DIR.EQ.2) .OR. $ (NPCOL.GT.1 .AND. DIR.EQ.1) ) THEN DO 370 INDX = 1, ILEN - WORK( BUFFER+INDX-1 ) = + WORK( INT_BUFFER+INDX-1 ) = $ DBLE( IWORK(IPIW+INDX-1) ) 370 CONTINUE CALL DLAMOV( 'All', DLEN, 1, WORK( IPW3 ), - $ DLEN, WORK(BUFFER+ILEN), DLEN ) + $ DLEN, WORK(INT_BUFFER+ILEN), DLEN ) END IF IF( NPCOL.GT.1 .AND. DIR.EQ.1 ) THEN CALL DGEBS2D( ICTXT, 'Row', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN ) + $ WORK(INT_BUFFER), BUFFLEN ) END IF IF( NPROW.GT.1 .AND. 
DIR.EQ.2 ) THEN CALL DGEBS2D( ICTXT, 'Col', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN ) + $ WORK(INT_BUFFER), BUFFLEN ) END IF ELSEIF( MYROW.EQ.RSRC1 .OR. MYCOL.EQ.CSRC1 ) THEN IF( NPCOL.GT.1 .AND. DIR.EQ.1 .AND. $ MYROW.EQ.RSRC1 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN CALL DGEBR2D( ICTXT, 'Row', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN, RSRC1, CSRC1 ) + $ WORK(INT_BUFFER), BUFFLEN, RSRC1, CSRC1 ) END IF IF( NPROW.GT.1 .AND. DIR.EQ.2 .AND. $ MYCOL.EQ.CSRC1 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN CALL DGEBR2D( ICTXT, 'Col', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN, RSRC1, CSRC1 ) + $ WORK(INT_BUFFER), BUFFLEN, RSRC1, CSRC1 ) END IF IF( (NPCOL.GT.1.AND.DIR.EQ.1.AND.MYROW.EQ.RSRC1) $ .OR. (NPROW.GT.1.AND.DIR.EQ.2.AND. $ MYCOL.EQ.CSRC1) ) THEN DO 380 INDX = 1, ILEN IWORK(IPIW+INDX-1) = - $ INT( WORK( BUFFER+INDX-1 ) ) + $ INT( WORK( INT_BUFFER+INDX-1 ) ) 380 CONTINUE CALL DLAMOV( 'All', DLEN, 1, - $ WORK( BUFFER+ILEN ), DLEN, + $ WORK( INT_BUFFER+ILEN ), DLEN, $ WORK( IPW3 ), DLEN ) END IF END IF IF( RSRC1.NE.RSRC4 ) THEN IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN IF( NPCOL.GT.1 .AND. DIR.EQ.1 ) THEN DO 390 INDX = 1, ILEN - WORK( BUFFER+INDX-1 ) = + WORK( INT_BUFFER+INDX-1 ) = $ DBLE( IWORK(IPIW+INDX-1) ) 390 CONTINUE CALL DLAMOV( 'All', DLEN, 1, WORK( IPW3 ), - $ DLEN, WORK(BUFFER+ILEN), DLEN ) + $ DLEN, WORK(INT_BUFFER+ILEN), DLEN ) CALL DGEBS2D( ICTXT, 'Row', TOP, BUFFLEN, - $ 1, WORK(BUFFER), BUFFLEN ) + $ 1, WORK(INT_BUFFER), BUFFLEN ) END IF ELSEIF( MYROW.EQ.RSRC4 .AND. DIR.EQ.1 .AND. 
$ NPCOL.GT.1 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN CALL DGEBR2D( ICTXT, 'Row', TOP, BUFFLEN, - $ 1, WORK(BUFFER), BUFFLEN, RSRC4, CSRC4 ) + $ 1, WORK(INT_BUFFER), BUFFLEN, RSRC4, CSRC4 ) DO 400 INDX = 1, ILEN IWORK(IPIW+INDX-1) = - $ INT( WORK( BUFFER+INDX-1 ) ) + $ INT( WORK( INT_BUFFER+INDX-1 ) ) 400 CONTINUE CALL DLAMOV( 'All', DLEN, 1, - $ WORK( BUFFER+ILEN ), DLEN, + $ WORK( INT_BUFFER+ILEN ), DLEN, $ WORK( IPW3 ), DLEN ) END IF END IF IF( CSRC1.NE.CSRC4 ) THEN IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN IF( NPROW.GT.1 .AND. DIR.EQ.2 ) THEN DO 395 INDX = 1, ILEN - WORK( BUFFER+INDX-1 ) = + WORK( INT_BUFFER+INDX-1 ) = $ DBLE( IWORK(IPIW+INDX-1) ) 395 CONTINUE CALL DLAMOV( 'All', DLEN, 1, WORK( IPW3 ), - $ DLEN, WORK(BUFFER+ILEN), DLEN ) + $ DLEN, WORK(INT_BUFFER+ILEN), DLEN ) CALL DGEBS2D( ICTXT, 'Col', TOP, BUFFLEN, - $ 1, WORK(BUFFER), BUFFLEN ) + $ 1, WORK(INT_BUFFER), BUFFLEN ) END IF ELSEIF( MYCOL.EQ.CSRC4 .AND. DIR.EQ.2 .AND. $ NPROW.GT.1 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN CALL DGEBR2D( ICTXT, 'Col', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN, RSRC4, CSRC4 ) + $ WORK(INT_BUFFER), BUFFLEN, RSRC4, CSRC4 ) DO 402 INDX = 1, ILEN IWORK(IPIW+INDX-1) = - $ INT( WORK( BUFFER+INDX-1 ) ) + $ INT( WORK( INT_BUFFER+INDX-1 ) ) 402 CONTINUE CALL DLAMOV( 'All', DLEN, 1, - $ WORK( BUFFER+ILEN ), DLEN, + $ WORK( INT_BUFFER+ILEN ), DLEN, $ WORK( IPW3 ), DLEN ) END IF END IF @@ -2390,7 +2436,7 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, IF( ((MYCOL.EQ.CSRC1.OR.MYCOL.EQ.CSRC4).AND.DIR.EQ.2) $ .OR. ((MYROW.EQ.RSRC1.OR.MYROW.EQ.RSRC4).AND. $ DIR.EQ.1)) THEN - IPW4 = BUFFER + IPW4 = INT_BUFFER IF( DIR.EQ.2 ) THEN IF( WANTQ ) THEN QROWS = NUMROC( N, NB, MYROW, DESCQ( RSRC_ ), @@ -3457,6 +3503,10 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, * * Return to calling program. 
* +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDTRORD diff --git a/SRC/pdtrrfs.f b/SRC/pdtrrfs.f index 98c13e7b..3995563e 100644 --- a/SRC/pdtrrfs.f +++ b/SRC/pdtrrfs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, $ B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, $ BERR, WORK, LWORK, IWORK, LIWORK, INFO ) @@ -7,6 +13,7 @@ SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIAG, TRANS, UPLO INTEGER INFO, IA, IB, IX, JA, JB, JX, LIWORK, LWORK, @@ -281,13 +288,46 @@ SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, TRANS, UPLO, INFO, IA, + $ IB, IX, JA, JB, JX, LIWORK, LWORK, + $ N, NRHS, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDTRRFS inputs:,DIAG:',A5,',TRANS:',A5, + $ ',UPLO:',A5,',INFO:',I5,',IA:',I5,',IB:',I5, + $ ',IX:',I5,',JA:',I5,',JB:',I5, + $ ',JX:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. * INFO = 0 @@ -402,8 +442,16 @@ SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRRFS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -418,6 +466,10 @@ SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -789,6 +841,10 @@ SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDTRRFS diff --git a/SRC/pdtrsen.f b/SRC/pdtrsen.f index c65ea911..cce59220 100644 --- a/SRC/pdtrsen.f +++ b/SRC/pdtrsen.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRSEN( JOB, COMPQ, SELECT, PARA, N, T, IT, JT, $ DESCT, Q, IQ, JQ, DESCQ, WR, WI, M, S, SEP, WORK, LWORK, $ IWORK, LIWORK, INFO ) @@ -10,6 +16,7 @@ SUBROUTINE PDTRSEN( JOB, COMPQ, SELECT, PARA, N, T, IT, JT, * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -379,12 +386,45 @@ SUBROUTINE PDTRSEN( JOB, COMPQ, SELECT, PARA, N, T, IT, JT, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCT( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) COMPQ, JOB, INFO, LIWORK, LWORK, + $ M, N, IT, JT, + $ IQ, JQ, S, SEP, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDTRSEN inputs:,COMPQ:',A5,',JOB:',A5, + $ ',INFO:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',IT:',I5,',JT:',I5, + $ ',IQ:',I5,',JQ:',I5,',S:',F9.4, + $ ',SEP:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL * * Test if grid is O.K., i.e., the context is valid @@ -615,10 +655,18 @@ SUBROUTINE PDTRSEN( JOB, COMPQ, SELECT, PARA, N, T, IT, JT, S = ONE SEP = ZERO CALL PXERBLA( ICTXT, 'PDTRSEN', -INFO ) +* +* Capture the subroutine exit in the trace file +* + 
AOCL_DTL_TRACE_EXIT_F RETURN ELSEIF( LQUERY ) THEN WORK( 1 ) = DBLE(LWMIN) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -703,6 +751,10 @@ SUBROUTINE PDTRSEN( JOB, COMPQ, SELECT, PARA, N, T, IT, JT, * 50 CONTINUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDTRSEN diff --git a/SRC/pdtrti2.f b/SRC/pdtrti2.f index 6d8ba7e5..ac243b32 100644 --- a/SRC/pdtrti2.f +++ b/SRC/pdtrti2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRTI2( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDTRTI2( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIAG, UPLO INTEGER IA, INFO, JA, N @@ -147,13 +154,42 @@ SUBROUTINE PDTRTI2( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) LOGICAL LSAME EXTERNAL LSAME * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, UPLO, IA, INFO, JA, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDTRTI2 inputs:,DIAG:',A5,',UPLO:',A5, + $ ',IA:',I5,',INFO:',I5,',JA:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -173,6 +209,10 @@ SUBROUTINE PDTRTI2( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRTI2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * diff --git a/SRC/pdtrtri.f b/SRC/pdtrtri.f index 719a0ab0..88fc4b00 100644 --- a/SRC/pdtrtri.f +++ b/SRC/pdtrtri.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIAG, UPLO INTEGER IA, INFO, JA, N @@ -160,13 +167,42 @@ SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, UPLO, IA, INFO, JA, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDTRTRI inputs:,DIAG:',A5,',UPLO:',A5, + $ ',IA:',I5,',INFO:',I5,',JA:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test input parameters * INFO = 0 @@ -210,13 +246,22 @@ SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRTRI', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Check for singularity if non-unit. 
* @@ -265,8 +310,13 @@ SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) 30 CONTINUE CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUMMY, $ IDUMMY, -1, -1, MYCOL ) - IF( INFO.NE.0 ) - $ RETURN + IF( INFO.NE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF END IF * * Use blocked code @@ -346,6 +396,10 @@ SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End PDTRTRI diff --git a/SRC/pdtrtrs.f b/SRC/pdtrtrs.f index 132640af..1e5ae630 100644 --- a/SRC/pdtrtrs.f +++ b/SRC/pdtrtrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, $ B, IB, JB, DESCB, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIAG, TRANS, UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS @@ -190,13 +197,44 @@ SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, TRANS, UPLO, IA, IB, INFO, + $ JA, JB, N, NRHS, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDTRTRS inputs:,DIAG:',A5,',TRANS:',A5, + $ ',UPLO:',A5,',IA:',I5,',IB:',I5,',INFO:',I5, + $ ',JA:',I5,',JB:',I5,',N:',I5, + $ ',NRHS:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test input parameters * INFO = 0 @@ -261,13 +299,22 @@ SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRTRS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 .OR. NRHS.EQ.0 ) - $ RETURN + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Check for singularity if non-unit. * @@ -317,8 +364,13 @@ SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, 30 CONTINUE CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUM, IDUM, $ -1, -1, MYCOL ) - IF( INFO.NE.0 ) - $ RETURN + IF( INFO.NE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF END IF * * Solve A * x = b or A' * x = b. 
@@ -326,6 +378,10 @@ SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, CALL PDTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, IA, JA, $ DESCA, B, IB, JB, DESCB ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDTRTRS diff --git a/SRC/pdtzrzf.f b/SRC/pdtzrzf.f index f72342fb..9c9727ef 100644 --- a/SRC/pdtzrzf.f +++ b/SRC/pdtzrzf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. @@ -211,13 +218,42 @@ SUBROUTINE PDTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDTZRZF inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -256,15 +292,28 @@ SUBROUTINE PDTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTZRZF', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( M.EQ.N ) THEN * @@ -327,6 +376,10 @@ SUBROUTINE PDTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDTZRZF diff --git a/SRC/pdzsum1.f b/SRC/pdzsum1.f index 09e5f6f3..23950091 100644 --- a/SRC/pdzsum1.f +++ b/SRC/pdzsum1.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IX, INCX, JX, N DOUBLE PRECISION ASUM @@ -163,16 +170,49 @@ SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX ) * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IX, INCX, JX, N, ASUM, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDZSUM1 inputs:,IX:',I5,',INCX:',I5,',JX:',I5, + $ ',N:',I5,',ASUM:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Quick return if possible * ASUM = ZERO - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * LDX = DESCX( LLD_ ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, @@ -182,6 +222,10 @@ SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX ) IF( MYROW.EQ.IXROW .AND. 
MYCOL.EQ.IXCOL ) THEN ASUM = ABS( X( IIX+(JJX-1)*LDX ) ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -217,6 +261,10 @@ SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDZSUM1 From 9fda3558f0ce3296ff39b977a45ed1e83ddeed2b Mon Sep 17 00:00:00 2001 From: nprasadm Date: Fri, 21 Apr 2023 18:21:26 +0530 Subject: [PATCH 22/30] AOCL_PROGRESS: Modification to avoid OOB access in scalapack added. 1) To prevent OOB access, aocl progress buffer which holds API name and the length variable made as const objects. 2) Modification added for cholesky, QR, LU factorization for s,d,c,z data types. Signed-off-by: Nagendra AMD-Internal: [CPUPL-2506] Change-Id: If855f6473fb5798c5823f1e554b7bc8c33c260a5 --- SRC/aocl_scalapack_progress.c | 8 ++--- SRC/aocl_scalapack_progress.h | 24 +++++++------- SRC/pcgeqrf.f | 41 +++++++++++++++++++---- SRC/pcgetrf.f | 45 ++++++++++++++++++++----- SRC/pcpotrf.f | 59 ++++++++++++++++++++++++++------- SRC/pdgeqrf.f | 38 ++++++++++++++++----- SRC/pdgetrf0.f | 30 ++++++++++++----- SRC/pdpotrf.f | 49 ++++++++++++++++++++------- SRC/psgeqrf.f | 41 +++++++++++++++++++---- SRC/psgetrf.f | 48 +++++++++++++++++++++------ SRC/pspotrf.f | 60 ++++++++++++++++++++++++++------- SRC/pzgeqrf.f | 40 ++++++++++++++++++---- SRC/pzgetrf.f | 45 ++++++++++++++++++++----- SRC/pzpotrf.f | 62 ++++++++++++++++++++++++++++------- 14 files changed, 467 insertions(+), 123 deletions(-) diff --git a/SRC/aocl_scalapack_progress.c b/SRC/aocl_scalapack_progress.c index dbd6af4f..7aff206e 100644 --- a/SRC/aocl_scalapack_progress.c +++ b/SRC/aocl_scalapack_progress.c @@ -20,17 +20,17 @@ void aocl_scalapack_set_progress_( aocl_scalapack_progress_callback func ) aocl_scalapack_progress_ptr_ = func; } -integer aocl_scalapack_progress_(const char* const api, const integer *lenapi, const integer* progress, 
+aocl_scalapack_progress_(const char* const api, const integer *lenapi, const integer* progress, const integer* current_process, const integer *total_processes) { integer ret = 0; - if (aocl_scalapack_progress_ptr_ != NULL ) { + if (aocl_scalapack_progress_ptr_ != NULL ) { ret = aocl_scalapack_progress_ptr_ ( api, lenapi, progress, current_process, total_processes); } - + return ret; } -integer AOCL_SCALAPACK_PROGRESS(const char* const api, const integer* lenapi, const integer* progress, +AOCL_SCALAPACK_PROGRESS(const char* const api, const integer* lenapi, const integer* progress, const integer* current_process, const integer* total_processes) { integer ret = 0; diff --git a/SRC/aocl_scalapack_progress.h b/SRC/aocl_scalapack_progress.h index 31099a4c..82b34bab 100644 --- a/SRC/aocl_scalapack_progress.h +++ b/SRC/aocl_scalapack_progress.h @@ -30,19 +30,19 @@ const integer *total_processes ); -integer aocl_scalapack_progress_( - const char* const api, - const integer* lenapi, - const integer* progress, - const integer* current_process, - const integer* total_processes +aocl_scalapack_progress_( + const char* const api, + const integer* lenapi, + const integer* progress, + const integer* current_process, + const integer* total_processes ); -integer AOCL_SCALAPACK_PROGRESS( - const char* const api, - const integer* lenapi, - const integer* progress, - const integer* current_process, - const integer* total_processes +AOCL_SCALAPACK_PROGRESS( + const char* const api, + const integer* lenapi, + const integer* progress, + const integer* current_process, + const integer* total_processes ); aocl_scalapack_progress_callback aocl_scalapack_progress_ptr_; diff --git a/SRC/pcgeqrf.f b/SRC/pcgeqrf.f index be33f189..d186ecf5 100644 --- a/SRC/pcgeqrf.f +++ b/SRC/pcgeqrf.f @@ -1,9 +1,13 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. 
* June 20, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PCGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -171,11 +175,18 @@ SUBROUTINE PCGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP @@ -197,6 +208,12 @@ SUBROUTINE PCGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC CMPLX, MIN, MOD, REAL * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. 
* * Get grid parameters @@ -262,10 +279,12 @@ SUBROUTINE PCGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, JB = JN - JA + 1 * #ifdef AOCL_PROGRESS +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN CURRENT_RANK = MYCOL+MYROW*NPCOL TOTAL_MPI_PROCESSES = NPROW*NPCOL - LSTAGE = 7 - API_NAME = 'PCGEQRF' + END IF #endif * * @@ -295,10 +314,18 @@ SUBROUTINE PCGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) -#endif +* Update the progress and callback if progress is enabled * + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF +#endif * * Compute the QR factorization of the current block * A(i:ia+m-1,j:j+jb-1) diff --git a/SRC/pcgetrf.f b/SRC/pcgetrf.f index e8a59108..70669c64 100644 --- a/SRC/pcgetrf.f +++ b/SRC/pcgetrf.f @@ -1,8 +1,12 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. * June 10, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PCGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -150,11 +154,18 @@ SUBROUTINE PCGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) INTEGER I, ICOFF, ICTXT, IINFO, IN, IROFF, J, JB, JN, $ MN, MYCOL, MYROW, NPCOL, NPROW * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. 
API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. @@ -170,6 +181,12 @@ SUBROUTINE PCGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. * * Get grid parameters @@ -230,10 +247,12 @@ SUBROUTINE PCGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) JB = JN - JA + 1 * #ifdef AOCL_PROGRESS - CURRENT_RANK = MYCOL+MYROW*NPCOL - TOTAL_MPI_PROCESSES = NPROW*NPCOL - LSTAGE = 7 - API_NAME = 'PCGETRF' +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN + CURRENT_RANK = MYCOL+MYROW*NPCOL + TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF #endif * * Factor diagonal and subdiagonal blocks and test for exact @@ -271,10 +290,20 @@ SUBROUTINE PCGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * +* * Factor diagonal and subdiagonal blocks and test for exact * singularity. * diff --git a/SRC/pcpotrf.f b/SRC/pcpotrf.f index 2969e597..2ded727f 100644 --- a/SRC/pcpotrf.f +++ b/SRC/pcpotrf.f @@ -1,8 +1,12 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. 
* June 20, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PCPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -148,12 +152,18 @@ SUBROUTINE PCPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) PARAMETER ( ONE = 1.0E+0 ) COMPLEX CONE PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) -* +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP @@ -176,6 +186,12 @@ SUBROUTINE PCPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. 
* * Get grid parameters @@ -228,11 +244,14 @@ SUBROUTINE PCPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * #ifdef AOCL_PROGRESS - LSTAGE = 7 - API_NAME = 'PCPOTRF' - CURRENT_RANK = MYCOL+MYROW*NPCOL - TOTAL_MPI_PROCESSES = NPROW*NPCOL +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN + CURRENT_RANK = MYCOL+MYROW*NPCOL + TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF #endif +* IF( UPPER ) THEN * * Split-ring topology for the communication along process @@ -276,8 +295,17 @@ SUBROUTINE PCPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block @@ -346,8 +374,17 @@ SUBROUTINE PCPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. 
+* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block diff --git a/SRC/pdgeqrf.f b/SRC/pdgeqrf.f index 69c0b959..9b0636fc 100644 --- a/SRC/pdgeqrf.f +++ b/SRC/pdgeqrf.f @@ -3,7 +3,6 @@ * * -- ScaLAPACK routine -- * -* #include "SL_Context_fortran_include.h" * SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, @@ -176,11 +175,18 @@ SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP @@ -208,6 +214,12 @@ SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * Variable names + Variable values(num_vars *10) CHARACTER BUFFER*256 CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. 
* * Initialize framework context structure if not initialized @@ -309,10 +321,12 @@ SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, JB = JN - JA + 1 * #ifdef AOCL_PROGRESS +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN CURRENT_RANK = MYCOL+MYROW*NPCOL TOTAL_MPI_PROCESSES = NPROW*NPCOL - LSTAGE = 7 - API_NAME = 'PDGEQRF' + END IF #endif * * Compute the QR factorization of the first block A(ia:ia+m-1,ja:jn) @@ -341,10 +355,18 @@ SUBROUTINE PDGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) -#endif +* Update the progress and callback if progress is enabled * + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF +#endif * * Compute the QR factorization of the current block * A(i:ia+m-1,j:j+jb-1) diff --git a/SRC/pdgetrf0.f b/SRC/pdgetrf0.f index d9ce0e99..f8bc35ab 100644 --- a/SRC/pdgetrf0.f +++ b/SRC/pdgetrf0.f @@ -2,7 +2,7 @@ * Copyright (c) 2020-23 Advanced Micro Devices, Inc.  All rights reserved. * #include "SL_Context_fortran_include.h" - +* * ===================================================================== * SUBROUTINE PDGETRF0 * ===================================================================== @@ -150,13 +150,15 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) INTEGER I, ICOFF, ICTXT, IINFO, IN, IROFF, J, JB, JN, $ MN, MYCOL, MYROW, NPCOL, NPROW * .. +#ifdef AOCL_PROGRESS * .. AOCL Progress variables .. - INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS, RET - -* .. 
Declaring below 'API NAME' string and its length as const objects + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS + +* .. Declaring 'API NAME' and its length as const objects * .. API_NAME string terminated with 'NULL' character. CHARACTER*8, PARAMETER :: API_NAME = 'PDGETRF' // C_NULL_CHAR - INTEGER, PARAMETER :: LSTAGE = 8 + INTEGER, PARAMETER :: LEN_API_NAME = 8 +#endif * .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) @@ -233,12 +235,15 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) IN = MIN( ICEIL( IA, DESCA( MB_ ) )*DESCA( MB_ ), IA+M-1 ) JN = MIN( ICEIL( JA, DESCA( NB_ ) )*DESCA( NB_ ), JA+MN-1 ) JB = JN - JA + 1 +#ifdef AOCL_PROGRESS +* +* Set the AOCL progress variables related to rank, processes * IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN CURRENT_RANK = MYCOL+MYROW*NPCOL TOTAL_MPI_PROCESSES = NPROW*NPCOL END IF - +#endif * Factor diagonal and subdiagonal blocks and test for exact * singularity. * @@ -272,13 +277,20 @@ SUBROUTINE PDGETRF0( M, N, A, IA, JA, DESCA, IPIV, INFO ) DO 10 J = JN+1, JA+MN-1, DESCA( NB_ ) JB = MIN( MN-J+JA, DESCA( NB_ ) ) I = IA + J - JA +#ifdef AOCL_PROGRESS +* +* Update the progress and callback if progress is enabled +* IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN -* Capture the Loop count 'J' to a separate 'PROGRESS' variable -* to avoid the corruption at application side. +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* PROGRESS = J - RET = AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, + RET = AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) END IF +#endif * * Factor diagonal and subdiagonal blocks and test for exact * singularity. diff --git a/SRC/pdpotrf.f b/SRC/pdpotrf.f index 85ff50fd..01422d8a 100644 --- a/SRC/pdpotrf.f +++ b/SRC/pdpotrf.f @@ -1,9 +1,7 @@ * -* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* Copyright (c) 2022-23 Advanced Micro Devices, Inc.  All rights reserved. * * -- ScaLAPACK routine -- -* June 20, 2022 -* * #include "SL_Context_fortran_include.h" * @@ -159,11 +157,18 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) INTEGER I, ICOFF, ICTXT, IROFF, J, JB, JN, MYCOL, $ MYROW, NPCOL, NPROW * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. @@ -266,10 +271,12 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) END IF * #ifdef AOCL_PROGRESS - LSTAGE = 7 - API_NAME = 'PDPOTRF' +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN CURRENT_RANK = MYCOL+MYROW*NPCOL TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF #endif * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) @@ -315,9 +322,19 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA +* #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. 
+* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block @@ -384,9 +401,19 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA +* #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block diff --git a/SRC/psgeqrf.f b/SRC/psgeqrf.f index 6f19f1fe..cfcfdcf2 100644 --- a/SRC/psgeqrf.f +++ b/SRC/psgeqrf.f @@ -1,9 +1,13 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. * June 20, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PSGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -171,11 +175,18 @@ SUBROUTINE PSGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. 
+ CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP @@ -197,6 +208,12 @@ SUBROUTINE PSGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC MIN, MOD, REAL * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. * * Get grid parameters @@ -262,10 +279,12 @@ SUBROUTINE PSGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, JB = JN - JA + 1 * #ifdef AOCL_PROGRESS +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN CURRENT_RANK = MYCOL+MYROW*NPCOL TOTAL_MPI_PROCESSES = NPROW*NPCOL - LSTAGE = 7 - API_NAME = 'PSGEQRF' + END IF #endif * * @@ -295,10 +314,18 @@ SUBROUTINE PSGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) -#endif +* Update the progress and callback if progress is enabled * + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF +#endif * * Compute the QR factorization of the current block * A(i:ia+m-1,j:j+jb-1) diff --git a/SRC/psgetrf.f b/SRC/psgetrf.f index ba9be56d..3732d156 100644 --- a/SRC/psgetrf.f +++ b/SRC/psgetrf.f @@ -1,8 +1,11 @@ * -- ScaLAPACK routine -- -* Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. -* June 10, 2022 +* Copyright (c) 2020-23 Advanced Micro Devices, Inc.  All rights reserved. 
+* +#include "SL_Context_fortran_include.h" * SUBROUTINE PSGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -150,11 +153,18 @@ SUBROUTINE PSGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) INTEGER I, ICOFF, ICTXT, IINFO, IN, IROFF, J, JB, JN, $ MN, MYCOL, MYROW, NPCOL, NPROW * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. @@ -170,6 +180,12 @@ SUBROUTINE PSGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. 
* * Get grid parameters @@ -230,10 +246,12 @@ SUBROUTINE PSGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) JB = JN - JA + 1 * #ifdef AOCL_PROGRESS - CURRENT_RANK = MYCOL+MYROW*NPCOL - TOTAL_MPI_PROCESSES = NPROW*NPCOL - LSTAGE = 7 - API_NAME = 'PSGETRF' +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN + CURRENT_RANK = MYCOL+MYROW*NPCOL + TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF #endif * * Factor diagonal and subdiagonal blocks and test for exact @@ -271,10 +289,20 @@ SUBROUTINE PSGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * +* * Factor diagonal and subdiagonal blocks and test for exact * singularity. * diff --git a/SRC/pspotrf.f b/SRC/pspotrf.f index 360d25a5..461ae517 100644 --- a/SRC/pspotrf.f +++ b/SRC/pspotrf.f @@ -1,8 +1,12 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. * June 20, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -152,12 +156,18 @@ SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) CHARACTER COLBTOP, ROWBTOP INTEGER I, ICOFF, ICTXT, IROFF, J, JB, JN, MYCOL, $ MYROW, NPCOL, NPROW -* +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. 
+ INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. @@ -174,6 +184,12 @@ SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. * * Get grid parameters @@ -223,10 +239,12 @@ SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) $ RETURN * #ifdef AOCL_PROGRESS - LSTAGE = 7 - API_NAME = 'PSPOTRF' - CURRENT_RANK = MYCOL+MYROW*NPCOL - TOTAL_MPI_PROCESSES = NPROW*NPCOL +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN + CURRENT_RANK = MYCOL+MYROW*NPCOL + TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF #endif * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) @@ -272,9 +290,19 @@ SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA +* #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. 
+* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block @@ -341,9 +369,19 @@ SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA +* #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block diff --git a/SRC/pzgeqrf.f b/SRC/pzgeqrf.f index 9490d51e..b1847de3 100644 --- a/SRC/pzgeqrf.f +++ b/SRC/pzgeqrf.f @@ -1,9 +1,13 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. * June 20, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -171,11 +175,18 @@ SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. 
+ CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP @@ -198,6 +209,12 @@ SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MIN, MOD * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. * * Get grid parameters @@ -263,10 +280,12 @@ SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, JB = JN - JA + 1 * #ifdef AOCL_PROGRESS +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN CURRENT_RANK = MYCOL+MYROW*NPCOL TOTAL_MPI_PROCESSES = NPROW*NPCOL - LSTAGE = 7 - API_NAME = 'PZGEQRF' + END IF #endif * * Compute the QR factorization of the first block A(ia:ia+m-1,ja:jn) @@ -295,8 +314,17 @@ SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Compute the QR factorization of the current block diff --git a/SRC/pzgetrf.f b/SRC/pzgetrf.f index 4ed09c8a..550d9ffc 100644 --- a/SRC/pzgetrf.f +++ b/SRC/pzgetrf.f @@ -1,8 +1,12 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. 
* June 10, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PZGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -150,10 +154,17 @@ SUBROUTINE PZGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) INTEGER I, ICOFF, ICTXT, IINFO, IN, IROFF, J, JB, JN, $ MN, MYCOL, MYROW, NPCOL, NPROW * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif +* .. * * .. * .. Local Arrays .. @@ -171,6 +182,12 @@ SUBROUTINE PZGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. 
* * Get grid parameters @@ -231,10 +248,12 @@ SUBROUTINE PZGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) JB = JN - JA + 1 * #ifdef AOCL_PROGRESS - CURRENT_RANK = MYCOL+MYROW*NPCOL - TOTAL_MPI_PROCESSES = NPROW*NPCOL - LSTAGE = 7 - API_NAME = 'PZGETRF' +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN + CURRENT_RANK = MYCOL+MYROW*NPCOL + TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF #endif * * Factor diagonal and subdiagonal blocks and test for exact @@ -272,10 +291,20 @@ SUBROUTINE PZGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * +* * Factor diagonal and subdiagonal blocks and test for exact * singularity. * diff --git a/SRC/pzpotrf.f b/SRC/pzpotrf.f index cc00003d..750fde3e 100644 --- a/SRC/pzpotrf.f +++ b/SRC/pzpotrf.f @@ -1,8 +1,12 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. * June 20, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -148,11 +152,17 @@ SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) PARAMETER ( ONE = 1.0D+0 ) COMPLEX*16 CONE PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) -* +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. 
+ INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif +* .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP @@ -175,8 +185,18 @@ SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* + CALL AOCL_SCALAPACK_INIT( ) +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) @@ -224,10 +244,12 @@ SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) $ RETURN * #ifdef AOCL_PROGRESS - LSTAGE = 7 - API_NAME = 'PDPOTRF' - CURRENT_RANK = MYCOL+MYROW*NPCOL - TOTAL_MPI_PROCESSES = NPROW*NPCOL +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN + CURRENT_RANK = MYCOL+MYROW*NPCOL + TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF #endif * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) @@ -275,8 +297,17 @@ SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. 
+* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block @@ -345,8 +376,17 @@ SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block From 92e386205bf6ba5f7375e01491657db539c3c1b1 Mon Sep 17 00:00:00 2001 From: nprasadm Date: Thu, 27 Apr 2023 17:49:51 +0530 Subject: [PATCH 23/30] Fix added for the build failure with Intel compiler toolchain(icc + Intel MPI) in Linux. 
Signed-off-by: Nagendra AMD-Internal: [CPUPL-3272] Change-Id: I2c16a0b717f2ce12ea3ac65bd01b431ac309cf31 --- SRC/pdsyevx.f | 3 ++- SRC/pdsygvx.f | 3 ++- SRC/pdtrord.f | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/SRC/pdsyevx.f b/SRC/pdsyevx.f index 471d895f..4592c77c 100644 --- a/SRC/pdsyevx.f +++ b/SRC/pdsyevx.f @@ -568,7 +568,8 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, $ ',JZ:',I5,',LIWORK:',I5,',LWORK:',I5, $ ',M:',I5,',N:',I5,',NZ:',I5,',ABSTOL:',F9.4, $ ',ORFAC:',F9.4,',VL:',F9.4, - $ ',VU:',F9.4,',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + $ ',VU:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) AOCL_DTL_LOG_ENTRY_F END IF INFO = 0 diff --git a/SRC/pdsygvx.f b/SRC/pdsygvx.f index 509ccdb8..4e598119 100644 --- a/SRC/pdsygvx.f +++ b/SRC/pdsygvx.f @@ -580,7 +580,8 @@ SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, $ ',JZ:',I5,',LIWORK:',I5,',LWORK:',I5, $ ',M:',I5,',N:',I5,',NZ:',I5,',ABSTOL:',F9.4, $ ',ORFAC:',F9.4,',VL:',F9.4, - $ ',VU:',F9.4,',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + $ ',VU:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) AOCL_DTL_LOG_ENTRY_F END IF * diff --git a/SRC/pdtrord.f b/SRC/pdtrord.f index 71cec517..1f63fb76 100644 --- a/SRC/pdtrord.f +++ b/SRC/pdtrord.f @@ -2369,8 +2369,8 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, $ NPCOL.GT.1 ) THEN INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN - CALL DGEBR2D( ICTXT, 'Row', TOP, BUFFLEN, - $ 1, WORK(INT_BUFFER), BUFFLEN, RSRC4, CSRC4 ) + CALL DGEBR2D( ICTXT, 'Row', TOP, BUFFLEN, 1, + $ WORK(INT_BUFFER), BUFFLEN, RSRC4, CSRC4 ) DO 400 INDX = 1, ILEN IWORK(IPIW+INDX-1) = $ INT( WORK( INT_BUFFER+INDX-1 ) ) From cdfebb8038979d8431283c04e465121d5e656e79 Mon Sep 17 00:00:00 2001 From: nprasadm Date: Fri, 5 May 2023 15:45:00 +0530 Subject: [PATCH 24/30] Fix added for the build error related to multiple definition of 'global_thread_mutex' object when linked with 
libFlame with gcc-12. Renamed 'global_thread_mutex' object to 'sl_global_thread_mutex'. Signed-off-by: Nagendra AMD-Internal: [CPUPL-3299] Change-Id: I60d7c3897de9e4563a2fe56c60a8d15497cb6a95 --- FRAMEWORK/SL_Context.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/FRAMEWORK/SL_Context.c b/FRAMEWORK/SL_Context.c index 12235be8..e4de920f 100644 --- a/FRAMEWORK/SL_Context.c +++ b/FRAMEWORK/SL_Context.c @@ -102,7 +102,7 @@ void scalapack_pthread_once(scalapack_pthread_once_t *once, void (*init)(void)) **/ aocl_scalapack_global_context scalapack_context = {0,0,0}; /* A mutex to allow synchronous access to global_thread. */ -scalapack_pthread_mutex_t global_thread_mutex = SL_PTHREAD_MUTEX_INITIALIZER; +scalapack_pthread_mutex_t sl_global_thread_mutex = SL_PTHREAD_MUTEX_INITIALIZER; /******************************************************************************** * \brief scalapack_env_get_var is a function used to query the environment * variable and convert the string into integer and return the same @@ -216,8 +216,8 @@ void scalapack_thread_set_num_threads(int n_threads) // We must ensure that global_thread has been initialized. aocl_scalapack_init_(); // Acquire the mutex protecting global_thread. - scalapack_pthread_mutex_lock(&global_thread_mutex); + scalapack_pthread_mutex_lock(&sl_global_thread_mutex); scalapack_context.num_threads = n_threads; // Release the mutex protecting global_thread. - scalapack_pthread_mutex_unlock(&global_thread_mutex); + scalapack_pthread_mutex_unlock(&sl_global_thread_mutex); } From 24c39001d417779537f7da5510866a58c41c179c Mon Sep 17 00:00:00 2001 From: nprasadm Date: Tue, 9 May 2023 18:15:31 +0530 Subject: [PATCH 25/30] Version string updated for 4.1.0 Beta The API 'get_aocl_scalapack_version' modified with additional parameter for the 'version string length'. 
Signed-off-by: Nagendra Change-Id: Ic9730cf756d35504d83f7e7c48c8ebd524d97369 --- SRC/get_aocl_scalapack_version.c | 11 +++++++---- TESTING/LIN/pdludriver.f | 9 +++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/SRC/get_aocl_scalapack_version.c b/SRC/get_aocl_scalapack_version.c index 78e0bba7..4de98b69 100644 --- a/SRC/get_aocl_scalapack_version.c +++ b/SRC/get_aocl_scalapack_version.c @@ -20,14 +20,15 @@ #define _VERSION_MAKE_STR(x) #x #ifdef __STDC__ -void get_aocl_scalapack_version_( char * version ) +void get_aocl_scalapack_version_( char * version, int *ver_str_len ) #else -void get_aocl_scalapack_version_( version ) +void get_aocl_scalapack_version_( version, ver_str_len ) char * version; + int *ver_str_len; #endif { #ifdef AOCL_SCALAPACK_VERSION - char slmainversion[] = "AOCL-ScaLAPACK 4.0.1 "; + char slmainversion[] = "AOCL-ScaLAPACK 4.1.0 Beta "; char slversion[1000]; char scalapackversion[] = ", supports ScaLAPACK 2.2.0"; int length, i; @@ -50,9 +51,11 @@ void get_aocl_scalapack_version_( version ) } slversion[length] = '\0'; + *ver_str_len = length; strcpy(version, slversion); #else - strcpy(version, "AOCL-ScaLAPACK 4.0.1, supports ScaLAPACK 2.2.0"); + strcpy(version, "AOCL-ScaLAPACK 4.1.0 Beta, supports ScaLAPACK 2.2.0"); + *ver_str_len = strlen("AOCL-ScaLAPACK 4.1.0 Beta, supports ScaLAPACK 2.2.0"); #endif return; } diff --git a/TESTING/LIN/pdludriver.f b/TESTING/LIN/pdludriver.f index 52dc6e4d..f21b46fd 100644 --- a/TESTING/LIN/pdludriver.f +++ b/TESTING/LIN/pdludriver.f @@ -116,13 +116,14 @@ PROGRAM PDLUDRIVER $ NBVAL( NTESTS ), NRVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), $ QVAL( NTESTS ) -#ifndef DYNAMIC_WORK_MEM_ALLOC +#ifndef DYNAMIC_WORK_MEM_ALLOC DOUBLE PRECISION MEM( MEMSIZ ), CTIME( 2 ), WTIME( 2 ) #else DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) DOUBLE PRECISION, allocatable :: MEM (:) #endif CHARACTER SVERSION( 100 ) + INTEGER VER_STR_LEN * .. * .. External Subroutines .. 
EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, @@ -164,9 +165,9 @@ PROGRAM PDLUDRIVER * Print version * IF( IAM.EQ.0 ) THEN - CALL GET_AOCL_SCALAPACK_VERSION( SVERSION ) - WRITE(*, *) - WRITE(*, *) 'AOCL Version: ', SVERSION + CALL GET_AOCL_SCALAPACK_VERSION( SVERSION, VER_STR_LEN ) + WRITE(*, *) + WRITE(*, *) 'AOCL Version: ', SVERSION(1:VER_STR_LEN) END IF * * Print headings From 73a53e84a3be40028b13604461e0a2bb2acbaef0 Mon Sep 17 00:00:00 2001 From: arunchan Date: Mon, 8 May 2023 13:25:33 +0530 Subject: [PATCH 26/30] Fix the windows build error while linking to global context Signed-off-by: arunchan AMD-Internal: [CPUPL-3341] Change-Id: I2750b508429f04a82047bbd9550c35452e740dee --- TESTING/EIG/CMakeLists.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/TESTING/EIG/CMakeLists.txt b/TESTING/EIG/CMakeLists.txt index 7addff7c..43bb00e0 100644 --- a/TESTING/EIG/CMakeLists.txt +++ b/TESTING/EIG/CMakeLists.txt @@ -7,6 +7,7 @@ set (dmatgen pdmatgen.f pmatgeninc.f) set (cmatgen pcmatgen.f pmatgeninc.f) set (zmatgen pzmatgen.f pmatgeninc.f) set (TTRD_SRC ${CMAKE_SOURCE_DIR}/SRC) +set (FRAMEWORK_SRC ${CMAKE_SOURCE_DIR}/FRAMEWORK) if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES Clang) add_definitions(-D__STDC__) @@ -23,10 +24,10 @@ add_executable(xchrd pchrddriver.f pchrdinfo.f pcgehdrv.f pclafchk.f ${cmatgen}) add_executable(xzhrd pzhrddriver.f pzhrdinfo.f pzgehdrv.f pzlafchk.f ${zmatgen}) if(MSVC) -add_executable(xstrd pstrddriver.f psttrdtester.f pslatran.f pstrdinfo.f pssytdrv.f pslafchk.f ${TTRD_SRC}/pssyttrd.f xpjlaenv.f ${smatgen}) -add_executable(xdtrd pdtrddriver.f pdttrdtester.f pdlatran.f pdtrdinfo.f pdsytdrv.f pdlafchk.f ${TTRD_SRC}/pdsyttrd.f xpjlaenv.f ${dmatgen}) -add_executable(xctrd pctrddriver.f pcttrdtester.f pclatran.f pctrdinfo.f pchetdrv.f pclafchk.f ${TTRD_SRC}/pchettrd.f xpjlaenv.f ${cmatgen}) -add_executable(xztrd pztrddriver.f pzttrdtester.f pzlatran.f pztrdinfo.f pzhetdrv.f pzlafchk.f ${TTRD_SRC}/pzhettrd.f xpjlaenv.f 
${zmatgen}) +add_executable(xstrd pstrddriver.f psttrdtester.f pslatran.f pstrdinfo.f pssytdrv.f pslafchk.f ${TTRD_SRC}/pssyttrd.f xpjlaenv.f ${smatgen} ${FRAMEWORK_SRC}/SL_Context_module.f) +add_executable(xdtrd pdtrddriver.f pdttrdtester.f pdlatran.f pdtrdinfo.f pdsytdrv.f pdlafchk.f ${TTRD_SRC}/pdsyttrd.f xpjlaenv.f ${dmatgen} ${FRAMEWORK_SRC}/SL_Context_module.f) +add_executable(xctrd pctrddriver.f pcttrdtester.f pclatran.f pctrdinfo.f pchetdrv.f pclafchk.f ${TTRD_SRC}/pchettrd.f xpjlaenv.f ${cmatgen} ${FRAMEWORK_SRC}/SL_Context_module.f) +add_executable(xztrd pztrddriver.f pzttrdtester.f pzlatran.f pztrdinfo.f pzhetdrv.f pzlafchk.f ${TTRD_SRC}/pzhettrd.f xpjlaenv.f ${zmatgen} ${FRAMEWORK_SRC}/SL_Context_module.f) else() add_executable(xstrd pstrddriver.f psttrdtester.f pslatran.f pstrdinfo.f pssytdrv.f pslafchk.f xpjlaenv.f ${smatgen}) add_executable(xdtrd pdtrddriver.f pdttrdtester.f pdlatran.f pdtrdinfo.f pdsytdrv.f pdlafchk.f xpjlaenv.f ${dmatgen}) From 65148f5dc9b428ec83668f55e37a4320da3c0c56 Mon Sep 17 00:00:00 2001 From: arunchan Date: Mon, 15 May 2023 15:23:10 +0530 Subject: [PATCH 27/30] Update the README files for 4.1 release Signed-off-by: arunchan AMD-Internal: [CPUPL-2702] Change-Id: Id5c6224557b68747e46bd5ba2fe76043e2080a15 --- CMakeLists.txt | 1 + README_ScaLAPACK_AMD | 14 +++--- TESTING/AOCL_PROGRESS_TESTS/README.txt | 59 +++++++++++++++----------- TESTING/README.txt | 39 +++++++++++++++++ 4 files changed, 82 insertions(+), 31 deletions(-) create mode 100644 TESTING/README.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 31fcce7f..eef4e862 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -526,3 +526,4 @@ install(FILES install(EXPORT scalapack-targets DESTINATION lib/cmake/scalapack-${SCALAPACK_VERSION}) +file(COPY scalapack_test.sh DESTINATION ${SCALAPACK_BINARY_DIR}) diff --git a/README_ScaLAPACK_AMD b/README_ScaLAPACK_AMD index 5fca3ca3..2c97560b 100644 --- a/README_ScaLAPACK_AMD +++ b/README_ScaLAPACK_AMD @@ -35,14 +35,16 @@ 
processors. c. To Build the AOCL-ScaLAPACK library and the test suite, Run the below commands: $ cmake .. -DBUILD_SHARED_LIBS=OFF -DBLAS_LIBRARIES="-fopenmp /libblis-mt.a" - -DLAPACK_LIBRARIES="/libflame.a" + -DLAPACK_LIBRARIES="-lstdc++ /libflame.a" -DCMAKE_C_COMPILER=mpicc -DCMAKE_Fortran_COMPILER=mpif90 -DUSE_OPTIMIZED_LAPACK_BLAS=OFF [-D DENABLE_ILP64=ON] $ make -j - This command generates the AOCL-ScaLAPACK library in the 'build/lib' folder and test applications in the 'build/TESTING' folder. + This command generates the AOCL-ScaLAPACK library in the 'build/lib' folder + and test applications in the 'build/TESTING' folder. -4. To Run the AOCL-ScaLAPACK test suite, Run the below script in the 'build/' folder: - cp -f ../scalapack_test.sh . - ./scalapack_test.sh - Test logs will be generated in 'ScalaPack_TestResults.txt'. +4. To execute the AOCL-ScaLAPACK test suite, run scalapack_test.sh from + 'build/' directory: + + $ ./scalapack_test.sh + Refer TESTING/README.txt to know more about scalapack_test.sh diff --git a/TESTING/AOCL_PROGRESS_TESTS/README.txt b/TESTING/AOCL_PROGRESS_TESTS/README.txt index e9dfd9c5..70f26930 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/README.txt +++ b/TESTING/AOCL_PROGRESS_TESTS/README.txt @@ -1,23 +1,29 @@ -Checking AOCL-ScaLAPACK Operation Progress -=========================================== +Checking the progress of AOCL-ScaLAPACK Operations +================================================== -AOCL libraries may be used to perform lengthy computations (for example, matrix multiplications, solver involving large matrices). These operations/computations may go on for hours. +AOCL libraries may be used to perform lengthy computations (Eg: matrix multiplications, +solver involving large matrices, etc). These operations/computations may go on for hours. -AOCL progress feature provides mechanism for the application to check how far the computations have progressed. 
Selected set of APIs of AOCL libraries periodically updates the application with progress made so far via a callback function. +AOCL progress feature provides mechanism for the application to check how far +the computations have progressed. Selected set of APIs of AOCL libraries +periodically updates the application with progress made so far via a callback function. -Usage: -====== -The Application needs to define a callback function in specific format and register this callback function with the AOCL-ScaLAPACK library. +AOCL progress is supported for the below APIs: + 1) Cholesky (pcpotrf, pdpotrf, pspotrf, pzpotrf) + 2) LU factorization (pcgetrf, pdgetrf, psgetrf, pzgetrf) + 3) QR factorization (pcgeqrf, pdgeqrf, psgeqrf, pzgeqrf) -The callback function prototype must be as defined below. -int aocl_scalapack_progress( -const char * const api, -const integer *lenapi, -const integer *progress, -const integer *mpi_rank, -const integer *total_mpi_processes -) +Usage +===== +The Application needs to define a callback function in specific +format and register this callback function with the AOCL-ScaLAPACK library. +The callback function prototype must be as defined below. +int aocl_scalapack_progress(const char *const api, + const integer *lenapi, + const integer *progress, + const integer *mpi_rank, + const integer *total_mpi_processes) The table below explains various parameters: ----------------------------------------------------------------------------- @@ -29,29 +35,32 @@ progress | Linear progress made in current thread so far mpi_rank | Current process rank total_mpi_processes | Total number of processes used to perform the operation -Callback Registration: +Callback Registration: ---------------------- -The callback function must be registered with library for it to report the progress. +The callback function must be registered with library for it to report the progress. 
-aocl_scalapack_set_progress(aocl_scalapack_progress); +aocl_scalapack_set_progress(aocl_scalapack_progress); Example: -------- -int aocl_scalapack_progress(const char* const api, const int *lenapi, const int *progress, const int *mpi_rank, const int *total_mpi_processes) +int aocl_scalapack_progress(const char* const api, const int *lenapi, + const int *progress, const int *mpi_rank, + const int *total_mpi_processes) { - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api, *progress,*total_mpi_processes ); + printf("In AOCL Progress MPI Rank:%i, API:%s, progress:%i, MPI processes:%i\n", + *mpi_rank, api, *progress,*total_mpi_processes); return 0; } +Procedure to build and run the sample application with aocl progress feature +============================================================================ -Procedure to build and run the sample application with aocl progress feature: -============================================================================= - -1) The scalapack build system generates aocl-progress related test binaries along with test suite application as part of the build process. +1) The scalapack build system generates aocl-progress related test binaries + along with test suite application as part of the build process. Refer AOCL User guide for the scalapack build process. 2) The aocl-progress related tests generated in 'TESTING/AOCL_PROGRESS_TESTS' folder in the build folder. 
3) The aocl-progress related tests can be run with the below command: - Ex: mpirun -np 4 ./xap_pdgetrf 32 8 2 2 + Eg: mpirun -np 4 ./xap_pdgetrf 32 8 2 2 mpirun -np 8 ./xap_pdgetrf 1024 32 4 2 diff --git a/TESTING/README.txt b/TESTING/README.txt new file mode 100644 index 00000000..7e92a360 --- /dev/null +++ b/TESTING/README.txt @@ -0,0 +1,39 @@ +Executing the AOCL-ScaLAPACK test suite +======================================= + +To execute AOCL-ScaLAPACK test suite against different +MPI configurations (ranks, binding, etc) you can use the script called +'/scalapack_test.sh' + +Upon running scalapack_test.sh the results will be saved in the +directory $HOME/aocl_scalapack_testing_results. The script provides +several command line options, and if no arguments are given, the +following default options will be used: + + a) MPI ranks => Maximum number of available cpu cores in the system + b) Test programs => All the programs in AOCL-ScaLAPACK test suite + will be executed + c) MPI flavour => The script will search for the mpirun executable in the + PATH variable and corresponding MPI installation will be used. + d) MPI binding, mapping => The test will be performed only with + 'map-by core' and 'bind-to core' + + +Below are some helpful examples demonstrating different options: + +Eg: To test only single precision cholesky transformation for all + the MPI mapping for ranks between 4 and 16 use + $ scalapack_test.sh -t xsllt -s 4 -i 1 -e 16 -c all + +Eg: To test all the programs with maximum available ranks + with MPI mapping "map-by l3cache" + $ scalapack_test.sh -t all -c map_l3cache + +To view all the supported options execute the script with argument -h + +Address Sanitizer(ASAN) testing: +================================ + +Address sanitizer(ASAN) tests are supported through the AOCL-ScaLAPACK +test suite. To enable the same, include the build configure option +'-DENABLE_ASAN_TESTS=ON'.
From dabc25d6d62bc6f7647578294812f70976aa9bb9 Mon Sep 17 00:00:00 2001 From: nprasadm Date: Tue, 16 May 2023 14:32:17 +0530 Subject: [PATCH 28/30] Fix added for build issue for LLVM(Clang-16) on windows. Corrected the aocl-progress related functions with explicit return type. Signed-off-by: Nagendra AMD-Internal: [CPUPL-3411] Change-Id: I075b319fb6fb526c24cba637ca11ce1e78fa5f53 --- SRC/aocl_scalapack_progress.c | 4 ++-- SRC/aocl_scalapack_progress.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/SRC/aocl_scalapack_progress.c b/SRC/aocl_scalapack_progress.c index 7aff206e..fb149394 100644 --- a/SRC/aocl_scalapack_progress.c +++ b/SRC/aocl_scalapack_progress.c @@ -20,7 +20,7 @@ void aocl_scalapack_set_progress_( aocl_scalapack_progress_callback func ) aocl_scalapack_progress_ptr_ = func; } -aocl_scalapack_progress_(const char* const api, const integer *lenapi, const integer* progress, +integer aocl_scalapack_progress_(const char* const api, const integer *lenapi, const integer* progress, const integer* current_process, const integer *total_processes) { integer ret = 0; @@ -30,7 +30,7 @@ aocl_scalapack_progress_(const char* const api, const integer *lenapi, const int return ret; } -AOCL_SCALAPACK_PROGRESS(const char* const api, const integer* lenapi, const integer* progress, +integer AOCL_SCALAPACK_PROGRESS(const char* const api, const integer* lenapi, const integer* progress, const integer* current_process, const integer* total_processes) { integer ret = 0; diff --git a/SRC/aocl_scalapack_progress.h b/SRC/aocl_scalapack_progress.h index 82b34bab..613ae21e 100644 --- a/SRC/aocl_scalapack_progress.h +++ b/SRC/aocl_scalapack_progress.h @@ -30,14 +30,14 @@ const integer *total_processes ); -aocl_scalapack_progress_( +integer aocl_scalapack_progress_( const char* const api, const integer* lenapi, const integer* progress, const integer* current_process, const integer* total_processes ); -AOCL_SCALAPACK_PROGRESS( +integer AOCL_SCALAPACK_PROGRESS( 
const char* const api, const integer* lenapi, const integer* progress, From dabe0fc69f0efe7ee600f3fe4fbf96e947217ef9 Mon Sep 17 00:00:00 2001 From: prangana Date: Wed, 17 May 2023 07:23:14 -0400 Subject: [PATCH 29/30] Update AMD's version of README with content specific to AMD work Change-Id: I4153e5c520ec5a0ffed495908a97d37fd7381f9a --- README_ScaLAPACK_AMD | 60 ++++++++++++-------------------------------- 1 file changed, 16 insertions(+), 44 deletions(-) diff --git a/README_ScaLAPACK_AMD b/README_ScaLAPACK_AMD index 2c97560b..a2319339 100644 --- a/README_ScaLAPACK_AMD +++ b/README_ScaLAPACK_AMD @@ -1,50 +1,22 @@ -AOCL-ScaLAPACK -=============================================================================== -ScaLAPACK, or Scalable LAPACK, is a library of high performance linear algebra -routines for distributed memory computers supporting MPI. +# AOCL-ScaLAPACK -AOCL-ScaLAPACK is the optimized version of ScaLAPACK for AMD EPYC family of -processors. +AOCL-ScaLAPACK is a library of high-performance linear algebra routines for +parallel distributed memory machines. It can be used to solve linear systems, +least squares problems, eigenvalue problems, and singular value problems. -=============================================================================== +AOCL-ScaLAPACK is forked from upstream Netlib ScaLAPACK GitHub +[repository](https://github.com/Reference-ScaLAPACK/scalapack). This fork has +ScaLAPACK optimized for AMD “Zen” core based processors. It depends on external +libraries BLAS and LAPACK. For AMD CPUs, use of AOCL-BLIS and AOCL-libFLAME is +recommended. -1. Install MPI library and set the PATH and LD_LIBRARY_PATH environment - variables to point to installed binaries. - eg. export PATH=/bin:$PATH - eg. 
export LD_LIBRARY_PATH=/lib:$LD_LIBRARY_PATH +For detailed instructions on how to configure, build, install, and link against +AOCL-ScaLAPACK on AMD CPUs, please refer to the AOCL User Guide located on AMD +developer [portal](https://www.amd.com/en/developer/aocl.html). -2. Download AMD optimized versions of BLIS and libFLAME from following link - https://developer.amd.com/amd-aocl/ +For any issues/suggestions in the "amd" fork of ScaLAPACK, please email +toolchainsupport@amd.com. -3. Install latest CMAKE tool. - -4. Install AOCL-BLIS and AOCL-libFLAME libraries either using pre-built binaries or build - from source. - To build AOCL-BLIS and AOCL-libFLAME from source, clone from following github links - BLIS: https://github.com/amd/blis - libFLAME: https://github.com/amd/libflame - -5. Steps to build the AOCL-ScaLAPACK library and the test suite: - - a. Create a new directory. For example, build: - $ mkdir build - $ cd build - - b. Set PATH and LD_LIBRARY_PATH appropriately to the MPI installation. - - c. To Build the AOCL-ScaLAPACK library and the test suite, Run the below commands: - $ cmake .. -DBUILD_SHARED_LIBS=OFF -DBLAS_LIBRARIES="-fopenmp /libblis-mt.a" - -DLAPACK_LIBRARIES="-lstdc++ /libflame.a" - -DCMAKE_C_COMPILER=mpicc -DCMAKE_Fortran_COMPILER=mpif90 - -DUSE_OPTIMIZED_LAPACK_BLAS=OFF [-D DENABLE_ILP64=ON] - $ make -j - - This command generates the AOCL-ScaLAPACK library in the 'build/lib' folder - and test applications in the 'build/TESTING' folder. - -4. To execute the AOCL-ScaLAPACK test suite, run scalapack_test.sh from - 'build/' directory: - - $ ./scalapack_test.sh - Refer TESTING/README.txt to know more about scalapack_test.sh +Also, please read the LICENSE file for information on copying and distributing +this software. 
From 9b1f832d8248ddbf2662c56d3196c36304ee8bc0 Mon Sep 17 00:00:00 2001 From: nprasadm Date: Wed, 24 May 2023 14:04:35 +0530 Subject: [PATCH 30/30] Version string updated for 4.1.0 Signed-off-by: Nagendra Change-Id: Ifa78deee378b7b818e1ae3feebadc3b78a23e857 --- SRC/get_aocl_scalapack_version.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/SRC/get_aocl_scalapack_version.c b/SRC/get_aocl_scalapack_version.c index 4de98b69..1d42b3d6 100644 --- a/SRC/get_aocl_scalapack_version.c +++ b/SRC/get_aocl_scalapack_version.c @@ -3,7 +3,7 @@ /* --------------------------------------------------------------------- * * -- AOCL ScaLAPACK routine -- -* Copyright (c) 2020-2022 Advanced Micro Devices, Inc.  All rights reserved. +* Copyright (c) 2020-2023 Advanced Micro Devices, Inc.  All rights reserved. * * --------------------------------------------------------------------- */ @@ -28,7 +28,7 @@ void get_aocl_scalapack_version_( version, ver_str_len ) #endif { #ifdef AOCL_SCALAPACK_VERSION - char slmainversion[] = "AOCL-ScaLAPACK 4.1.0 Beta "; + char slmainversion[] = "AOCL-ScaLAPACK 4.1.0 "; char slversion[1000]; char scalapackversion[] = ", supports ScaLAPACK 2.2.0"; int length, i; @@ -54,8 +54,8 @@ void get_aocl_scalapack_version_( version, ver_str_len ) *ver_str_len = length; strcpy(version, slversion); #else - strcpy(version, "AOCL-ScaLAPACK 4.1.0 Beta, supports ScaLAPACK 2.2.0"); - *ver_str_len = strlen("AOCL-ScaLAPACK 4.1.0 Beta, supports ScaLAPACK 2.2.0"); + strcpy(version, "AOCL-ScaLAPACK 4.1.0, supports ScaLAPACK 2.2.0"); + *ver_str_len = strlen("AOCL-ScaLAPACK 4.1.0, supports ScaLAPACK 2.2.0"); #endif return; }