diff --git a/AOCL_DTL/README.md b/AOCL_DTL/README.md index 5da5150a..a3e63b21 100644 --- a/AOCL_DTL/README.md +++ b/AOCL_DTL/README.md @@ -1,19 +1,19 @@ -############################################################################### -Guidelines to enable logging and tracing in ScaLAPACK library -############################################################################### - -Following are the steps to enable Trace and Log. - -1. Open header file "aocl-scalapack/AOCL_DTL/aocldtlcf.h" - i. Enable Trace by making the following macro as 1 : - #define AOCL_DTL_TRACE_ENABLE 1 - ii. Enable Log by making the following macro as 1 : - #define AOCL_DTL_LOG_ENABLE 1 - -2. After Step 1, configure the cmake with -DENABLE_DTL=ON config option to enable AOCL DTL feature. - For Example: cmake .. -DBUILD_SHARED_LIBS=OFF -DBLAS_LIBRARIES="-fopenmp libblis-mt.a" -DLAPACK_LIBRARIES=/libflame.a -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc -DCMAKE_Fortran_COMPILER=mpif90 -DUSE_OPTIMIZED_LAPACK_BLAS=OFF -DENABLE_DTL=ON - -3. Currently the DTL is supported only for the LU factorization API 'pdgetrf'. - -4. After the ScaLAPACK test suite is built (Refer the latest AOCL-userGuide for the build steps), execute the LU test application (with command: "mpirun ./xdlu" ) to get the DTL trace, log files. - For Example: "P31243_T31243_aocldtl_trace.txt" and "P31243_T31243_aocldtl_log.txt". +############################################################################### +Guidelines to enable logging and tracing in ScaLAPACK library +############################################################################### + +Following are the steps to enable Trace and Log. + +1. Open header file "aocl-scalapack/AOCL_DTL/aocldtlcf.h" + i. Enable Trace by making the following macro as 1 : + #define AOCL_DTL_TRACE_ENABLE 1 + ii. Enable Log by making the following macro as 1 : + #define AOCL_DTL_LOG_ENABLE 1 + +2. 
After Step 1, configure the cmake with -DENABLE_DTL=ON config option to enable AOCL DTL feature. + For Example: cmake .. -DBUILD_SHARED_LIBS=OFF -DBLAS_LIBRARIES="-fopenmp libblis-mt.a" -DLAPACK_LIBRARIES=/libflame.a -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc -DCMAKE_Fortran_COMPILER=mpif90 -DUSE_OPTIMIZED_LAPACK_BLAS=OFF -DENABLE_DTL=ON + +3. Currently the DTL is supported only for the LU factorization API 'pdgetrf'. + +4. After the ScaLAPACK test suite is built (Refer the latest AOCL-userGuide for the build steps), execute the LU test application (with command: "mpirun ./xdlu" ) to get the DTL trace, log files. + For Example: "P31243_T31243_aocldtl_trace.txt" and "P31243_T31243_aocldtl_log.txt". diff --git a/AOCL_DTL/aocldtl.c b/AOCL_DTL/aocldtl.c index 4096f6a8..941aa8a7 100644 --- a/AOCL_DTL/aocldtl.c +++ b/AOCL_DTL/aocldtl.c @@ -1,489 +1,488 @@ -/*=================================================================== - * File Name : aocldtl.c - * - * Description : This file contains main logging functions. - * These functions are invoked though macros by - * end user. 
- * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * - *==================================================================*/ - -#include "aocltpdef.h" -#include "aocldtl.h" -#include "aoclfal.h" -#include "aocldtlcf.h" -#include "aoclflist.h" -#include "aoclos.h" - -#ifdef AOCL_DTL_AUTO_TRACE_ENABLE -#if defined(__linux__) -#define __USE_GNU -#include -#endif -#endif - -/* By default the trace level will be set to ALL User can configure this - parameter at run time using command line argument */ -uint32 gui32TraceLogLevel = AOCL_DTL_TRACE_LEVEL; - -/* The user can configure the file name in which he wants to dump the data */ -#if AOCL_DTL_TRACE_ENABLE -/* The file name for storing traced log added manually in the code */ -static char *pchDTL_TRACE_FILE = AOCL_DTL_TRACE_FILE; - -/* Global file pointer for trace logging */ -AOCL_FLIST_Node *gpTraceFileList = NULL; - -#endif - -#if AOCL_DTL_LOG_ENABLE -/* The file name for storing log data */ -static char *pchDTL_LOG_FILE = AOCL_DTL_LOG_FILE; - -/* Global file pointer for logging the results */ -AOCL_FLIST_Node *gpLogFileList = NULL; -#endif - -#if AOCL_DTL_AUTO_TRACE_ENABLE - -/* The file name for storing execution trace, - These files are used by compiler assisted execution testing */ -static char *pchDTL_AUTO_TRACE_FILE = AOCL_DTL_AUTO_TRACE_FILE; - -/* Global file pointer for logging the results */ -AOCL_FLIST_Node *gpAutoTraceFileList = NULL; -#endif - -/*=================================================================== -* Function Name : DTL_Initialize -* Description : Creates/Opens log file and initializes the -* global trace log level -* Input Parameter(s) : ui32CurrentLogLevel - current log level -* which user can configure at run time -* Output Parameter(s) : None -* Return parameter(s) : None -*==================================================================*/ -#ifdef AOCL_DTL_INITIALIZE_ENABLE - -void DTL_Initialize( - uint32 ui32CurrentLogLevel) -{ - /* If user selects invalid trace log level 
then the dafault trace log level - will be AOCL_DTL_LEVEL_ALL */ - if ((ui32CurrentLogLevel < 1) || (ui32CurrentLogLevel > AOCL_DTL_LEVEL_ALL)) - { - gui32TraceLogLevel = AOCL_DTL_LEVEL_ALL; - } - else - { - /* Assign the user requested log level to the global trace log level */ - gui32TraceLogLevel = ui32CurrentLogLevel; - } - -#if AOCL_DTL_TRACE_ENABLE - /* Create/Open the file to log the traced data */ - AOCL_FLIST_AddFile(pchDTL_TRACE_FILE, &gpTraceFileList, AOCL_gettid()); - - if (NULL == gpTraceFileList) - { - /* Unable to open the specified file.*/ - AOCL_DEBUGPRINT("Unable to create the trace file %s\n", pchDTL_TRACE_FILE); - return; - } -#endif - -#if AOCL_DTL_LOG_ENABLE - /* Create/Open the file to log the log data */ - AOCL_FLIST_AddFile(pchDTL_LOG_FILE, &gpLogFileList, AOCL_gettid()); - - if (NULL == gpLogFileList) - { - /* Unable to open the specified file.*/ - AOCL_DEBUGPRINT("Unable to create the log file %s\n", pchDTL_LOG_FILE); - return; - } -#endif - -#if AOCL_DTL_AUTO_TRACE_ENABLE - /* Create/Open the file to log the log data */ - AOCL_FLIST_AddFile(pchDTL_AUTO_TRACE_FILE, &gpAutoTraceFileList, AOCL_gettid()); - - if (NULL == gpAutoTraceFileList) - { - /* Unable to open the specified file.*/ - AOCL_DEBUGPRINT("Unable to create the log file %s\n", pchDTL_AUTO_TRACE_FILE); - return; - } -#endif - -} /* DTL_Initialize */ -#endif - -/*=================================================================== -* Function Name : DTL_Uninitialize -* Description : Close all the log files -* Input Parameter(s) : void -* Output Parameter(s) : None -* Return parameter(s) : None -*==================================================================*/ -#ifdef AOCL_DTL_INITIALIZE_ENABLE -void DTL_Uninitialize(void) -{ -#if AOCL_DTL_TRACE_ENABLE - /* Close the trace file */ - AOCL_FLIST_CloseAll(gpTraceFileList); -#endif - -#if AOCL_DTL_LOG_ENABLE - /* Close the log file */ - AOCL_FLIST_CloseAll(gpLogFileList); -#endif - -#if AOCL_DTL_AUTO_TRACE_ENABLE - /* Close the 
log file */ - AOCL_FLIST_CloseAll(gpAutoTraceFileList); -#endif - return; -} /* DTL_Uninitialise */ -#endif - -/*=================================================================== -* Function Name : DTL_Trace -* Description : This is common lowest level function -* to log the event to a file, This function -* will take case of choosing correct file -* according to the current thread and -* log the event as per format requested. - -* Input Parameter(s) : ui8LogLevel - Log Level -* ui8LogType - Identify log type (entry, exit etc) -* pi8FileName.- File name -* pi8FunctionName - Function Name -* ui32LineNumber - Line number -* pi8Message - Message to be printed -* Output Parameter(s) : None -* Return parameter(s) : None -*==================================================================*/ -#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_LOG_ENABLE) -void DTL_Trace( - uint8 ui8LogLevel, - uint8 ui8LogType, - const int8 *pi8FileName, - const int8 *pi8FunctionName, - uint32 ui32LineNumber, - const int8 *pi8Message) -{ - uint8 i = 0; - AOCL_FAL_FILE *pOutFile = NULL; - - if (ui8LogType == TRACE_TYPE_LOG || ui8LogType == TRACE_TYPE_RAW) - { -#if AOCL_DTL_LOG_ENABLE - pOutFile = AOCL_FLIST_GetFile(gpLogFileList, AOCL_gettid()); - - /* If trace file pointer is equal to NULL then return with out dumping data - to the file */ - if (NULL == pOutFile) - { - /* It might be the first call from the current thread, try to create - new trace for this thread. 
*/ - pOutFile = AOCL_FLIST_AddFile(pchDTL_LOG_FILE, &gpLogFileList, AOCL_gettid()); - - if (NULL == pOutFile) - { - AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); - return; - } - } -#endif /* Logging enabled */ - } - else - { -#if AOCL_DTL_TRACE_ENABLE - pOutFile = AOCL_FLIST_GetFile(gpTraceFileList, AOCL_gettid()); - - /* If trace file pointer is equal to NULL then return with out dumping data - to file */ - if (NULL == pOutFile) - { - /* It might be the first call from the current thread, try to create - new trace for this thread. */ - pOutFile = AOCL_FLIST_AddFile(pchDTL_TRACE_FILE, &gpTraceFileList, AOCL_gettid()); - - if (NULL == pOutFile) - { - AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); - return; - } - } -#endif /* Trace Enabled */ - } - - /* Log the message only if the log level is less than or equal to global log - level set while initialization */ - if (ui8LogLevel <= gui32TraceLogLevel) - { - - /* Indent as per level if is function call trace */ - if ((ui8LogLevel >= AOCL_DTL_LEVEL_TRACE_1) && - (ui8LogLevel <= AOCL_DTL_LEVEL_TRACE_8)) - { - /* this loop is for formating the output log file */ - for (i = 0; i < (ui8LogLevel - AOCL_DTL_LEVEL_TRACE_1); i++) - { - /* print tabs in the output file */ - fprintf(pOutFile, "\t"); - } - } - - switch (ui8LogType) - { - case TRACE_TYPE_FENTRY: - fprintf(pOutFile, "In %s()...\n", pi8FunctionName); - break; - - case TRACE_TYPE_FEXIT: - if (pi8Message == NULL) - { /* Function returned successfully */ - fprintf(pOutFile, "Out of %s()\n", pi8FunctionName); - } - else - { /* Function failed to complete, use message to get error */ - fprintf(pOutFile, "Out of %s() with error %s\n", pi8FunctionName, pi8Message); - } - break; - - case TRACE_TYPE_LOG: - fprintf(pOutFile, "%s:%d:%s\n", pi8FileName, ui32LineNumber, pi8Message); - break; - - case TRACE_TYPE_RAW: - fprintf(pOutFile, "%s\n", pi8Message); - break; - } - fflush(pOutFile); - } -} /* DTL_Data_Trace_Entry */ -#endif - 
-/*=================================================================== -* Function Name : DTL_DumpData -* Description : This function is mainly used for dumping -* the data into the file -* Input Parameter(s) : pui8Buffer - the buffer to be dumped -* ui32BufferSize.- the no. of bytes to be dumped -* ui8DataType - the data type char/int32/int32 -* Output Parameter(s) : None -* Return parameter(s) : None -*==================================================================*/ -#if AOCL_DTL_DUMP_ENABLE -void DTL_DumpData( - uint8 ui8LogLevel, - void *pvBuffer, - uint32 ui32BufferSize, - uint8 ui8DataType, - int8 *pi8Message, - int8 i8OutputType) -{ - uint32 j; - - /* Pointer to store the buffer */ - uint32 *pui32Array, ui32LocalData; - uint16 *pui16Array; - uint8 *pui8CharArray; - int8 *pi8CharString; - - /* If dump (log) file pointer is equal to NULL return with out dumping data to file */ - AOCL_FAL_FILE *pDumpFile = AOCL_FLIST_GetFile(gpLogFileList, AOCL_gettid()); - /* Log the message only if the log level is less than or equal to global log - level set while initialization */ - if (ui8LogLevel > gui32TraceLogLevel) - { - return; - } - - /* The string message */ - if (pi8Message != NULL) - { - fprintf(pDumpFile, "%s :", pi8Message); - } - - /* Assuming that if the Data type for character = 1 - * the Data type for uint32 = 2 - * the data type for uint32 = 4 - * the data type for string = 3 - */ - if (ui8DataType == AOCL_STRING_DATA_TYPE) - { - /* Typecast the void buffer to character buffer */ - pi8CharString = (int8 *)pvBuffer; - fprintf(pDumpFile, "%s", pi8CharString); - fprintf(pDumpFile, "\n"); - } - - if (ui8DataType == AOCL_CHAR_DATA_TYPE) - { - /* Typecast the void buffer to character buffer */ - pui8CharArray = (uint8 *)pvBuffer; - - for (j = 0; j < ui32BufferSize; j++) - { - if (i8OutputType == AOCL_LOG_HEX_VALUE) - { - fprintf(pDumpFile, "\n\t%5d:0x%x", j, pui8CharArray[j]); - } - else - { - fprintf(pDumpFile, "\n\t%5d:%u", j, pui8CharArray[j]); - } - } - 
fprintf(pDumpFile, "\n"); - } - - if (ui8DataType == AOCL_UINT16_DATA_TYPE) - { - /* Typecast the void buffer to uint32 bit buffer */ - pui16Array = (uint16 *)pvBuffer; - - /* dump the data in the file line by line */ - for (j = 0; j < ui32BufferSize; j++) - { - if (i8OutputType == AOCL_LOG_HEX_VALUE) - { - fprintf(pDumpFile, "\n\t%5d:0x%x", j, pui16Array[j]); - } - else - { - fprintf(pDumpFile, "\n\t%5d:%u", j, pui16Array[j]); - } - } - fprintf(pDumpFile, "\n"); - - } /* End of if */ - - if (ui8DataType == AOCL_UINT32_DATA_TYPE) - { - /* Typecast the void buffer to uint32 buffer */ - pui32Array = (uint32 *)pvBuffer; - - /* dump the data in the file line by line */ - for (j = 0; j < ui32BufferSize; j++) - { - ui32LocalData = pui32Array[j]; - - if (i8OutputType == AOCL_LOG_HEX_VALUE) - { - fprintf(pDumpFile, "\n\t%5d:0x%x", j, ui32LocalData); - } - else - { - fprintf(pDumpFile, "\n\t%5d:%u", j, ui32LocalData); - } - } - fprintf(pDumpFile, "\n"); - } /* End of if */ - fflush(pDumpFile); -} /* DTL_DumpData */ -#endif - -/* This is enabled by passing ETRACE_ENABLE=1 to make */ -#ifdef AOCL_DTL_AUTO_TRACE_ENABLE - -/* - Disable intrumentation for these functions as they will also be - called from compiler generated instumation code to trace - function execution. - - It needs to be part of declration in the C file so can't be - moved to header file. - - WARNING: These functions are automatically invoked. however any function - called from this should have instumtation disable to avoid recursive - calls which results in hang/crash. 
- */ -void __cyg_profile_func_enter(void *this_fn, void *call_site) __attribute__((no_instrument_function)); -void __cyg_profile_func_exit(void *this_fn, void *call_site) __attribute__((no_instrument_function)); - -/*=================================================================== -* Function Name : __cyg_profile_func_enter -* Description : This function is automatically invoked -* by compiler instrumntation when the flow -* enters a function. -* Input Parameter(s) : pvThisFunc - Address of function entered. -* call_site.- Address of the caller -* Output Parameter(s) : None -* Return parameter(s) : None -*==================================================================*/ -void __cyg_profile_func_enter(void *pvThisFunc, void *pvCaller) -{ - Dl_info info; - dladdr(pvThisFunc, &info); - - AOCL_FAL_FILE *pOutFile = NULL; - - pOutFile = AOCL_FLIST_GetFile(gpAutoTraceFileList, AOCL_gettid()); - - /* If trace file pointer is equal to NULL then return with out dumping data - to the file */ - if (NULL == pOutFile) - { - /* It might be the first call from the current thread, try to create - new trace for this thread. */ - pOutFile = AOCL_FLIST_AddFile(pchDTL_AUTO_TRACE_FILE, &gpAutoTraceFileList, AOCL_gettid()); - - if (NULL == pOutFile) - { - AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); - return; - } - } - - fprintf(pOutFile, "\n%lu:+:%p", - AOCL_getTimestamp(), - (void *)(pvThisFunc - info.dli_fbase)); - fflush(pOutFile); -} - -/*=================================================================== -* Function Name : __cyg_profile_func_exit -* Description : This function is automatically invoked -* by compiler before returing from a -* function. -* Input Parameter(s) : pvThisFunc - Address of function to be existed. 
-* call_site.- Address of the caller -* Output Parameter(s) : None -* Return parameter(s) : None -*==================================================================*/ -void __cyg_profile_func_exit(void *pvThisFunc, void *pvCaller) -{ - Dl_info info; - dladdr(pvThisFunc, &info); - AOCL_FAL_FILE *pOutFile = NULL; - - pOutFile = AOCL_FLIST_GetFile(gpAutoTraceFileList, AOCL_gettid()); - - /* If trace file pointer is equal to NULL then return with out dumping data - to the file */ - if (NULL == pOutFile) - { - /* It might be the first call from the current thread, try to create - new trace for this thread. */ - pOutFile = AOCL_FLIST_AddFile(pchDTL_AUTO_TRACE_FILE, &gpAutoTraceFileList, AOCL_gettid()); - - if (NULL == pOutFile) - { - AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); - return; - } - } - - fprintf(pOutFile, "\n%lu:-:%p", - AOCL_getTimestamp(), - (void *)(pvThisFunc - info.dli_fbase)); - fflush(pOutFile); -} - -#endif /* AOCL_AUTO_TRACE_ENABLE */ - -/* ------------------ End of aocldtl.c ---------------------- */ +/*=================================================================== + * File Name : aocldtl.c + * + * Description : This file contains main logging functions. + * These functions are invoked though macros by + * end user. 
+ * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ + +#include "aocltpdef.h" +#include "aocldtl.h" +#include "aoclfal.h" +#include "aocldtlcf.h" +#include "aoclflist.h" +#include "aoclos.h" + +#ifdef AOCL_DTL_AUTO_TRACE_ENABLE +#if defined(__linux__) +#define __USE_GNU +#include +#endif +#endif + +/* By default the trace level will be set to ALL User can configure this + parameter at run time using command line argument */ +uint32 gui32TraceLogLevel = AOCL_DTL_TRACE_LEVEL; + +/* The user can configure the file name in which he wants to dump the data */ +#if AOCL_DTL_TRACE_ENABLE +/* The file name for storing traced log added manually in the code */ +static char *pchDTL_TRACE_FILE = AOCL_DTL_TRACE_FILE; + +/* Global file pointer for trace logging */ +AOCL_FLIST_Node *gpTraceFileList = NULL; + +#endif + +#if AOCL_DTL_LOG_ENABLE +/* The file name for storing log data */ +static char *pchDTL_LOG_FILE = AOCL_DTL_LOG_FILE; + +/* Global file pointer for logging the results */ +AOCL_FLIST_Node *gpLogFileList = NULL; +#endif + +#if AOCL_DTL_AUTO_TRACE_ENABLE + +/* The file name for storing execution trace, + These files are used by compiler assisted execution testing */ +static char *pchDTL_AUTO_TRACE_FILE = AOCL_DTL_AUTO_TRACE_FILE; + +/* Global file pointer for logging the results */ +AOCL_FLIST_Node *gpAutoTraceFileList = NULL; +#endif + +/*=================================================================== +* Function Name : DTL_Initialize +* Description : Creates/Opens log file and initializes the +* global trace log level +* Input Parameter(s) : ui32CurrentLogLevel - current log level +* which user can configure at run time +* Output Parameter(s) : None +* Return parameter(s) : None +*==================================================================*/ +#ifdef AOCL_DTL_INITIALIZE_ENABLE + +void DTL_Initialize( + uint32 ui32CurrentLogLevel) +{ + /* If user selects invalid trace log level 
then the dafault trace log level + will be AOCL_DTL_LEVEL_ALL */ + if ((ui32CurrentLogLevel < 1) || (ui32CurrentLogLevel > AOCL_DTL_LEVEL_ALL)) + { + gui32TraceLogLevel = AOCL_DTL_LEVEL_ALL; + } + else + { + /* Assign the user requested log level to the global trace log level */ + gui32TraceLogLevel = ui32CurrentLogLevel; + } + +#if AOCL_DTL_TRACE_ENABLE + /* Create/Open the file to log the traced data */ + AOCL_FLIST_AddFile(pchDTL_TRACE_FILE, &gpTraceFileList, AOCL_gettid()); + + if (NULL == gpTraceFileList) + { + /* Unable to open the specified file.*/ + AOCL_DEBUGPRINT("Unable to create the trace file %s\n", pchDTL_TRACE_FILE); + return; + } +#endif + +#if AOCL_DTL_LOG_ENABLE + /* Create/Open the file to log the log data */ + AOCL_FLIST_AddFile(pchDTL_LOG_FILE, &gpLogFileList, AOCL_gettid()); + + if (NULL == gpLogFileList) + { + /* Unable to open the specified file.*/ + AOCL_DEBUGPRINT("Unable to create the log file %s\n", pchDTL_LOG_FILE); + return; + } +#endif + +#if AOCL_DTL_AUTO_TRACE_ENABLE + /* Create/Open the file to log the log data */ + AOCL_FLIST_AddFile(pchDTL_AUTO_TRACE_FILE, &gpAutoTraceFileList, AOCL_gettid()); + + if (NULL == gpAutoTraceFileList) + { + /* Unable to open the specified file.*/ + AOCL_DEBUGPRINT("Unable to create the log file %s\n", pchDTL_AUTO_TRACE_FILE); + return; + } +#endif + +} /* DTL_Initialize */ +#endif + +/*=================================================================== +* Function Name : DTL_Uninitialize +* Description : Close all the log files +* Input Parameter(s) : void +* Output Parameter(s) : None +* Return parameter(s) : None +*==================================================================*/ +#ifdef AOCL_DTL_INITIALIZE_ENABLE +void DTL_Uninitialize(void) +{ +#if AOCL_DTL_TRACE_ENABLE + /* Close the trace file */ + AOCL_FLIST_CloseAll(gpTraceFileList); +#endif + +#if AOCL_DTL_LOG_ENABLE + /* Close the log file */ + AOCL_FLIST_CloseAll(gpLogFileList); +#endif + +#if AOCL_DTL_AUTO_TRACE_ENABLE + /* Close the 
log file */ + AOCL_FLIST_CloseAll(gpAutoTraceFileList); +#endif + return; +} /* DTL_Uninitialise */ +#endif + +/*=================================================================== +* Function Name : DTL_Trace +* Description : This is common lowest level function +* to log the event to a file, This function +* will take case of choosing correct file +* according to the current thread and +* log the event as per format requested. + +* Input Parameter(s) : ui8LogLevel - Log Level +* ui8LogType - Identify log type (entry, exit etc) +* pi8FileName.- File name +* pi8FunctionName - Function Name +* ui32LineNumber - Line number +* pi8Message - Message to be printed +* Output Parameter(s) : None +* Return parameter(s) : None +*==================================================================*/ +#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_LOG_ENABLE) +void DTL_Trace( + uint8 ui8LogLevel, + uint8 ui8LogType, + const int8 *pi8FileName, + const int8 *pi8FunctionName, + uint32 ui32LineNumber, + const int8 *pi8Message) +{ + uint8 i = 0; + AOCL_FAL_FILE *pOutFile = NULL; + + if (ui8LogType == TRACE_TYPE_LOG || ui8LogType == TRACE_TYPE_RAW) + { +#if AOCL_DTL_LOG_ENABLE + pOutFile = AOCL_FLIST_GetFile(gpLogFileList, AOCL_gettid()); + + /* If trace file pointer is equal to NULL then return with out dumping data + to the file */ + if (NULL == pOutFile) + { + /* It might be the first call from the current thread, try to create + new trace for this thread. 
*/ + pOutFile = AOCL_FLIST_AddFile(pchDTL_LOG_FILE, &gpLogFileList, AOCL_gettid()); + + if (NULL == pOutFile) + { + AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); + return; + } + } +#endif /* Logging enabled */ + } + else + { +#if AOCL_DTL_TRACE_ENABLE + pOutFile = AOCL_FLIST_GetFile(gpTraceFileList, AOCL_gettid()); + + /* If trace file pointer is equal to NULL then return with out dumping data + to file */ + if (NULL == pOutFile) + { + /* It might be the first call from the current thread, try to create + new trace for this thread. */ + pOutFile = AOCL_FLIST_AddFile(pchDTL_TRACE_FILE, &gpTraceFileList, AOCL_gettid()); + + if (NULL == pOutFile) + { + AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); + return; + } + } +#endif /* Trace Enabled */ + } + + /* Log the message only if the log level is less than or equal to global log + level set while initialization */ + if (ui8LogLevel <= gui32TraceLogLevel) + { + + /* Indent as per level if is function call trace */ + if ((ui8LogLevel >= AOCL_DTL_LEVEL_TRACE_1) && + (ui8LogLevel <= AOCL_DTL_LEVEL_TRACE_8)) + { + /* this loop is for formating the output log file */ + for (i = 0; i < (ui8LogLevel - AOCL_DTL_LEVEL_TRACE_1); i++) + { + /* print tabs in the output file */ + fprintf(pOutFile, "\t"); + } + } + + switch (ui8LogType) + { + case TRACE_TYPE_FENTRY: + fprintf(pOutFile, "In %s()...\n", pi8FunctionName); + break; + + case TRACE_TYPE_FEXIT: + if (pi8Message == NULL) + { /* Function returned successfully */ + fprintf(pOutFile, "Out of %s()\n", pi8FunctionName); + } + else + { /* Function failed to complete, use message to get error */ + fprintf(pOutFile, "Out of %s() with error %s\n", pi8FunctionName, pi8Message); + } + break; + case TRACE_TYPE_LOG: + fprintf(pOutFile, "%s:%d,%s\n", pi8FileName, ui32LineNumber, pi8Message); + break; + + case TRACE_TYPE_RAW: + fprintf(pOutFile, "%s\n", pi8Message); + break; + } + fflush(pOutFile); + } +} /* DTL_Data_Trace_Entry */ +#endif + 
+/*=================================================================== +* Function Name : DTL_DumpData +* Description : This function is mainly used for dumping +* the data into the file +* Input Parameter(s) : pui8Buffer - the buffer to be dumped +* ui32BufferSize.- the no. of bytes to be dumped +* ui8DataType - the data type char/int32/int32 +* Output Parameter(s) : None +* Return parameter(s) : None +*==================================================================*/ +#if AOCL_DTL_DUMP_ENABLE +void DTL_DumpData( + uint8 ui8LogLevel, + void *pvBuffer, + uint32 ui32BufferSize, + uint8 ui8DataType, + int8 *pi8Message, + int8 i8OutputType) +{ + uint32 j; + + /* Pointer to store the buffer */ + uint32 *pui32Array, ui32LocalData; + uint16 *pui16Array; + uint8 *pui8CharArray; + int8 *pi8CharString; + + /* If dump (log) file pointer is equal to NULL return with out dumping data to file */ + AOCL_FAL_FILE *pDumpFile = AOCL_FLIST_GetFile(gpLogFileList, AOCL_gettid()); + /* Log the message only if the log level is less than or equal to global log + level set while initialization */ + if (ui8LogLevel > gui32TraceLogLevel) + { + return; + } + + /* The string message */ + if (pi8Message != NULL) + { + fprintf(pDumpFile, "%s :", pi8Message); + } + + /* Assuming that if the Data type for character = 1 + * the Data type for uint32 = 2 + * the data type for uint32 = 4 + * the data type for string = 3 + */ + if (ui8DataType == AOCL_STRING_DATA_TYPE) + { + /* Typecast the void buffer to character buffer */ + pi8CharString = (int8 *)pvBuffer; + fprintf(pDumpFile, "%s", pi8CharString); + fprintf(pDumpFile, "\n"); + } + + if (ui8DataType == AOCL_CHAR_DATA_TYPE) + { + /* Typecast the void buffer to character buffer */ + pui8CharArray = (uint8 *)pvBuffer; + + for (j = 0; j < ui32BufferSize; j++) + { + if (i8OutputType == AOCL_LOG_HEX_VALUE) + { + fprintf(pDumpFile, "\n\t%5d:0x%x", j, pui8CharArray[j]); + } + else + { + fprintf(pDumpFile, "\n\t%5d:%u", j, pui8CharArray[j]); + } + } + 
fprintf(pDumpFile, "\n"); + } + + if (ui8DataType == AOCL_UINT16_DATA_TYPE) + { + /* Typecast the void buffer to uint32 bit buffer */ + pui16Array = (uint16 *)pvBuffer; + + /* dump the data in the file line by line */ + for (j = 0; j < ui32BufferSize; j++) + { + if (i8OutputType == AOCL_LOG_HEX_VALUE) + { + fprintf(pDumpFile, "\n\t%5d:0x%x", j, pui16Array[j]); + } + else + { + fprintf(pDumpFile, "\n\t%5d:%u", j, pui16Array[j]); + } + } + fprintf(pDumpFile, "\n"); + + } /* End of if */ + + if (ui8DataType == AOCL_UINT32_DATA_TYPE) + { + /* Typecast the void buffer to uint32 buffer */ + pui32Array = (uint32 *)pvBuffer; + + /* dump the data in the file line by line */ + for (j = 0; j < ui32BufferSize; j++) + { + ui32LocalData = pui32Array[j]; + + if (i8OutputType == AOCL_LOG_HEX_VALUE) + { + fprintf(pDumpFile, "\n\t%5d:0x%x", j, ui32LocalData); + } + else + { + fprintf(pDumpFile, "\n\t%5d:%u", j, ui32LocalData); + } + } + fprintf(pDumpFile, "\n"); + } /* End of if */ + fflush(pDumpFile); +} /* DTL_DumpData */ +#endif + +/* This is enabled by passing ETRACE_ENABLE=1 to make */ +#ifdef AOCL_DTL_AUTO_TRACE_ENABLE + +/* + Disable intrumentation for these functions as they will also be + called from compiler generated instumation code to trace + function execution. + + It needs to be part of declration in the C file so can't be + moved to header file. + + WARNING: These functions are automatically invoked. however any function + called from this should have instumtation disable to avoid recursive + calls which results in hang/crash. 
+ */ +void __cyg_profile_func_enter(void *this_fn, void *call_site) __attribute__((no_instrument_function)); +void __cyg_profile_func_exit(void *this_fn, void *call_site) __attribute__((no_instrument_function)); + +/*=================================================================== +* Function Name : __cyg_profile_func_enter +* Description : This function is automatically invoked +* by compiler instrumntation when the flow +* enters a function. +* Input Parameter(s) : pvThisFunc - Address of function entered. +* call_site.- Address of the caller +* Output Parameter(s) : None +* Return parameter(s) : None +*==================================================================*/ +void __cyg_profile_func_enter(void *pvThisFunc, void *pvCaller) +{ + Dl_info info; + dladdr(pvThisFunc, &info); + + AOCL_FAL_FILE *pOutFile = NULL; + + pOutFile = AOCL_FLIST_GetFile(gpAutoTraceFileList, AOCL_gettid()); + + /* If trace file pointer is equal to NULL then return with out dumping data + to the file */ + if (NULL == pOutFile) + { + /* It might be the first call from the current thread, try to create + new trace for this thread. */ + pOutFile = AOCL_FLIST_AddFile(pchDTL_AUTO_TRACE_FILE, &gpAutoTraceFileList, AOCL_gettid()); + + if (NULL == pOutFile) + { + AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); + return; + } + } + + fprintf(pOutFile, "\n%lu:+:%p", + AOCL_getTimestamp(), + (void *)(pvThisFunc - info.dli_fbase)); + fflush(pOutFile); +} + +/*=================================================================== +* Function Name : __cyg_profile_func_exit +* Description : This function is automatically invoked +* by compiler before returing from a +* function. +* Input Parameter(s) : pvThisFunc - Address of function to be existed. 
+* call_site.- Address of the caller +* Output Parameter(s) : None +* Return parameter(s) : None +*==================================================================*/ +void __cyg_profile_func_exit(void *pvThisFunc, void *pvCaller) +{ + Dl_info info; + dladdr(pvThisFunc, &info); + AOCL_FAL_FILE *pOutFile = NULL; + + pOutFile = AOCL_FLIST_GetFile(gpAutoTraceFileList, AOCL_gettid()); + + /* If trace file pointer is equal to NULL then return with out dumping data + to the file */ + if (NULL == pOutFile) + { + /* It might be the first call from the current thread, try to create + new trace for this thread. */ + pOutFile = AOCL_FLIST_AddFile(pchDTL_AUTO_TRACE_FILE, &gpAutoTraceFileList, AOCL_gettid()); + + if (NULL == pOutFile) + { + AOCL_DEBUGPRINT("File does not exists to dump the trace data \n"); + return; + } + } + + fprintf(pOutFile, "\n%lu:-:%p", + AOCL_getTimestamp(), + (void *)(pvThisFunc - info.dli_fbase)); + fflush(pOutFile); +} + +#endif /* AOCL_AUTO_TRACE_ENABLE */ + +/* ------------------ End of aocldtl.c ---------------------- */ diff --git a/AOCL_DTL/aocldtl.h b/AOCL_DTL/aocldtl.h index bbd610e3..9e8af18b 100644 --- a/AOCL_DTL/aocldtl.h +++ b/AOCL_DTL/aocldtl.h @@ -1,169 +1,169 @@ -/*=================================================================== - * File Name : aocldtl.h - * - * Description : This is main interface file for the end user - * It provides defination for all macros to be - * used by user to add debug/trace information. 
- * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * - *==================================================================*/ - -#ifndef _AOCLDTL_H_ -#define _AOCLDTL_H_ - -#include "aocldtlcf.h" -#include "aocltpdef.h" -#include "aoclflist.h" - -#define TRACE_TYPE_FENTRY (1) -#define TRACE_TYPE_FEXIT (2) -#define TRACE_TYPE_LOG (3) -#define TRACE_TYPE_RAW (4) - -/* Type definition for printf */ -#define AOCL_DEBUGPRINT printf - -/* Customization for scalapack */ -#if AOCL_DTL_LOG_ENABLE - #define BUFF_SIZE 256 - #define BUFFER buffer - /*Variable Argument macro for snprintf*/ - #define AOCL_DTL_SNPRINTF(...) snprintf(BUFFER,BUFF_SIZE,__VA_ARGS__) - -#else - #define AOCL_DTL_SNPRINTF(...) - -#endif - - -/* Define the AOCL_DTL_INITIALIZE_ENABLE if any of the debug macro - * are defined */ -#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_DUMP_ENABLE || AOCL_DTL_LOG_ENABLE) -#define AOCL_DTL_INITIALIZE_ENABLE -#endif - -#if AOCL_DTL_TRACE_ENABLE -/* Entry macro to trace the flow of control The parameter LogLevel specifies - the log level String will preferably contains the function name in which - this macro is invoked */ -#define AOCL_DTL_TRACE_ENTRY(LogLevel) \ - DTL_Trace(LogLevel, \ - TRACE_TYPE_FENTRY, \ - __FILE__, \ - __FUNCTION__, \ - __LINE__, \ - NULL); -#else -/* Dummy macro definition if the AOCL_DTL_TRACE_ENABLE macro is not enabled */ -#define AOCL_DTL_TRACE_ENTRY(LogLevel) -#endif - -#if AOCL_DTL_TRACE_ENABLE -/* Exit macro to trace the flow of control The parameter LogLevel specifies - log level String will preferably contains the function name in which this - macro is invoked */ -#define AOCL_DTL_TRACE_EXIT(LogLevel) \ - DTL_Trace(LogLevel, \ - TRACE_TYPE_FEXIT, \ - __FILE__, \ - __FUNCTION__, \ - __LINE__, \ - NULL); - -#define AOCL_DTL_TRACE_EXIT_ERR(LogLevel, Message) \ - DTL_Trace(LogLevel, \ - TRACE_TYPE_FEXIT, \ - __FILE__, \ - __FUNCTION__, \ - __LINE__, \ - Message); -#else -/* Dummy macro definition if the AOCL_DTL_TRACE_ENABLE macro is not 
enabled */ -#define AOCL_DTL_TRACE_EXIT(LogLevel) -#define AOCL_DTL_TRACE_EXIT_ERR(LogLevel, Message) -#endif - -#if AOCL_DTL_DUMP_ENABLE -/* Macro to Dump the DATA The parameters Buffer contains the data to be - dumped BufferSize specifies the no. of bytes to be dumped DataType - specifies the data type of Buffer */ -#define AOCL_DTL_DUMP(LogLevel, Buffer, BufferSize, DataType, String, OutputType) \ - /* Call the Dump function to Dump the DATA */ \ - DTL_DumpData(LogLevel, \ - Buffer, \ - BufferSize, \ - DataType, \ - String, \ - OutputType); -#else -/* Dummy macro definition if the AOCL_DTL_DUMP_ENABLE macro is not enabled */ -#define AOCL_DTL_DUMP(Buffer, BufferSize, DataType, String, OutputType) - -#endif - -#if AOCL_DTL_LOG_ENABLE -/* Macro to log the Data */ -#define AOCL_DTL_LOG(LogLevel, Message) \ - DTL_Trace(LogLevel, \ - TRACE_TYPE_LOG, \ - __FILE__, \ - __FUNCTION__, \ - __LINE__, \ - Message); -#else -/* Dummy macro definition if the AOCL_DTL_LOG_ENABLE macro is not enabled */ -#define AOCL_DTL_LOG(LogLevel, Message) -#endif - -/* Macro to initialize the prerequisite for debuging */ -#ifdef AOCL_DTL_INITIALIZE_ENABLE -#define AOCL_DTL_INITIALIZE(CURRENT_LOG_LEVEL) \ - DTL_Initialize(CURRENT_LOG_LEVEL); -#else -/* Dummy macro definition if the AOCL_DTL_INITIALIZE macro is not enabled */ -#define AOCL_DTL_INITIALIZE(CURRENT_LOG_LEVEL) -#endif - -/* Macro for uninitializing the prerequisite */ -#ifdef AOCL_DTL_INITIALIZE_ENABLE -#define AOCL_DTL_UNINITIALIZE() \ - DTL_Uninitialize(); -#else -/* Dummy macro definition if the AOCL_DTL_INITIALIZE macro is not enabled */ -#define AOCL_DTL_UNINITIALIZE() -#endif - -#ifdef AOCL_DTL_INITIALIZE_ENABLE -/* Prototypes for initializing and uninitializing the debug functions */ -void DTL_Initialize( - uint32 ui32CurrentLogLevel); -void DTL_Uninitialize(void); -#endif - -#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_LOG_ENABLE) -/* Debug trace Function protoypes */ -void DTL_Trace( - uint8 ui8LogLevel, - uint8 ui8LogType, - 
const int8 *pi8FileName, - const int8 *pi8FunctionName, - uint32 ui32LineNumber, - const int8 *pi8Message); - -#endif - -#if AOCL_DTL_DUMP_ENABLE -/* Function Prototype for dumping the data */ -void DTL_DumpData( - uint8 ui8LogLevel, - void *pvBuffer, - uint32 ui32BufferSize, - uint8 ui8DataType, - int8 *pi8Message, - int8 i8OutputType); -#endif - -#endif /* _AOCLDTL_H_ */ - -/* --------------- End of aocldtl.h ----------------- */ +/*=================================================================== + * File Name : aocldtl.h + * + * Description : This is main interface file for the end user + * It provides defination for all macros to be + * used by user to add debug/trace information. + * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ + +#ifndef _AOCLDTL_H_ +#define _AOCLDTL_H_ + +#include "aocldtlcf.h" +#include "aocltpdef.h" +#include "aoclflist.h" + +#define TRACE_TYPE_FENTRY (1) +#define TRACE_TYPE_FEXIT (2) +#define TRACE_TYPE_LOG (3) +#define TRACE_TYPE_RAW (4) + +/* Type definition for printf */ +#define AOCL_DEBUGPRINT printf + +/* Customization for scalapack */ +#if AOCL_DTL_LOG_ENABLE + #define BUFF_SIZE 256 + #define BUFFER buffer + /*Variable Argument macro for snprintf*/ + #define AOCL_DTL_SNPRINTF(...) snprintf(BUFFER,BUFF_SIZE,__VA_ARGS__) + +#else + #define AOCL_DTL_SNPRINTF(...) 
+ +#endif + + +/* Define AOCL_DTL_INITIALIZE_ENABLE if any of the debug macros + * is enabled (set to non-zero) */ +#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_DUMP_ENABLE || AOCL_DTL_LOG_ENABLE) +#define AOCL_DTL_INITIALIZE_ENABLE +#endif + +#if AOCL_DTL_TRACE_ENABLE +/* Entry macro to trace the flow of control. The parameter LogLevel specifies + the log level; the call site is identified through __FILE__, __FUNCTION__ + and __LINE__, and no message is passed (NULL) */ +#define AOCL_DTL_TRACE_ENTRY(LogLevel) \ + DTL_Trace(LogLevel, \ + TRACE_TYPE_FENTRY, \ + __FILE__, \ + __FUNCTION__, \ + __LINE__, \ + NULL); +#else +/* Dummy macro definition if the AOCL_DTL_TRACE_ENABLE macro is not enabled */ +#define AOCL_DTL_TRACE_ENTRY(LogLevel) +#endif + +#if AOCL_DTL_TRACE_ENABLE +/* Exit macros to trace the flow of control. The parameter LogLevel specifies + the log level; AOCL_DTL_TRACE_EXIT passes no message (NULL), while + AOCL_DTL_TRACE_EXIT_ERR forwards Message to DTL_Trace */ +#define AOCL_DTL_TRACE_EXIT(LogLevel) \ + DTL_Trace(LogLevel, \ + TRACE_TYPE_FEXIT, \ + __FILE__, \ + __FUNCTION__, \ + __LINE__, \ + NULL); + +#define AOCL_DTL_TRACE_EXIT_ERR(LogLevel, Message) \ + DTL_Trace(LogLevel, \ + TRACE_TYPE_FEXIT, \ + __FILE__, \ + __FUNCTION__, \ + __LINE__, \ + Message); +#else +/* Dummy macro definition if the AOCL_DTL_TRACE_ENABLE macro is not enabled */ +#define AOCL_DTL_TRACE_EXIT(LogLevel) +#define AOCL_DTL_TRACE_EXIT_ERR(LogLevel, Message) +#endif + +#if AOCL_DTL_DUMP_ENABLE +/* Macro to Dump the DATA The parameters Buffer contains the data to be + dumped BufferSize specifies the no.
of bytes to be dumped DataType + specifies the data type of Buffer */ +#define AOCL_DTL_DUMP(LogLevel, Buffer, BufferSize, DataType, String, OutputType) \ + /* Call the Dump function to Dump the DATA */ \ + DTL_DumpData(LogLevel, \ + Buffer, \ + BufferSize, \ + DataType, \ + String, \ + OutputType); +#else +/* Dummy macro: same six parameters as the enabled form, so call sites compile whether or not AOCL_DTL_DUMP_ENABLE is set */ +#define AOCL_DTL_DUMP(LogLevel, Buffer, BufferSize, DataType, String, OutputType) + +#endif + +#if AOCL_DTL_LOG_ENABLE +/* Macro to log the Data */ +#define AOCL_DTL_LOG(LogLevel, Message) \ + DTL_Trace(LogLevel, \ + TRACE_TYPE_LOG, \ + __FILE__, \ + __FUNCTION__, \ + __LINE__, \ + Message); +#else +/* Dummy macro definition if the AOCL_DTL_LOG_ENABLE macro is not enabled */ +#define AOCL_DTL_LOG(LogLevel, Message) +#endif + +/* Macro to initialize the prerequisite for debugging */ +#ifdef AOCL_DTL_INITIALIZE_ENABLE +#define AOCL_DTL_INITIALIZE(CURRENT_LOG_LEVEL) \ + DTL_Initialize(CURRENT_LOG_LEVEL); +#else +/* Dummy macro definition if the AOCL_DTL_INITIALIZE_ENABLE macro is not defined */ +#define AOCL_DTL_INITIALIZE(CURRENT_LOG_LEVEL) +#endif + +/* Macro for uninitializing the prerequisite */ +#ifdef AOCL_DTL_INITIALIZE_ENABLE +#define AOCL_DTL_UNINITIALIZE() \ + DTL_Uninitialize(); +#else +/* Dummy macro definition if the AOCL_DTL_INITIALIZE_ENABLE macro is not defined */ +#define AOCL_DTL_UNINITIALIZE() +#endif + +#ifdef AOCL_DTL_INITIALIZE_ENABLE +/* Prototypes for initializing and uninitializing the debug functions */ +void DTL_Initialize( + uint32 ui32CurrentLogLevel); +void DTL_Uninitialize(void); +#endif + +#if (AOCL_DTL_TRACE_ENABLE || AOCL_DTL_LOG_ENABLE) +/* Debug trace function prototypes */ +void DTL_Trace( + uint8 ui8LogLevel, + uint8 ui8LogType, + const int8 *pi8FileName, + const int8 *pi8FunctionName, + uint32 ui32LineNumber, + const int8 *pi8Message); + +#endif + +#if AOCL_DTL_DUMP_ENABLE +/* Function Prototype for dumping the data */ +void DTL_DumpData( + uint8 ui8LogLevel, + void
*pvBuffer, + uint32 ui32BufferSize, + uint8 ui8DataType, + int8 *pi8Message, + int8 i8OutputType); +#endif + +#endif /* _AOCLDTL_H_ */ + +/* --------------- End of aocldtl.h ----------------- */ diff --git a/AOCL_DTL/aocldtlcf.h b/AOCL_DTL/aocldtlcf.h index 6f9cd945..b7b101bc 100644 --- a/AOCL_DTL/aocldtlcf.h +++ b/AOCL_DTL/aocldtlcf.h @@ -1,76 +1,76 @@ -/*=================================================================== - * File Name : aocldtlcf.h - * - * Description : This is configuration file for debug and trace - * libaray, all debug features (except auto trace) - * can be enabled/disabled in this file. - * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * - *==================================================================*/ - -#ifndef _AOCLDTLCF_H_ -#define _AOCLDTLCF_H_ - -/* Macro for tracing the log If the user wants to enable tracing he has to - enable this macro by making it to 1 else 0 */ -#define AOCL_DTL_TRACE_ENABLE 0 - -/* Macro for dumping the log If the user wants to enable dumping he has to - enable this macro by making it to 1 else 0 */ -#define AOCL_DTL_DUMP_ENABLE 0 - -/* Macro for logging the logs If the user wants to enable loging information he - has to enable this macro by making it to 1 else 0 */ -#define AOCL_DTL_LOG_ENABLE 0 - -/* Select the trace level till which you want to log the data */ -/* By default it will log for all levels */ -#define AOCL_DTL_TRACE_LEVEL AOCL_DTL_LEVEL_TRACE_5 - -/* user has to explicitly use the below macros to identify - ciriticality of the logged message */ -#define AOCL_DTL_LEVEL_ALL (14) -#define AOCL_DTL_LEVEL_TRACE_8 (13) -#define AOCL_DTL_LEVEL_TRACE_7 (12) /* Kernels */ -#define AOCL_DTL_LEVEL_TRACE_6 (11) -#define AOCL_DTL_LEVEL_TRACE_5 (10) -#define AOCL_DTL_LEVEL_TRACE_4 (9) -#define AOCL_DTL_LEVEL_TRACE_3 (8) -#define AOCL_DTL_LEVEL_TRACE_2 (7) -#define AOCL_DTL_LEVEL_TRACE_1 (6) /* BLIS/BLAS API */ -#define AOCL_DTL_LEVEL_VERBOSE (5) -#define AOCL_DTL_LEVEL_INFO (4) -#define 
AOCL_DTL_LEVEL_MINOR (3) -#define AOCL_DTL_LEVEL_MAJOR (2) -#define AOCL_DTL_LEVEL_CRITICAL (1) - - -#define AOCL_DTL_TRACE_FILE "aocldtl_trace.txt" -#define AOCL_DTL_AUTO_TRACE_FILE "aocldtl_auto_trace.rawfile" -#define AOCL_DTL_LOG_FILE "aocldtl_log.txt" - -/* The use can use below three macros for different data type while dumping data - * or specify the size of data type in bytes macro for character data type */ -#define AOCL_CHAR_DATA_TYPE (1) - -/* macro for short data type */ -#define AOCL_UINT16_DATA_TYPE (2) - -/* macro for String data type */ -#define AOCL_STRING_DATA_TYPE (3) - -/* macro for uint32 data type */ -#define AOCL_UINT32_DATA_TYPE (4) - -/* macro for printing Hex values */ -#define AOCL_LOG_HEX_VALUE ('x') - -/* macro for printing Decimal values */ -#define AOCL_LOG_DECIMAL_VALUE ('d') - - - -#endif /* _AOCLDTLCF_H_ */ - -/* --------------- End of aocldtlcf.h ----------------- */ +/*=================================================================== + * File Name : aocldtlcf.h + * + * Description : This is configuration file for debug and trace + * libaray, all debug features (except auto trace) + * can be enabled/disabled in this file. 
+ * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ + +#ifndef _AOCLDTLCF_H_ +#define _AOCLDTLCF_H_ + +/* Macro for tracing. To enable tracing, set this macro to 1; to disable + it, set this macro to 0 */ +#define AOCL_DTL_TRACE_ENABLE 1 + +/* Macro for dumping. To enable dumping, set this macro to 1; to disable + it, set this macro to 0 */ +#define AOCL_DTL_DUMP_ENABLE 1 + +/* Macro for logging. To enable logging information, set this macro to 1; + to disable it, set this macro to 0 */ +#define AOCL_DTL_LOG_ENABLE 1 + +/* Select the trace level till which you want to log the data */ +/* The default selected below is AOCL_DTL_LEVEL_TRACE_5 */ +#define AOCL_DTL_TRACE_LEVEL AOCL_DTL_LEVEL_TRACE_5 + +/* The user has to explicitly use the macros below to identify the + criticality of the logged message */ +#define AOCL_DTL_LEVEL_ALL (14) +#define AOCL_DTL_LEVEL_TRACE_8 (13) +#define AOCL_DTL_LEVEL_TRACE_7 (12) /* Kernels */ +#define AOCL_DTL_LEVEL_TRACE_6 (11) +#define AOCL_DTL_LEVEL_TRACE_5 (10) +#define AOCL_DTL_LEVEL_TRACE_4 (9) +#define AOCL_DTL_LEVEL_TRACE_3 (8) +#define AOCL_DTL_LEVEL_TRACE_2 (7) +#define AOCL_DTL_LEVEL_TRACE_1 (6) /* BLIS/BLAS API */ +#define AOCL_DTL_LEVEL_VERBOSE (5) +#define AOCL_DTL_LEVEL_INFO (4) +#define AOCL_DTL_LEVEL_MINOR (3) +#define AOCL_DTL_LEVEL_MAJOR (2) +#define AOCL_DTL_LEVEL_CRITICAL (1) + + +#define AOCL_DTL_TRACE_FILE "aocldtl_trace.txt" +#define AOCL_DTL_AUTO_TRACE_FILE "aocldtl_auto_trace.rawfile" +#define AOCL_DTL_LOG_FILE "aocldtl_log.txt" + +/* The user can use the macros below for different data types while dumping data, + * or specify the size of the data type in bytes. Macro for character data type */ +#define AOCL_CHAR_DATA_TYPE (1) + +/* macro for short data type */ +#define AOCL_UINT16_DATA_TYPE (2) + +/* macro for String data type */ +#define AOCL_STRING_DATA_TYPE (3) + +/* macro for uint32 data type
*/ +#define AOCL_UINT32_DATA_TYPE (4) + +/* macro for printing Hex values */ +#define AOCL_LOG_HEX_VALUE ('x') + +/* macro for printing Decimal values */ +#define AOCL_LOG_DECIMAL_VALUE ('d') + + + +#endif /* _AOCLDTLCF_H_ */ + +/* --------------- End of aocldtlcf.h ----------------- */ diff --git a/AOCL_DTL/aoclfal.c b/AOCL_DTL/aoclfal.c index a317e69c..1eadf99b 100644 --- a/AOCL_DTL/aoclfal.c +++ b/AOCL_DTL/aoclfal.c @@ -1,265 +1,265 @@ -/*=================================================================== - * File Name : aoclfal.c - * - * Description : Platform/os independed file handling API's - * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * - *==================================================================*/ - -#include "aocltpdef.h" -#include "aocldtl.h" -#include "aoclfal.h" - - - -/* Disable instrumentation for following function, since they are called from - * Auto Generated execution trace handlers. */ - -/* The FAL function declaration */ -int32 AOCL_FAL_Close( - AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); - -int32 AOCL_FAL_Error( - AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); - -AOCL_FAL_FILE *AOCL_FAL_Open( - const int8 *pchFileName, - const int8 *pchMode) __attribute__((no_instrument_function)); - -int32 AOCL_FAL_Read( - void *pvBuffer, - int32 i32Size, - int32 i32Count, - AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); - -int32 AOCL_FAL_Write( - const void *pvBuffer, - int32 i32Size, - int32 iCount, - AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); - -/*============================================================================= -* Function Name : AOCL_FAL_Open -* Description : Used for opening a file specified by name -* Input Parameter(s) : int8 *pchFileName - Stores the file name (path) -* int8 *pchMode - Specify the mode for opening file -* Output Parameter(s) : None -* Return parameter(s) : AOCL_FAL_FILE - If the file is opened 
successfully -* NULL - If there is any error while opening file -*============================================================================*/ -AOCL_FAL_FILE *AOCL_FAL_Open( - const int8 *pchFileName, - const int8 *pchMode) -{ - AOCL_FAL_FILE *fpFileOpen = NULL; - /* Open the file with provided by specified path and mode in which it should - be opened. Refer to FILE I/O operation help for getting mode types */ - fpFileOpen = fopen(pchFileName, pchMode); - /* If the file is not opened then NULL value should be returned */ - if (NULL == fpFileOpen) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Cannot open file: AOCL_FAL_Open()"); - } - return fpFileOpen; -} /* end of AOCL_FAL_Open */ - -/*============================================================================= -* Function Name : AOCL_FAL_Close -* Description : Used for closing a file specified by file pointer -* Input Parameter(s) : AOCL_FAL_FILE *fpFilePointer - File pointer -* Output Parameter(s) : None -* Return parameter(s) : 0 - If the file is closed successfully -* AOCL_FAL_CLOSE_ERROR - For any error while closing file -* -*============================================================================*/ -int32 AOCL_FAL_Close( - AOCL_FAL_FILE *fpFilePointer) -{ - /* Return value for the file close */ - int32 i32RetVal; - i32RetVal = AOCL_FAL_CLOSE_ERROR; - - /* Check whether the file pointer passed is valid or not */ - if (NULL == fpFilePointer) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Can not close file: AOCL_FAL_Close()"); - return i32RetVal; - } - - /* Close the file using the FILE pointer passed */ - i32RetVal = fclose(fpFilePointer); - - /* If the return value is non zero then it indicates an error */ - if (i32RetVal) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, - "Can't close file, Invalid file pointer passed"); - return i32RetVal; - } - - /* On successful closing of the file, function should return 0 */ - return i32RetVal; - -} /* End of AOCL_FAL_Close */ - 
-/*============================================================================= -* Function Name : AOCL_FAL_Read -* Description : Used for reading a file specified by file pointer. -* This function reads the specified number of bytes -* from the file into the buffer specified. The bytes -* read are returned by this function. -* Input Parameter(s) : int32 i32Size - Item size in bytes -* int32 i32Count - Maximum number of items to be read -* AOCL_FAL_FILE *fpFilePointer - File ptr to read from -* Output Parameter(s) : void *pvBuffer - Storage location of data -* Return parameter(s) : i32RetVal - Number of bytes read if successful -* AOCL_FAL_READ_ERROR - In case of error while reading -*============================================================================*/ -int32 AOCL_FAL_Read( - void *pvBuffer, - int32 i32Size, - int32 i32Count, - AOCL_FAL_FILE *fpFilePointer) -{ - /* Return value for the file read */ - int32 i32RetVal; - i32RetVal = AOCL_FAL_READ_ERROR; - - /* Check pointer used for pointing the storage location data is valid */ - if (NULL == pvBuffer) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, - "Can not read the file, Buffer pointer is NULL"); - return i32RetVal; - } - - /* Check whether file pointer passed is valid */ - if (NULL == fpFilePointer) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, - "Can not read the file, Buffer pointer is NULL"); - return i32RetVal; - } - - /* Read the file using file pointer */ - i32RetVal = fread(pvBuffer, i32Size, i32Count, fpFilePointer); - - if (i32RetVal != i32Count) - { - /* Check whether this is an end of file The AOCL_FAL_Error() will return - non-zero value to indicate an error */ - if (AOCL_FAL_Error(fpFilePointer)) /* AOCL_FAL_EndOfFile (fpFilePointer) */ - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, - "There is an error condition while file read"); - i32RetVal = AOCL_FAL_READ_ERROR; - } - /* This is condition where file read has encountered an end of file */ - else - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "End of 
file..."); - } - } - - /* The number of bytes read by the file read operation. - * This value may be less than the actual count, due to end of file - * or an error while reading the file */ - return i32RetVal; - -} /* End of AOCL_FAL_Read */ - -/*============================================================================= -* Function Name : AOCL_FAL_Write -* Description : Used for writing data to a file specified by file -* pointer. The number of bytes written to file are -* written by this function. -* Input Parameter(s) : const void *pvBuffer - Pointer to data location from -* where the data to be copied - int32 i32Size - Item size in bytes -* int32 i32Count - Maximum number of items to be -* written -* AOCL_FAL_FILE *fpFilePointer - File pointer to write to -* Output Parameter(s) : None -* Return parameter(s) : i32RetVal - Number of bytes written if successful -* AOCL_FAL_WRITE_ERROR - In case of error while writing -*============================================================================*/ -int32 AOCL_FAL_Write( - const void *pvBuffer, - int32 i32Size, - int32 iCount, - AOCL_FAL_FILE *fpFilePointer) -{ - /* Return value for write operation */ - int32 i32RetVal; - i32RetVal = AOCL_FAL_WRITE_ERROR; - /* Check pointer used for pointing the storage location data is valid */ - if (NULL == pvBuffer) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Can not perform file write"); - return i32RetVal; - } - - /* Check whether the file pointer passed is valid or not */ - if (NULL == fpFilePointer) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Can not perform file write"); - return i32RetVal; - } - - /* Write into the file specified by the file pointer */ - i32RetVal = fwrite(pvBuffer, i32Size, iCount, fpFilePointer); - - /* If the number of bytes written into the file are less than specified - * bytes then it is an error while file writing */ - if (i32RetVal != iCount) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "File write operation error"); - i32RetVal = AOCL_FAL_WRITE_ERROR; - 
} - - /* The return value of the file write operation */ - return i32RetVal; - -} /* End of AOCL_FAL_Write */ - -/*============================================================================= -* Function Name : AOCL_FAL_Error -* Description : Used for testing an error on the file specified -* Input Parameter(s) : AOCL_FAL_FILE *fpFilePointer - File pointer -* Output Parameter(s) : None -* Return parameter(s) : non-zero - Indicates an end of file -* 0 - Indicates that function is successful -* non-zero - Indicates that there is some error -* AOCL_FAL_ERROR - Indicates error during the operation -*============================================================================*/ -int32 AOCL_FAL_Error( - AOCL_FAL_FILE *fpFilePointer) -{ - /* Used for storing the return value for ferror function */ - int32 i32RetVal; - i32RetVal = AOCL_FAL_FERROR; - - /* Check whether the file pointer is NULL */ - if (NULL == fpFilePointer) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Invalid file pointer is passed"); - return i32RetVal; - } - - /* Call the ferror function to get an error on the file */ - i32RetVal = ferror(fpFilePointer); - - /* Check for the return value, it non-zero there is an error */ - if (i32RetVal) - { - AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "The file has some error"); - i32RetVal = AOCL_FAL_FERROR; - } - - /* In case of success, this function should return 0 */ - return i32RetVal; - -} /* End of AOCL_FAL_Error */ - -/* ------------------- End of aoclfal.c ----------------------- */ +/*=================================================================== + * File Name : aoclfal.c + * + * Description : Platform/os independed file handling API's + * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ + +#include "aocltpdef.h" +#include "aocldtl.h" +#include "aoclfal.h" + + + +/* Disable instrumentation for following function, since they are called from + * Auto Generated execution trace handlers. 
*/ + +/* FAL function declarations carrying the no_instrument_function attribute */ +int32 AOCL_FAL_Close( + AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); + +int32 AOCL_FAL_Error( + AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); + +AOCL_FAL_FILE *AOCL_FAL_Open( + const int8 *pchFileName, + const int8 *pchMode) __attribute__((no_instrument_function)); + +int32 AOCL_FAL_Read( + void *pvBuffer, + int32 i32Size, + int32 i32Count, + AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); + +int32 AOCL_FAL_Write( + const void *pvBuffer, + int32 i32Size, + int32 iCount, + AOCL_FAL_FILE *fpFilePointer) __attribute__((no_instrument_function)); + +/*============================================================================= +* Function Name : AOCL_FAL_Open +* Description : Used for opening a file specified by name +* Input Parameter(s) : const int8 *pchFileName - File name (path) to open +* const int8 *pchMode - Mode string passed to fopen() +* Output Parameter(s) : None +* Return parameter(s) : AOCL_FAL_FILE * - If the file is opened successfully +* NULL - If there is any error while opening file +*============================================================================*/ +AOCL_FAL_FILE *AOCL_FAL_Open( + const int8 *pchFileName, + const int8 *pchMode) +{ + AOCL_FAL_FILE *fpFileOpen = NULL; + /* Open the file at the specified path, in the mode in which it should + be opened.
Refer to FILE I/O operation help for getting mode types */ + fpFileOpen = fopen(pchFileName, pchMode); + /* If the file could not be opened, log it; the NULL pointer is returned below */ + if (NULL == fpFileOpen) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Cannot open file: AOCL_FAL_Open()"); + } + return fpFileOpen; +} /* end of AOCL_FAL_Open */ + +/*============================================================================= +* Function Name : AOCL_FAL_Close +* Description : Used for closing a file specified by file pointer +* Input Parameter(s) : AOCL_FAL_FILE *fpFilePointer - File pointer +* Output Parameter(s) : None +* Return parameter(s) : 0 - If the file is closed successfully +* AOCL_FAL_CLOSE_ERROR - If the file pointer is NULL +* non-zero fclose() result - If fclose() reports a failure +*============================================================================*/ +int32 AOCL_FAL_Close( + AOCL_FAL_FILE *fpFilePointer) +{ + /* Return value for the file close */ + int32 i32RetVal; + i32RetVal = AOCL_FAL_CLOSE_ERROR; + + /* Check whether the file pointer passed is valid or not */ + if (NULL == fpFilePointer) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Can not close file: AOCL_FAL_Close()"); + return i32RetVal; + } + + /* Close the file using the FILE pointer passed */ + i32RetVal = fclose(fpFilePointer); + + /* A non-zero fclose() result indicates an error; it is logged and returned as-is */ + if (i32RetVal) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, + "Can't close file, Invalid file pointer passed"); + return i32RetVal; + } + + /* On successful closing of the file, function should return 0 */ + return i32RetVal; + +} /* End of AOCL_FAL_Close */ + +/*============================================================================= +* Function Name : AOCL_FAL_Read +* Description : Used for reading a file specified by file pointer. +* This function reads the specified number of items +* from the file into the buffer specified. The number +* of items read is returned by this function.
+* Input Parameter(s) : int32 i32Size - Item size in bytes +* int32 i32Count - Maximum number of items to be read +* AOCL_FAL_FILE *fpFilePointer - File ptr to read from +* Output Parameter(s) : void *pvBuffer - Storage location of data +* Return parameter(s) : i32RetVal - Number of bytes read if successful +* AOCL_FAL_READ_ERROR - In case of error while reading +*============================================================================*/ +int32 AOCL_FAL_Read( + void *pvBuffer, + int32 i32Size, + int32 i32Count, + AOCL_FAL_FILE *fpFilePointer) +{ + /* Return value for the file read */ + int32 i32RetVal; + i32RetVal = AOCL_FAL_READ_ERROR; + + /* Check pointer used for pointing the storage location data is valid */ + if (NULL == pvBuffer) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, + "Can not read the file, Buffer pointer is NULL"); + return i32RetVal; + } + + /* Check whether file pointer passed is valid */ + if (NULL == fpFilePointer) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, + "Can not read the file, Buffer pointer is NULL"); + return i32RetVal; + } + + /* Read the file using file pointer */ + i32RetVal = fread(pvBuffer, i32Size, i32Count, fpFilePointer); + + if (i32RetVal != i32Count) + { + /* Check whether this is an end of file The AOCL_FAL_Error() will return + non-zero value to indicate an error */ + if (AOCL_FAL_Error(fpFilePointer)) /* AOCL_FAL_EndOfFile (fpFilePointer) */ + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, + "There is an error condition while file read"); + i32RetVal = AOCL_FAL_READ_ERROR; + } + /* This is condition where file read has encountered an end of file */ + else + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "End of file..."); + } + } + + /* The number of bytes read by the file read operation. 
+ * This value may be less than the actual count, due to end of file + * or an error while reading the file */ + return i32RetVal; + +} /* End of AOCL_FAL_Read */ + +/*============================================================================= +* Function Name : AOCL_FAL_Write +* Description : Used for writing data to a file specified by file +* pointer. The number of items written to the file is +* returned by this function. +* Input Parameter(s) : const void *pvBuffer - Pointer to data location from +* where the data to be copied +* int32 i32Size - Item size in bytes +* int32 iCount - Maximum number of items to be +* written +* AOCL_FAL_FILE *fpFilePointer - File pointer to write to +* Output Parameter(s) : None +* Return parameter(s) : i32RetVal - Number of items written if successful +* AOCL_FAL_WRITE_ERROR - In case of error while writing +*============================================================================*/ +int32 AOCL_FAL_Write( + const void *pvBuffer, + int32 i32Size, + int32 iCount, + AOCL_FAL_FILE *fpFilePointer) +{ + /* Return value for write operation */ + int32 i32RetVal; + i32RetVal = AOCL_FAL_WRITE_ERROR; + /* Check pointer used for pointing the storage location data is valid */ + if (NULL == pvBuffer) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Can not perform file write"); + return i32RetVal; + } + + /* Check whether the file pointer passed is valid or not */ + if (NULL == fpFilePointer) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Can not perform file write"); + return i32RetVal; + } + + /* Write into the file specified by the file pointer */ + i32RetVal = fwrite(pvBuffer, i32Size, iCount, fpFilePointer); + + /* If the number of items written into the file is less than the requested + * count then it is an error while file writing */ + if (i32RetVal != iCount) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "File write operation error"); + i32RetVal = AOCL_FAL_WRITE_ERROR; + } + + /* The return value of the file write operation */ + return i32RetVal; +
+} /* End of AOCL_FAL_Write */ + +/*============================================================================= +* Function Name : AOCL_FAL_Error +* Description : Used for testing an error on the file specified +* Input Parameter(s) : AOCL_FAL_FILE *fpFilePointer - File pointer +* Output Parameter(s) : None +* Return parameter(s) : non-zero - Indicates an end of file +* 0 - Indicates that function is successful +* non-zero - Indicates that there is some error +* AOCL_FAL_ERROR - Indicates error during the operation +*============================================================================*/ +int32 AOCL_FAL_Error( + AOCL_FAL_FILE *fpFilePointer) +{ + /* Used for storing the return value for ferror function */ + int32 i32RetVal; + i32RetVal = AOCL_FAL_FERROR; + + /* Check whether the file pointer is NULL */ + if (NULL == fpFilePointer) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "Invalid file pointer is passed"); + return i32RetVal; + } + + /* Call the ferror function to get an error on the file */ + i32RetVal = ferror(fpFilePointer); + + /* Check for the return value, it non-zero there is an error */ + if (i32RetVal) + { + AOCL_DTL_LOG(AOCL_DTL_LEVEL_MAJOR, "The file has some error"); + i32RetVal = AOCL_FAL_FERROR; + } + + /* In case of success, this function should return 0 */ + return i32RetVal; + +} /* End of AOCL_FAL_Error */ + +/* ------------------- End of aoclfal.c ----------------------- */ diff --git a/AOCL_DTL/aoclfal.h b/AOCL_DTL/aoclfal.h index 1e392733..56931d2d 100644 --- a/AOCL_DTL/aoclfal.h +++ b/AOCL_DTL/aoclfal.h @@ -1,51 +1,51 @@ -/*=================================================================== - * File Name : aoclfal.h - * - * Description : Interfaces for platform/os independed file - * handling API's - * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * - *==================================================================*/ - -#ifndef _AOCL_FAL_H_ -#define _AOCL_FAL_H_ - -/* The possible error values of FAL */ -#define 
AOCL_FAL_SUCCESS 0 -#define AOCL_FAL_CLOSE_ERROR -1 -#define AOCL_FAL_READ_ERROR -2 -#define AOCL_FAL_WRITE_ERROR -3 -#define AOCL_FAL_EOF_ERROR -6 -#define AOCL_FAL_FERROR -7 -#include "aocltpdef.h" - -/* The type definition for FILE */ -#define AOCL_FAL_FILE FILE - -/* The FAL function declaration */ -int32 AOCL_FAL_Close( - AOCL_FAL_FILE *fpFilePointer); - -int32 AOCL_FAL_Error( - AOCL_FAL_FILE *fpFilePointer); - -AOCL_FAL_FILE *AOCL_FAL_Open( - const int8 *pchFileName, - const int8 *pchMode); - -int32 AOCL_FAL_Read( - void *pvBuffer, - int32 i32Size, - int32 i32Count, - AOCL_FAL_FILE *fpFilePointer); - -int32 AOCL_FAL_Write( - const void *pvBuffer, - int32 i32Size, - int32 iCount, - AOCL_FAL_FILE *fpFilePointer); - -#endif /* _AOCL_FAL_H_ */ - -/* --------------- End of aoclfal.h ----------------- */ +/*=================================================================== + * File Name : aoclfal.h + * + * Description : Interfaces for platform/os independed file + * handling API's + * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ + +#ifndef _AOCL_FAL_H_ +#define _AOCL_FAL_H_ + +/* The possible error values of FAL */ +#define AOCL_FAL_SUCCESS 0 +#define AOCL_FAL_CLOSE_ERROR -1 +#define AOCL_FAL_READ_ERROR -2 +#define AOCL_FAL_WRITE_ERROR -3 +#define AOCL_FAL_EOF_ERROR -6 +#define AOCL_FAL_FERROR -7 +#include "aocltpdef.h" + +/* The type definition for FILE */ +#define AOCL_FAL_FILE FILE + +/* The FAL function declaration */ +int32 AOCL_FAL_Close( + AOCL_FAL_FILE *fpFilePointer); + +int32 AOCL_FAL_Error( + AOCL_FAL_FILE *fpFilePointer); + +AOCL_FAL_FILE *AOCL_FAL_Open( + const int8 *pchFileName, + const int8 *pchMode); + +int32 AOCL_FAL_Read( + void *pvBuffer, + int32 i32Size, + int32 i32Count, + AOCL_FAL_FILE *fpFilePointer); + +int32 AOCL_FAL_Write( + const void *pvBuffer, + int32 i32Size, + int32 iCount, + AOCL_FAL_FILE *fpFilePointer); + +#endif /* _AOCL_FAL_H_ */ + +/* 
--------------- End of aoclfal.h ----------------- */ diff --git a/AOCL_DTL/aoclflist.c b/AOCL_DTL/aoclflist.c index 5bba38fb..e4c178f6 100644 --- a/AOCL_DTL/aoclflist.c +++ b/AOCL_DTL/aoclflist.c @@ -1,12 +1,12 @@ /*=================================================================== * File Name : aoclflist.c - * - * Description : Linked list of open files assocaited with + * + * Description : Linked list of open files assocaited with * each thread. This is used to log the data * to correct file as per the current thread id. * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * + * Copyright (C) 2020-23, Advanced Micro Devices, Inc + * *==================================================================*/ #include "aocltpdef.h" @@ -14,9 +14,9 @@ #include "aoclfal.h" #include "aoclflist.h" #include "aoclos.h" +#include "SL_Context.h" - -/* Disable instrumentation for following function, since they are called from +/* Disable instrumentation for following function, since they are called from * Auto Generated execution trace handlers. */ Bool AOCL_FLIST_IsEmpty( AOCL_FLIST_Node *plist) __attribute__((no_instrument_function)); @@ -89,8 +89,7 @@ AOCL_FAL_FILE *AOCL_FLIST_AddFile(const int8 *pchFilePrefix, AOCL_FLIST_Node **p } /* We don't have exiting file, lets try to open new one */ - sprintf(pchFileName, "P%d_T%d_%s", AOCL_getpid(), tid, pchFilePrefix); - + sprintf(pchFileName, "P%d_T%d_Rank%d_%s", AOCL_getpid(), tid,scalapack_context.rank, pchFilePrefix); file = AOCL_FAL_Open(pchFileName, "wb"); if (file == NULL) { diff --git a/AOCL_DTL/aocltpdef.h b/AOCL_DTL/aocltpdef.h index 896731c5..3098912c 100644 --- a/AOCL_DTL/aocltpdef.h +++ b/AOCL_DTL/aocltpdef.h @@ -1,42 +1,42 @@ - -/*=================================================================== - * File Name : aocltpdef.h - * - * Description : Abstraction for various datatypes used by DTL. 
- * - * Copyright (C) 2020, Advanced Micro Devices, Inc - * - *==================================================================*/ -#ifndef AOCL_TYPEDEF_H_ -#define AOCL_TYPEDEF_H_ - -#include -#include -#include -#include -#include -#ifndef _WIN32 -#include -#else -typedef int pid_t; -#endif - -typedef double Double; -typedef float Float; -typedef void Void; -typedef unsigned char uint8; -typedef unsigned short int uint16; -typedef unsigned int uint32; -typedef unsigned long uint64; -typedef uint8 *STRING; -typedef unsigned char Bool; -typedef char int8; -typedef signed long int int32; -typedef short int int16; - -typedef Void *AOCL_HANDLE; -typedef pid_t AOCL_TID; - -#endif /*AOCL_TYPEDEF_H_ */ - -/* --------------- End of aocltpdef.h ----------------- */ + +/*=================================================================== + * File Name : aocltpdef.h + * + * Description : Abstraction for various datatypes used by DTL. + * + * Copyright (C) 2020, Advanced Micro Devices, Inc + * + *==================================================================*/ +#ifndef AOCL_TYPEDEF_H_ +#define AOCL_TYPEDEF_H_ + +#include +#include +#include +#include +#include +#ifndef _WIN32 +#include +#else +typedef int pid_t; +#endif + +typedef double Double; +typedef float Float; +typedef void Void; +typedef unsigned char uint8; +typedef unsigned short int uint16; +typedef unsigned int uint32; +typedef unsigned long uint64; +typedef uint8 *STRING; +typedef unsigned char Bool; +typedef char int8; +typedef signed long int int32; +typedef short int int16; + +typedef Void *AOCL_HANDLE; +typedef pid_t AOCL_TID; + +#endif /*AOCL_TYPEDEF_H_ */ + +/* --------------- End of aocltpdef.h ----------------- */ diff --git a/BLACS/SRC/Bdef.h b/BLACS/SRC/Bdef.h index ba7d99d3..da2f062c 100644 --- a/BLACS/SRC/Bdef.h +++ b/BLACS/SRC/Bdef.h @@ -30,7 +30,7 @@ typedef struct bLaCsCoNtExT BLACSCONTEXT; struct bLaCsCoNtExT { BLACSSCOPE rscp, cscp, ascp, pscp; /* row, column, all, and pt2pt scopes */ 
-#ifdef ENABLE_LOOK_AHEAD_FOR_LU +#ifdef ENABLE_LOOK_AHEAD_FOR_LU BLACSSCOPE lscp; /* row scope for look ahead panel */ #endif /* ENABLE_LOOK_AHEAD_FOR_LU */ BLACSSCOPE *scp; /* pointer to present scope */ @@ -77,7 +77,7 @@ struct bLaCbUfF #define AOCL_KEEP_POLLING 2 /* - * Definition of buffer type for + * Definition of buffer type for * user defined datatype communications */ typedef struct aOcLpBuFf AOCLPBUFF; @@ -557,5 +557,22 @@ Int BI_ContxtNum(BLACSCONTEXT *ctxt); #endif - +/* + * Prototypes declarations + */ +void BI_imvcopy(Int m, Int n, Int *A, Int lda, Int *buff); +void BI_ivmcopy(Int m, Int n, Int *A, Int lda, Int *buff); +void BI_smvcopy(Int m, Int n, float *A, Int lda, float *buff); +void BI_svmcopy(Int m, Int n, float *A, Int lda, float *buff); +void BI_dmvcopy(Int m, Int n, double *A, Int lda, double *buff); +void BI_dvmcopy(Int m, Int n, double *A, Int lda, double *buff); +void BI_TransDist(BLACSCONTEXT *ctxt, char scope, Int m, Int n, Int *rA, + Int *cA, Int ldrc, BI_DistType *dist, Int rdest, Int cdest); +void Cblacs_pinfo(Int *mypnum, Int *nprocs); +void blacs_gridmap_(Int *ConTxt, Int *usermap, Int *ldup, Int *nprow0, + Int *npcol0); +void Cblacs_gridinfo(Int ConTxt, Int *nprow, Int *npcol, Int *myrow, Int *mycol); +void Cblacs_abort(Int ConTxt, Int ErrNo); +void Cblacs_get(Int ConTxt, Int what, Int *val); +void Cblacs_gridmap(Int *ConTxt, Int *usermap, Int ldup, Int nprow0, Int npcol0); #endif diff --git a/CMakeLists.txt b/CMakeLists.txt index f65f0689..eef4e862 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,9 @@ -##Copyright (C) 2021-2022, Advanced Micro Devices, Inc. All rights reserved.## +##Copyright (C) 2021-2023, Advanced Micro Devices, Inc. 
All rights reserved.## cmake_minimum_required(VERSION 3.2) project(SCALAPACK C Fortran) # Configure the warning and code coverage suppression file -configure_file( +configure_file( "${SCALAPACK_SOURCE_DIR}/CMAKE/CTestCustom.cmake.in" "${SCALAPACK_BINARY_DIR}/CTestCustom.cmake" COPYONLY @@ -16,35 +16,91 @@ endif () # Add the CMake directory for custon CMake modules set(CMAKE_MODULE_PATH "${SCALAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH}) -#Build Options +set(SL_FRAMEWORK_INCLUDE_PATH "${SCALAPACK_SOURCE_DIR}/FRAMEWORK") + +# ------ Build Options List ----------- # ILP64 build option option(ENABLE_ILP64 "Enable ILP64 " OFF) -option(ENABLE_AOCL_PROGRESS "Enable ILP64 " OFF) -option(ENABLE_DTL "Enable ILP64 " OFF) + +# aocl progress option +option(ENABLE_AOCL_PROGRESS "Enable progress feature " OFF) + +# DTL option +option(ENABLE_DTL "Enable DTL feature " OFF) + +# ASAN testing option +option(ENABLE_ASAN_TESTS "Enable Address sanitizer tests " OFF) # Option: Include build number in the version string. 
option (ENABLE_SET_LIB_VERSION "Set library version" OFF) +# Option: Dynamic allocation of work buffer memory in Test code +# Helpful to test larger matrix sizes more than 2K +option (ENABLE_LARGE_MATRIX_TESTING "Dynamic allocation of work buffer memory in test code" OFF) + +# ------ Build Options List End ----------- + + if (WIN32 AND CMAKE_Fortran_COMPILER_ID MATCHES "Intel") set (CMAKE_IFORT_LIBDEPS_DIR "C:/Program Files (x86)/IntelSWTools/compilers_and_libraries/windows/compiler/lib/intel64_win" CACHE STRING "") endif() set(CMAKE_ICC_FLAGS " ") +set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp" ) + +# set compile flags to enable address sanitizer (ASAN) tests +if(ENABLE_ASAN_TESTS) + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fsanitize=address " ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address ") + message(STATUS " ASAN testing enabled for the scaLAPACK build ") +endif(ENABLE_ASAN_TESTS) + +# Option to enable the scaLAPACK test-suite with "Dynamic work buffer memory allocation". +if(ENABLE_LARGE_MATRIX_TESTING) + +# Configure option to customize Dynamic work buffer size "-DWORK_BUFFER_SIZE = custom_size Bytes" +# as per the machine's memory configuration. + if(WORK_BUFFER_SIZE) + message(STATUS "scaLAPACK test-suite build with custom dynamic work_buffer_size: ${WORK_BUFFER_SIZE} bytes.") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DDYNAMIC_WORK_MEM_ALLOC -DWORK_BUFFER_SIZE=${WORK_BUFFER_SIZE}" ) + else(WORK_BUFFER_SIZE) +# Build with Dynamic work buffer size = 2100000000 bytes by default. 
+ message(STATUS "scaLAPACK test-suite build with dynamic work_buffer_size: 2100000000 bytes.") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DDYNAMIC_WORK_MEM_ALLOC -DWORK_BUFFER_SIZE=2100000000 " ) + endif(WORK_BUFFER_SIZE) + +endif(ENABLE_LARGE_MATRIX_TESTING) + if (UNIX) - if ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Intel") - set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port" ) + if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp -ffixed-line-length-none -I ${SL_FRAMEWORK_INCLUDE_PATH}/" ) endif () - if ( "${CMAKE_Fortran_COMPILER}" MATCHES "ifort" ) - set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port" ) + if ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Flang") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp -ffixed-line-length-132 -I ${SL_FRAMEWORK_INCLUDE_PATH}/" ) + endif () + if ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Intel") + set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port -no-vec -cpp -I ${SL_FRAMEWORK_INCLUDE_PATH}/" ) + elseif ("${CMAKE_Fortran_COMPILER}" MATCHES "ifort" ) + set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port -no-vec -cpp -I ${SL_FRAMEWORK_INCLUDE_PATH}/" ) endif () + if (("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel") AND ( "${CMAKE_C_COMPILER}" MATCHES "icc" ) ) - set( CMAKE_ICC_FLAGS "${CMAKE_ICC_FLAGS} -no-vec ") + set(CMAKE_ICC_FLAGS "${CMAKE_ICC_FLAGS} -no-vec ") message(STATUS "Found Intel icc compiler : ${CMAKE_ICC_FLAGS} ") endif () + + if (("${CMAKE_C_COMPILER_ID}" STREQUAL "IntelLLVM") AND ( "${CMAKE_C_COMPILER}" MATCHES "icc" ) ) + set(CMAKE_ICC_FLAGS "${CMAKE_ICC_FLAGS} -Wno-implicit-function-declaration") + set(CMAKE_ICC_FLAGS "${CMAKE_ICC_FLAGS} -no-vec ") + message(STATUS "Found Intel icx compiler : ${CMAKE_ICC_FLAGS} ") + endif () +else () + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp -I ${SL_FRAMEWORK_INCLUDE_PATH}/ " ) endif () + # # MPI # @@ -77,8 +133,8 @@ 
if (MPI_FOUND) PATH_SUFFIXES bin DOC "MPI Fortran compiler.") MARK_AS_ADVANCED(MPI_Fortran_COMPILER) - - + + if ("${MPI_Fortran_COMPILER}" STREQUAL "MPI_Fortran_COMPILER-NOTFOUND") message(ERROR "--> MPI Fortran Compiler NOT FOUND (please set MPI_BASE_DIR accordingly") @@ -88,7 +144,7 @@ if (MPI_FOUND) SET(CMAKE_Fortran_COMPILER "${MPI_Fortran_COMPILER}") message(STATUS "--> Fortran Compiler : ${CMAKE_Fortran_COMPILER}") endif() - + else() message(STATUS "Found MPI_LIBRARY : ${MPI_FOUND} ") set(MPI_BASE_DIR ${MPI_BASE_DIR} CACHE PATH "MPI Path") @@ -103,16 +159,6 @@ else() message(FATAL_ERROR "--> MPI Library NOT FOUND -- please set MPI_BASE_DIR accordingly --") endif() - -if (UNIX) - if ("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Intel") - set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port" ) - endif () - if ( "${CMAKE_Fortran_COMPILER}" MATCHES "ifort" ) - set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fltconsistency -fp_port" ) - endif () -endif () - macro(SCALAPACK_install_library lib) install(TARGETS ${lib} EXPORT scalapack-targets ARCHIVE DESTINATION lib${LIB_SUFFIX} @@ -121,6 +167,18 @@ macro(SCALAPACK_install_library lib) ) endmacro() +#Function to add file name for DTL purpose +function(custom_macros_for_each_scalapack_source target_library_sources) + get_target_property(src_files_list "${target_library_sources}" SOURCES) + foreach(src_file ${src_files_list}) + get_filename_component(src_file_name "${src_file}" NAME) + get_filename_component(function_name "${src_file}" NAME_WLE) + set_property( + SOURCE "${src_file}" APPEND + PROPERTY COMPILE_DEFINITIONS "FUNCTION_NAME=\"${function_name}\"" "FILE_NAME=\"${src_file_name}\"" ) + endforeach() +endfunction() + # -------------------------------------------------- # Testing SET(DART_TESTING_TIMEOUT 600) @@ -162,19 +220,16 @@ MESSAGE(STATUS "=========") # Compiler Flags option(USE_F2C "Use FORTRAN To C compatible interface for certain Complex type functions" OFF) -set(f2cflag "-cpp ") - 
if(USE_F2C) - set(f2cflag "-cpp -DF2C " ) + set(f2cflag "-DF2C " ) ENDIF(USE_F2C) +MESSAGE(STATUS "CMAKE_C_COMPILER_ID = ${CMAKE_C_COMPILER_ID}") +MESSAGE(STATUS "CMAKE_Fortran_COMPILER_ID = ${CMAKE_Fortran_COMPILER_ID}") if(ENABLE_DTL) -#Enable DTL for GNU tool chain in UNIX +#Enable DTL for UNIX if(UNIX) - if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") add_definitions("-DAOCL_DTL ") - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp -ffixed-line-length-none") - endif () endif() ENDIF(ENABLE_DTL) @@ -189,7 +244,7 @@ if(ENABLE_ILP64) add_definitions("-DInt=__int64" "-DENABLE_ILP64" "-DUInt=unsigned __int64" "-i8") endif() else(ENABLE_ILP64) - add_definitions("-DUInt=unsigned int " "-DInt=int") + add_definitions("-DUInt=unsigned int " "-DInt=int" ) ENDIF(ENABLE_ILP64) if(ENABLE_AOCL_PROGRESS) @@ -197,7 +252,7 @@ if(ENABLE_AOCL_PROGRESS) add_definitions("-DAOCL_PROGRESS") endif() ENDIF(ENABLE_AOCL_PROGRESS) - message(STATUS "ENABLE_SET_LIB_VERSION : ${ENABLE_SET_LIB_VERSION} ") + message(STATUS "ENABLE_SET_LIB_VERSION : ${ENABLE_SET_LIB_VERSION} ") if(ENABLE_SET_LIB_VERSION) string(TIMESTAMP TODAY "%Y%m%d") @@ -205,7 +260,8 @@ if(ENABLE_SET_LIB_VERSION) add_definitions("-DAOCL_SCALAPACK_VERSION=${LIBRARY_VERSION}") endif() -ADD_DEFINITIONS( "-D${CDEFS}" "${CMAKE_ICC_FLAGS}" "${f2cflag}") +ADD_DEFINITIONS( "-D${CDEFS}" "${f2cflag}") +set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} ${CMAKE_ICC_FLAGS} -I ${SL_FRAMEWORK_INCLUDE_PATH}/") # -------------------------------------------------- # By default static library @@ -214,7 +270,7 @@ OPTION(BUILD_STATIC_LIBS "Build static libraries" ON ) # -------------------------------------------------- # Subdirectories that need to be processed - + macro(append_subdir_files variable dirname) get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable}) foreach(depfile ${holder}) @@ -233,22 +289,27 @@ IF(BLACS_LIBRARY) include(CheckFunctionExists) set(CMAKE_REQUIRED_LIBRARIES ${BLACS_LIBRARY}) message(STATUS "--> BLACS supplied 
by user is ${BLACS_LIBRARY}.") - CHECK_FUNCTION_EXISTS("Cblacs_get" BLACS_FOUND) + CHECK_FUNCTION_EXISTS("Cblacs_get" CUSTOM_BLACS_FOUND) unset( CMAKE_REQUIRED_LIBRARIES ) - message(STATUS "--> BLACS routine blacs_pinfo is found: ${BLACS_FOUND}.") + message(STATUS "--> BLACS routine blacs_pinfo is found: ${CUSTOM_BLACS_FOUND}.") + + if(CUSTOM_BLACS_FOUND) + message(STATUS "--> BLACS supplied by user is WORKING, will use ${BLACS_LIBRARY}.") + else( CUSTOM_BLACS_FOUND ) + # + # BLACS + # + add_subdirectory(BLACS) + append_subdir_files(blacs "BLACS/SRC") + message(STATUS "--> BLACS supplied by user is NOT WORKING, will use BLACS source code for building aocl-scalapack") + endif( CUSTOM_BLACS_FOUND ) +ELSE(BLACS_LIBRARY) + add_subdirectory(BLACS) + append_subdir_files(blacs "BLACS/SRC") + message(STATUS "--> Using default BLACS source code for building aocl-scalapack") ENDIF() -if(BLACS_FOUND) - message(STATUS "--> BLACS supplied by user is WORKING, will use ${BLACS_LIBRARY}.") -else( BLACS_FOUND ) -# -# BLACS -# - add_subdirectory(BLACS) - append_subdir_files(blacs "BLACS/SRC") - message(STATUS "--> BLACS supplied by user is NOT WORKING, will use BLACS source code for building aocl-scalapack") -endif( BLACS_FOUND ) - +unset(LAPACK_FOUND CACHE) message(STATUS "CHECKING BLAS AND LAPACK LIBRARIES") IF(LAPACK_LIBRARIES) include(CheckFortranFunctionExists) @@ -274,15 +335,15 @@ else(LAPACK_FOUND) message(STATUS "--> LAPACK and BLAS were not found. 
Reference LAPACK and BLAS will be downloaded and installed") include(ExternalProject) ExternalProject_Add( - lapack - URL http://www.netlib.org/lapack/lapack.tgz - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SCALAPACK_BINARY_DIR} + lapack + URL http://www.netlib.org/lapack/lapack.tgz + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SCALAPACK_BINARY_DIR} PREFIX ${SCALAPACK_BINARY_DIR}/dependencies ) - if (UNIX) + if (UNIX) SET(LAPACK_LIBRARIES ${SCALAPACK_BINARY_DIR}/lib/liblapack.a CACHE STRING "LAPACK library" FORCE) SET(BLAS_LIBRARIES ${SCALAPACK_BINARY_DIR}/lib/libblas.a CACHE STRING "BLAS library" FORCE) - else (UNIX) # On Windows + else (UNIX) # On Windows SET(LAPACK_LIBRARIES ${SCALAPACK_BINARY_DIR}/lib/liblapack.lib CACHE STRING "LAPACK library" FORCE) SET(BLAS_LIBRARIES ${SCALAPACK_BINARY_DIR}/lib/libblas.lib CACHE STRING "BLAS library" FORCE) endif (UNIX) @@ -300,6 +361,13 @@ MESSAGE(STATUS "=========") add_subdirectory(AOCL_DTL) append_subdir_files(dtl "AOCL_DTL") +# +# FRAMEWORK +# +add_subdirectory(FRAMEWORK) +append_subdir_files(framework "FRAMEWORK") +append_subdir_files(framework-C "FRAMEWORK") + # # TOOLS # @@ -341,50 +409,52 @@ if(WIN32 AND BUILD_SHARED_LIBS) endif () if (UNIX) - if(BLACS_FOUND) - add_library(scalapack ${dtl} ${tools} ${tools-C} ${extra_lapack} ${pblas} ${pblas-F} ${ptzblas} ${ptools} ${pbblas} ${redist} ${src} ${src-C}) + if(CUSTOM_BLACS_FOUND) + add_library(scalapack ${dtl} ${framework} ${framework-C} ${tools} ${tools-C} ${extra_lapack} ${pblas} ${pblas-F} ${ptzblas} ${ptools} ${pbblas} ${redist} ${src} ${src-C}) + custom_macros_for_each_scalapack_source (scalapack) target_link_libraries( scalapack ${BLACS_LIBRARY} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) scalapack_install_library(scalapack) - else(BLACS_FOUND) - add_library(scalapack ${dtl} ${blacs} ${tools} ${tools-C} ${extra_lapack} ${pblas} ${pblas-F} ${ptzblas} ${ptools} ${pbblas} ${redist} ${src} ${src-C}) + else(CUSTOM_BLACS_FOUND) + add_library(scalapack ${dtl} ${framework} 
${framework-C} ${blacs} ${tools} ${tools-C} ${extra_lapack} ${pblas} ${pblas-F} ${ptzblas} ${ptools} ${pbblas} ${redist} ${src} ${src-C}) + custom_macros_for_each_scalapack_source (scalapack) target_link_libraries( scalapack ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) scalapack_install_library(scalapack) - endif(BLACS_FOUND) + endif(CUSTOM_BLACS_FOUND) else (UNIX) # Need to separate Fortran and C Code if (CMAKE_C_COMPILER_ID MATCHES MSVC) # create Fortran objects and add to scalapack library first - if(BLACS_FOUND) - add_library(scalapack-F OBJECT ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${src} ${extra_lapack} ) - add_library(scalapack $ ${tools-C} ${pblas} ${ptools} ${redist} ${src-C}) + if(CUSTOM_BLACS_FOUND) + add_library(scalapack-F OBJECT ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${framework} ${src} ${extra_lapack} ) + add_library(scalapack $ ${tools-C} ${dtl} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack ${BLACS_LIBRARY} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) - else(BLACS_FOUND) - add_library(scalapack-F OBJECT ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${src} ${extra_lapack} ) - add_library(scalapack $ ${blacs} ${tools-C} ${pblas} ${ptools} ${redist} ${src-C}) + else(CUSTOM_BLACS_FOUND) + add_library(scalapack-F OBJECT ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${framework} ${src} ${extra_lapack} ) + add_library(scalapack $ ${blacs} ${tools-C} ${dtl} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) - endif(BLACS_FOUND) + endif(CUSTOM_BLACS_FOUND) else (CMAKE_C_COMPILER_ID MATCHES Clang) # create C objects and add to scalapack library first - if(BLACS_FOUND) - add_library(scalapack-C OBJECT ${tools-C} ${pblas} 
${ptools} ${redist} ${src-C}) + if(CUSTOM_BLACS_FOUND) + add_library(scalapack-C OBJECT ${tools-C} ${dtl} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack-C ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack-C PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) - add_library(scalapack $ ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${src} ${extra_lapack} ) + add_library(scalapack $ ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${dtl} ${framework} ${src} ${extra_lapack} ) target_link_libraries( scalapack ${BLACS_LIBRARY} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) - else(BLACS_FOUND) - add_library(scalapack-C OBJECT ${blacs} ${tools-C} ${pblas} ${ptools} ${redist} ${src-C}) + else(CUSTOM_BLACS_FOUND) + add_library(scalapack-C OBJECT ${blacs} ${tools-C} ${dtl} ${framework-C} ${pblas} ${ptools} ${redist} ${src-C}) target_link_libraries( scalapack-C ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack-C PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) - add_library(scalapack $ ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${src} ${extra_lapack} ) + add_library(scalapack $ ${pblas-F} ${pbblas} ${ptzblas} ${tools} ${framework} ${src} ${extra_lapack} ) target_link_libraries( scalapack ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MPI_LIBRARY}) target_link_directories( scalapack PUBLIC ${CMAKE_IFORT_LIBDEPS_DIR}) scalapack_install_library(scalapack) - endif(BLACS_FOUND) + endif(CUSTOM_BLACS_FOUND) endif () endif (UNIX) @@ -393,7 +463,7 @@ if(${SCALAPACK_BUILD_TESTS}) add_subdirectory(TESTING) endif() # -------------------------------------------------- -# CPACK Packaging +# CPACK Packaging SET(CPACK_PACKAGE_NAME "ScaLAPACK") SET(CPACK_PACKAGE_VENDOR "University of Tennessee, Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd") @@ -456,3 +526,4 @@ install(FILES install(EXPORT scalapack-targets DESTINATION lib/cmake/scalapack-${SCALAPACK_VERSION}) +file(COPY scalapack_test.sh DESTINATION ${SCALAPACK_BINARY_DIR}) diff --git a/EXAMPLE/aocl_progress_example/pdgerf_example_app.c b/EXAMPLE/aocl_progress_example/pdgerf_example_app.c index 63c6db1d..b6ac9a10 100644 --- a/EXAMPLE/aocl_progress_example/pdgerf_example_app.c +++ b/EXAMPLE/aocl_progress_example/pdgerf_example_app.c @@ -1,132 +1,132 @@ -#include -#include -#include -#include -#include -#include -#include "mpi.h" - -void blacs_get_(int*, int*, int*); -void blacs_pinfo_(int*, int*); -void blacs_gridinit_(int*, char*, int*, int*); -void blacs_gridinfo_(int*, int*, int*, int*, int*); -void descinit_(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*); -void pdgetrf_(int*, int*, double*, int*, int*, int*, int*, int*); -void blacs_gridexit_(int*); -int numroc_(int*, int*, int*, int*, int*); - -int AOCL_progress(char* api, int *lenapi, int *progress, int *mpi_rank, int *total_mpi_processes); - -int AOCL_progress(char* api, int *lenapi, int *progress, int *mpi_rank, int *total_mpi_processes) -{ - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - int izero=0; - int ione=1; - int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - int n = 1000; // (Global) Matrix size - int nprow = 2; // Number of row procs - int npcol = 2; // Number of column procs - int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - 
nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - int iam, nprocs; - int zero = 0; - int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - double *A; - int *IPPIV; - A = (double *)calloc(mpA*nqA,sizeof(double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - IPPIV = (int *)calloc(2*n,sizeof(int)) ; - if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } - - int k = 0; - for (int j = 0; j < nqA; j++) { // local col - int l_j = j / nb; // which block - int x_j = j % nb; // where within that block - int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (int i = 0; i < mpA; i++) { // local row - int l_i = i / nb; // which block - int x_i = i % nb; // where within that block - int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - int descA[9]; - int info; - int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %d\n", info); - } - - // Run pdgetrf and time - double MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pdgetrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pdgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); - - if (info != 0) { - printf("Error in pdgetrf, info = %d\n", info); - } - - double MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#include +#include "mpi.h" + +void blacs_get_(int*, int*, int*); +void blacs_pinfo_(int*, int*); +void blacs_gridinit_(int*, char*, int*, int*); +void blacs_gridinfo_(int*, int*, int*, int*, int*); +void descinit_(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*); +void pdgetrf_(int*, int*, double*, int*, int*, int*, int*, int*); +void blacs_gridexit_(int*); +int numroc_(int*, int*, int*, int*, int*); + +int AOCL_progress(char* api, int *lenapi, int *progress, int *mpi_rank, int *total_mpi_processes); + +int AOCL_progress(char* api, int *lenapi, int *progress, int *mpi_rank, int *total_mpi_processes) +{ + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + int izero=0; + int ione=1; + int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + int n = 1000; // (Global) Matrix size + int nprow = 2; // Number of row procs + int npcol = 2; // Number of column procs + int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size 
block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + int iam, nprocs; + int zero = 0; + int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + double *A; + int *IPPIV; + A = (double *)calloc(mpA*nqA,sizeof(double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + IPPIV = (int *)calloc(2*n,sizeof(int)) ; + if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } + + int k = 0; + for (int j = 0; j < nqA; j++) { // local col + int l_j = j / nb; // which block + int x_j = j % nb; // where within that block + int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (int i = 0; i < mpA; i++) { // local row + int l_i = i / nb; // which block + int x_i = i % nb; // where within that block + int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + int descA[9]; + int info; + int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %d\n", info); + } + + // Run pdgetrf and time + double MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pdgetrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pdgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); + + if (info != 0) { + printf("Error in pdgetrf, info = %d\n", info); + } + + double MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/FRAMEWORK/CMakeLists.txt b/FRAMEWORK/CMakeLists.txt new file mode 100644 index 00000000..f26a375f --- /dev/null +++ b/FRAMEWORK/CMakeLists.txt @@ -0,0 +1,14 @@ +##Copyright (C) 2023, Advanced Micro Devices, Inc. 
All rights reserved.## + +# ---------------------------------- +# aocl-scaLAPACK framework routines +# ---------------------------------- +set (framework-C + SL_Context.c cpu_features.c) + +set (framework + SL_Context_module.f) + +set(src ${framework-C} ${framework}) + +#set(framework ${framework-C} ${framework}) diff --git a/FRAMEWORK/SL_Context.c b/FRAMEWORK/SL_Context.c new file mode 100644 index 00000000..e4de920f --- /dev/null +++ b/FRAMEWORK/SL_Context.c @@ -0,0 +1,223 @@ + +/* ************************************************************************ + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * ************************************************************************ */ +#include "SL_Context.h" +#include +#include +#include "../BLACS/SRC/Bdef.h" + +#if defined(SCALAPACK_NO_CONTEXT) +// This branch defines a pthread-like API, scalapack_pthread_*(), and implements it +// in terms of "dummy" code that doesn't depend on POSIX threads or any other +// threading mechanism. +// NOTE: THIS CODE DOES NOT IMPLEMENT THREADING AND IS NOT THREAD-SAFE! +int scalapack_pthread_mutex_lock(scalapack_pthread_mutex_t *mutex) +{ + // No-op stand-in for pthread_mutex_lock(); always reports success. + return 0; +} +int scalapack_pthread_mutex_unlock(scalapack_pthread_mutex_t *mutex) +{ + // No-op stand-in for pthread_mutex_unlock(); always reports success. + return 0; +} +// -- pthread_once() -- +void scalapack_pthread_once(scalapack_pthread_once_t *once, void (*init)(void)) +{ + // NOTE(review): init is never invoked in this branch -- confirm this is intended. + return; +} +#elif defined(_MSC_VER) // !defined(SCALAPACK_NO_CONTEXT) && defined(_MSC_VER) +#include +// This branch defines a pthread-like API, scalapack_pthread_*(), and implements it +// in terms of Windows API calls. +// -- pthread_mutex_*() -- +int scalapack_pthread_mutex_lock(scalapack_pthread_mutex_t *mutex) +{ + AcquireSRWLockExclusive(mutex); + return 0; +} +int scalapack_pthread_mutex_unlock(scalapack_pthread_mutex_t *mutex) +{ + ReleaseSRWLockExclusive(mutex); + return 0; +} +// -- pthread_once() -- +static BOOL + scalapack_init_once_wrapper(scalapack_pthread_once_t *once, void *param, void **context) +{ + (void)once; + (void)context; + typedef void (*callback)(void); + ((callback)param)(); + return TRUE; +} +void scalapack_pthread_once(scalapack_pthread_once_t *once, void (*init)(void)) +{ + InitOnceExecuteOnce(once, scalapack_init_once_wrapper, init, NULL); +} +#else // !defined(SCALAPACK_NO_CONTEXT) && !defined(_MSC_VER) +// This branch defines a pthread-like API, scalapack_pthread_*(), and implements it +// in terms of the corresponding pthread_*() types, macros, and function calls. 
+// This branch is compiled for Linux and other non-Windows environments where +// we assume that *some* implementation of pthreads is provided (although it +// may lack barriers--see below). +// -- pthread_mutex_*() -- +int scalapack_pthread_mutex_lock(scalapack_pthread_mutex_t *mutex) +{ + return pthread_mutex_lock(mutex); +} +int scalapack_pthread_mutex_unlock(scalapack_pthread_mutex_t *mutex) +{ + return pthread_mutex_unlock(mutex); +} +// -- pthread_once() -- +void scalapack_pthread_once(scalapack_pthread_once_t *once, void (*init)(void)) +{ + pthread_once(once, init); +} +#endif // !defined(SCALAPACK_NO_CONTEXT) && !defined(_MSC_VER) +/* The global scalapack_context structure, which holds the global thread,ISA settings + Initialize with 0. +**/ +aocl_scalapack_global_context scalapack_context = {0,0,0}; +/* A mutex to allow synchronous access to global_thread. */ +scalapack_pthread_mutex_t sl_global_thread_mutex = SL_PTHREAD_MUTEX_INITIALIZER; +/******************************************************************************** + * \brief scalapack_env_get_var is a function used to query the environment + * variable and convert the string into integer and return the same + ********************************************************************************/ +int scalapack_env_get_var(const char *env, int fallback) +{ + int r_val; + char *str; + // Query the environment variable and store the result in str. + str = getenv(env); + // Set the return value based on the string obtained from getenv(). + if(str != NULL) + { + // If there was no error, convert the string to an integer and + // prepare to return that integer. + r_val = (int)strtol(str, NULL, 10); + } + else + { + // If there was an error, use the "fallback" as the return value. 
+ r_val = fallback; + } + return r_val; +} +void scalapack_thread_init_rntm_from_env(aocl_scalapack_global_context *context) +{ + int status; + + /* Check whether 'debug trace' is set in the run-time environment. Any value of AOCL_SL_TRACE that does not parse to -1 (including 0) enables tracing. */ + status = scalapack_env_get_var("AOCL_SL_TRACE", -1); + if (status == -1) + { + context->is_trace_enabled = 0; + } + else + { + context->is_trace_enabled = 1; + } + + /* Check whether 'debug log' is set in the run-time environment (same rule: unset or -1 disables, anything else enables) */ + status = scalapack_env_get_var("AOCL_SL_LOG", -1); + if (status == -1) + { + context->is_log_enabled = 0; + } + else + { + context->is_log_enabled = 1; + } + + /* Check whether AOCL-progress requirement is set in the run-time environment */ + status = scalapack_env_get_var("AOCL_SL_PROGRESS", -1); + + if (status == -1) + { + context->is_progress_enabled = 0; + } + else + { + context->is_progress_enabled = 1; + } + + /* set the context MPI rank, number of processes */ + Cblacs_pinfo(&(context->rank), &(context->num_procs) ); + + /* Since multithreading support is not present in the aocl-scaLAPACK, + we set the context number of threads to 1. + NOTE: If multithread support is enabled, then we have to set the + desired num_threads from the environment. + */ + context->num_threads = 1; +} +// ----------------------------------------------------------------------------- +void scalapack_context_init(void) +{ + // Read the environment variables and use them to initialize the + // global runtime object. + scalapack_thread_init_rntm_from_env(&scalapack_context); +} +// ----------------------------------------------------------------------------- +void scalapack_context_finalize(void) {} +// ----------------------------------------------------------------------------- +// A pthread_once_t variable is a pthread structure used in pthread_once(). +// pthread_once() is guaranteed to execute exactly once among all threads that +// pass in this control object.
Thus, we need one for initialization and a +// separate one for finalization. +static scalapack_pthread_once_t once_init = SL_PTHREAD_ONCE_INIT; +static scalapack_pthread_once_t once_finalize = SL_PTHREAD_ONCE_INIT; + +void aocl_scalapack_init_() +{ + scalapack_pthread_once(&once_init, scalapack_context_init); +} +void AOCL_SCALAPACK_INIT() +{ + scalapack_pthread_once(&once_init, scalapack_context_init); +} + +void aocl_scalapack_finalize(void) +{ + scalapack_pthread_once(&once_finalize, scalapack_context_finalize); +} +int scalapack_thread_get_num_threads(void) +{ + // We must ensure that global_rntm has been initialized. + aocl_scalapack_init_(); + return scalapack_context.num_threads; +} +void scalapack_thread_set_num_threads(int n_threads) +{ + // We must ensure that global_thread has been initialized. + aocl_scalapack_init_(); + // Acquire the mutex protecting global_thread. + scalapack_pthread_mutex_lock(&sl_global_thread_mutex); + scalapack_context.num_threads = n_threads; + // Release the mutex protecting global_thread. + scalapack_pthread_mutex_unlock(&sl_global_thread_mutex); +} diff --git a/FRAMEWORK/SL_Context.h b/FRAMEWORK/SL_Context.h new file mode 100644 index 00000000..8eaee0c6 --- /dev/null +++ b/FRAMEWORK/SL_Context.h @@ -0,0 +1,111 @@ + +/* ************************************************************************ + * Copyright (c) 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * ************************************************************************ */ +#ifndef SL_CONTEXT_H +#define SL_CONTEXT_H +#include +/* -- Type and macro definitions ----------------------------------------------- */ +#if defined( SCALAPACK_NO_CONTEXT) +/* This branch defines a pthread-like API, scalapack_pthread_*(), and implements it +// in terms of "dummy" code that doesn't depend on POSIX threads or any other +// threading mechanism. +// NOTE: THIS CODE DOES NOT IMPLEMENT THREADING AND IS NOT THREAD-SAFE! 
+// -- pthread types -- */ +typedef int scalapack_pthread_mutex_t; +typedef int scalapack_pthread_once_t; +/* -- pthreads macros -- */ +#define SL_PTHREAD_MUTEX_INITIALIZER 0 +#define SL_PTHREAD_ONCE_INIT 0 +#elif defined(_WIN32) +/* #ifdef _MSC_VER */ /* !defined(SCALAPACK_NO_CONTEXT) */ +#include +// This branch defines a pthread-like API, scalapack_pthread_*(), and implements it +// in terms of Windows API calls. +// -- pthread types -- +typedef SRWLOCK scalapack_pthread_mutex_t; +typedef INIT_ONCE scalapack_pthread_once_t; +// -- pthreads macros -- +#define SL_PTHREAD_MUTEX_INITIALIZER SRWLOCK_INIT +#define SL_PTHREAD_ONCE_INIT INIT_ONCE_STATIC_INIT +#else /* !defined(SCALAPACK_NO_CONTEXT) && !defined(_MSC_VER)*/ +#include +/* This branch defines a pthreads-like API, scalapack_pthreads_*(), and implements it + in terms of the corresponding pthreads_*() types, macros, and function calls. + -- pthread types -- */ +typedef pthread_mutex_t scalapack_pthread_mutex_t; +typedef pthread_once_t scalapack_pthread_once_t; +/* -- pthreads macros -- */ +#define SL_PTHREAD_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#define SL_PTHREAD_ONCE_INIT PTHREAD_ONCE_INIT +#endif +/* -- Function definitions ----------------------------------------------------- + -- pthread_mutex_*() -- */ +int scalapack_pthread_mutex_lock(scalapack_pthread_mutex_t *mutex); +int scalapack_pthread_mutex_unlock(scalapack_pthread_mutex_t *mutex); +/* -- pthread_once() -- */ +void scalapack_pthread_once(scalapack_pthread_once_t *once, void (*init)(void)); +/****************************************************************************************** + * \brief scalapack_context is a structure holding the below information: + 1) Enable/Disable status of DTL logging and AOCL_Progress. + 2) In future additionally following could be added to the structure: + - The number of threads + - Target CPU ISA information + char is_fma; + char is_avx2; + char is_avx512; + 3) It gets initialised by scalapack_init_once(). 
+ *****************************************************************************************/ +typedef struct _aocl_scalapack_global_context +{ + int num_threads; /* Number of threads */ + int is_trace_enabled; /* Enable debug trace */ + int is_log_enabled; /* Enable debug log */ + int is_progress_enabled; /* AOCL-progress */ + + // MPI Variables related to rank, process info + int rank; + int num_procs; +} aocl_scalapack_global_context; + +extern aocl_scalapack_global_context scalapack_context; +typedef aocl_scalapack_global_context aocl_scalapack_global_context_; +typedef aocl_scalapack_global_context AOCL_SCALAPACK_GLOBAL_CONTEXT; +/*! \ingroup aux_module + * \brief Initialise various framework variables including + * context + * + * \retval none. + +void aocl_scalapack_init(); */ +void aocl_scalapack_init_(); +void AOCL_SCALAPACK_INIT(); +/* Alias Declarations to enable F2C calls +#define aocl_scalapack_init_ aocl_scalapack_init +#define AOCL_SCALAPACK_INIT_ aocl_scalapack_init +#define AOCL_SCALAPACK_INIT aocl_scalapack_init*/ + +/*! \ingroup aux_module + * \brief Deallocate and clean all initalized buffers + */ +void aocl_scalapack_finalize(); +#endif /* SL_CONTEXT_H */ diff --git a/FRAMEWORK/SL_Context_fortran_include.h b/FRAMEWORK/SL_Context_fortran_include.h new file mode 100644 index 00000000..a37477fe --- /dev/null +++ b/FRAMEWORK/SL_Context_fortran_include.h @@ -0,0 +1,41 @@ + +/* ************************************************************************ + * Copyright (c) 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * ************************************************************************ */ + +#ifndef SL_CONTEXT_FORTRAN_H +#define SL_CONTEXT_FORTRAN_H + +#if _WIN32 +#define AOCL_DTL_TRACE_ENTRY_F CALL SL_DTL_TRACE_ENTRY_F(__FILE__, __LINE__, ' ') +#define AOCL_DTL_TRACE_EXIT_F CALL SL_DTL_TRACE_EXIT_F (__FILE__, __LINE__, ' ') + +#define AOCL_DTL_LOG_ENTRY_F CALL AOCL_SL_DTL_LOG_ENTRY(__FILE__, "", __LINE__, BUFFER ) +#define aocl_scalapack_init_ AOCL_SCALAPACK_INIT +#else +#define AOCL_DTL_TRACE_ENTRY_F CALL SL_DTL_TRACE_ENTRY_F(FILE_NAME, __LINE__, ' ') +#define AOCL_DTL_TRACE_EXIT_F CALL SL_DTL_TRACE_EXIT_F (FILE_NAME, __LINE__, ' ') + +#define AOCL_DTL_LOG_ENTRY_F CALL AOCL_SL_DTL_LOG_ENTRY( FILE_NAME// C_NULL_CHAR, FUNCTION_NAME// C_NULL_CHAR, __LINE__, BUFFER ) +#endif + +#endif /* SL_CONTEXT_FORTRAN_H */ diff --git a/FRAMEWORK/SL_Context_module.f b/FRAMEWORK/SL_Context_module.f new file mode 100644 index 00000000..f0b8e5b7 --- /dev/null +++ b/FRAMEWORK/SL_Context_module.f @@ -0,0 +1,98 @@ + +* ************************************************************************ +* Copyright (c) 2023 Advanced Micro Devices, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. +* +* ************************************************************************ */ + MODULE LINK_TO_C_GLOBALS + USE, INTRINSIC::ISO_C_BINDING + TYPE, BIND(C)::AOCL_SCALAPACK_GLOBAL_CONTEXT + INTEGER(C_INT)::NUM_THREADS + INTEGER(C_INT)::IS_TRACE_ENABLED + INTEGER(C_INT)::IS_LOG_ENABLED + INTEGER(C_INT)::IS_PROGRESS_ENABLED + INTEGER(C_INT)::RANK + INTEGER(C_INT)::NUM_PROCS + END TYPE + TYPE(AOCL_SCALAPACK_GLOBAL_CONTEXT),BIND(C)::SCALAPACK_CONTEXT + + END MODULE LINK_TO_C_GLOBALS + +* +* ===================================================================== +* SUBROUTINE SL_DTL_TRACE_ENTRY_F +* ===================================================================== + SUBROUTINE SL_DTL_TRACE_ENTRY_F( FILENAME, LINENUMBER, MESSAGE ) +* + USE LINK_TO_C_GLOBALS +* .. Scalar Arguments .. + INTEGER LINENUMBER +* .. +* .. Array Arguments .. + CHARACTER FILENAME( * ), MESSAGE( * ) + IF(SCALAPACK_CONTEXT%IS_TRACE_ENABLED.EQ.1) THEN + CALL AOCL_SL_DTL_TRACE_ENTRY(FILENAME, LINENUMBER, MESSAGE) + END IF + RETURN +* +* End of SL_DTL_TRACE_ENTRY_F +* + END +* +* ===================================================================== +* SUBROUTINE SL_DTL_TRACE_EXIT_F +* ===================================================================== + SUBROUTINE SL_DTL_TRACE_EXIT_F( FILENAME, LINENUMBER, MESSAGE ) +* + USE LINK_TO_C_GLOBALS +* .. Scalar Arguments .. + INTEGER LINENUMBER +* .. +* .. Array Arguments .. 
+ CHARACTER FILENAME( * ), MESSAGE( * ) + IF( SCALAPACK_CONTEXT%IS_TRACE_ENABLED.EQ.1 ) THEN + CALL AOCL_SL_DTL_TRACE_EXIT(FILENAME, LINENUMBER, MESSAGE) + END IF + RETURN +* +* End of SL_DTL_TRACE_EXIT_F +* + END +* +* ===================================================================== +* SUBROUTINE SL_DTL_LOG_ENTRY_F (NOTE(review): forwards to AOCL_SL_DTL_TRACE_ENTRY when logging is enabled - confirm a log routine was not intended) +* ===================================================================== + SUBROUTINE SL_DTL_LOG_ENTRY_F( FILENAME, LINENUMBER, MESSAGE ) +* + USE LINK_TO_C_GLOBALS +* .. Scalar Arguments .. + INTEGER LINENUMBER +* .. +* .. Array Arguments .. + CHARACTER FILENAME( * ), MESSAGE( * ) + IF(SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1) THEN + CALL AOCL_SL_DTL_TRACE_ENTRY(FILENAME, LINENUMBER, MESSAGE) + END IF + RETURN +* +* End of SL_DTL_LOG_ENTRY_F +* + END + diff --git a/FRAMEWORK/cpu_features.c b/FRAMEWORK/cpu_features.c new file mode 100644 index 00000000..3819f142 --- /dev/null +++ b/FRAMEWORK/cpu_features.c @@ -0,0 +1,268 @@ + +/* ************************************************************************ + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * ************************************************************************ */ +#include "cpu_features.h" +#define ALC_CPU_FEATURE_REG(ftr, idx, reg) ({ \ + uint32_t val; \ + struct alc_cpuid_regs *r; \ + r = &(ftr)->available[0]; \ + val = r[(idx)].reg; \ + val; \ + }) +#define ALC_CPU_FEATURE(ptr, idx, reg, bit) ({ \ + uint32_t __reg = \ + ALC_CPU_FEATURE_REG(ptr, idx, reg); \ + (__reg & bit); \ + }) +/* For AVX512 instructions */ +#define ALC_CPU_HAS_AVX512F(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512F) /* For AVX512 foundation flag */ +#define ALC_CPU_HAS_AVX512DQ(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512DQ) +#define ALC_CPU_HAS_AVX512BW(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512BW) +#define ALC_CPU_HAS_AVX512ER(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512ER) +#define ALC_CPU_HAS_AVX512CD(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512CD) +#define ALC_CPU_HAS_AVX512VL(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512VL) +#define ALC_CPU_HAS_AVX512PF(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512PF) +#define ALC_CPU_HAS_AVX512_IFMA(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ebx, ALC_CPUID_BIT_AVX512_IFMA) +#define ALC_CPU_HAS_AVX512_VNNI(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VNNI) +#define ALC_CPU_HAS_AVX512_BITALG(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_BITALG) +#define ALC_CPU_HAS_AVX512_VBMI(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VBMI) +#define ALC_CPU_HAS_AVX512_VBMI2(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VBMI2) +#define 
ALC_CPU_HAS_AVX512_VPOPCNTDQ(f) ALC_CPU_FEATURE(f, ALC_CPUID_EAX_7, ecx, ALC_CPUID_BIT_AVX512_VPOPCNTDQ) +struct alc_cpu_features cpu_features; +struct +{ + uint32_t eax; + uint32_t ecx; +} __cpuid_values[ALC_CPUID_MAX] = { + [ALC_CPUID_EAX_1] = { 0x1, 0x0 }, /* eax = 1, ecx=0 */ + [ALC_CPUID_EAX_7] = { 0x7, 0x0 }, /* eax = 7, -"- */ + [ALC_CPUID_EAX_8_01] = { 0x80000001, 0x0 }, /* eax = 0x80000001 */ + [ALC_CPUID_EAX_8_07] = { 0x80000007, 0x0 }, /* eax = 0x80000007 */ + [ALC_CPUID_EAX_8_08] = { 0x80000008, 0x0 }, /* eax = 0x80000008 */ +}; +static inline uint32_t +__extract32(uint32_t value, int start, int length) +{ + assert(start >= 0 && length > 0 && length <= 32 - start); + return (value >> start) & (~0U >> (32 - length)); +} +static inline uint16_t +alc_cpuid_get_family(uint32_t var) +{ + return (uint16_t)(__extract32(var, 20, 8) + + __extract32(var, 8, 4)); +} +static inline uint16_t +alc_cpuid_get_model(uint32_t var) +{ + return (uint16_t)(__extract32(var, 16, 4) << 4 | + __extract32(var, 4, 4)); +} +static inline uint16_t +alc_cpuid_get_stepping(uint32_t var) +{ + /* Stepping ID is CPUID leaf 1 EAX[3:0]; bits 27:20 and 11:8 are the family fields. */ + return (uint16_t)__extract32(var, 0, 4); +} +static inline void __cpuid(struct alc_cpuid_regs *out) +{ + __asm__ volatile + ( + "cpuid" + :"=a"(out->eax), "=b"(out->ebx), "=c"(out->ecx), "=d"(out->edx) + ); +} +static inline void __cpuid_1(uint32_t eax, struct alc_cpuid_regs *out) +{ + __asm__ volatile + ( + "cpuid" + :"=a"(out->eax), "=b"(out->ebx), "=c"(out->ecx), "=d"(out->edx) + :"0"(eax) + ); +} +static inline void __cpuid_2(uint32_t eax, uint32_t ecx, struct alc_cpuid_regs *out) +{ + __asm__ volatile + ( + "cpuid" + :"=a"(out->eax), "=b"(out->ebx), "=c"(out->ecx), "=d"(out->edx) + :"0"(eax), "2"(ecx) + ); +} +static void +__get_mfg_info(struct alc_cpuid_regs* cpuid_regs, + struct alc_cpu_mfg_info* mfg_info) +{ + uint16_t model; + uint16_t family; + if (mfg_info) { + struct alc_cpuid_regs regs; + __cpuid_1(1, &regs); + family = alc_cpuid_get_family(regs.eax); +
model = alc_cpuid_get_model(regs.eax); + if (family >= ALC_CPU_FAMILY_ZEN) { + mfg_info->family = (uint16_t)family; + mfg_info->model = (uint16_t)model; + } + mfg_info->stepping = alc_cpuid_get_stepping(regs.eax); + } +} +static void +__init_cpu_features(void) +{ + static unsigned initialized = 0; + struct alc_cpu_mfg_info* mfg_info = &cpu_features.cpu_mfg_info; + int arr_size = ARRAY_SIZE(__cpuid_values); + if (initialized == INITIALIZED_MAGIC) + return; + struct alc_cpuid_regs regs; + __cpuid_1(0, &regs); + /* Leaf 0 vendor string "AuthenticAMD", returned in EBX ("Auth"), EDX ("enti"), ECX ("cAMD") */ + if (regs.ebx == 0x68747541 && regs.ecx == 0x444d4163 + && regs.edx == 0x69746e65) { + cpu_features.cpu_mfg_info.mfg_type = ALC_CPU_MFG_AMD; + } + for (int i = 0; i < arr_size; i++) { + struct alc_cpuid_regs ft; + __cpuid_2(__cpuid_values[i].eax, __cpuid_values[i].ecx, &ft); + cpu_features.available[i].eax = ft.eax; + cpu_features.available[i].ebx = ft.ebx; + cpu_features.available[i].ecx = ft.ecx; + cpu_features.available[i].edx = ft.edx; + } + __get_mfg_info(&cpu_features.available[ALC_CPUID_EAX_1], mfg_info); + /* + * Globally disable some *_USEABLE flags, so that all ifunc's + * sees them + */ + if (mfg_info->mfg_type == ALC_CPU_MFG_AMD + && mfg_info->family >= ALC_CPU_FAMILY_ZEN) { + memcpy(&cpu_features.usable[0], + &cpu_features.available[0], + sizeof(cpu_features.usable)); + } + initialized = INITIALIZED_MAGIC; +} +uint32_t +alc_cpu_has_avx512f(void) +{ + __init_cpu_features(); + return ALC_CPU_HAS_AVX512F(&cpu_features); +} +uint32_t +alc_cpu_has_avx512dq(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512DQ(&cpu_features); +} +uint32_t +alc_cpu_has_avx512bw(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512BW(&cpu_features); +} +uint32_t +alc_cpu_has_avx512er(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512ER(&cpu_features); +} +uint32_t
+alc_cpu_has_avx512cd(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512CD(&cpu_features); +} +uint32_t +alc_cpu_has_avx512vl(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512VL(&cpu_features); +} +uint32_t +alc_cpu_has_avx512pf(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512PF(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_ifma(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_IFMA(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_vnni(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_VNNI(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_bitalg(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_BITALG(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_vbmi(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_VBMI(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_vbmi2(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_VBMI2(&cpu_features); +} +uint32_t +alc_cpu_has_avx512_vpopcntdq(void) +{ + if (alc_cpu_has_avx512f() == 0) + return 0; + __init_cpu_features(); + return ALC_CPU_HAS_AVX512_VPOPCNTDQ(&cpu_features); +} diff --git a/FRAMEWORK/cpu_features.h b/FRAMEWORK/cpu_features.h new file mode 100644 index 00000000..4ede1e6b --- /dev/null +++ b/FRAMEWORK/cpu_features.h @@ -0,0 +1,175 @@ + +/* ************************************************************************ + * Copyright (c) 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * ************************************************************************ */ +#ifndef SL_CPUFEATURES_H +#define SL_CPUFEATURES_H +#include +#include +#include +enum { + ALC_CPUID_EAX_1 = 0, + ALC_CPUID_EAX_7, + ALC_CPUID_EAX_8_01, /* 8000.0001 */ + ALC_CPUID_EAX_8_07, /* 8000.0007 */ + ALC_CPUID_EAX_8_08, /* 8000.0008 */ + /* Last entry */ + ALC_CPUID_MAX, +}; +enum { + /*EBX Values*/ + ALC_CPUID_BIT_FSGSBASE = (1u << 0), + ALC_CPUID_BIT_TSC_ADJUST = (1u << 1), + ALC_CPUID_BIT_SGX = (1u << 2), + ALC_CPUID_BIT_BMI1 = (1u << 3), + ALC_CPUID_BIT_HLE = (1u << 4), + ALC_CPUID_BIT_AVX2 = (1u << 5), + ALC_CPUID_BIT_SMEP = (1u << 7), + ALC_CPUID_BIT_BMI2 = (1u << 8), + ALC_CPUID_BIT_ERMS = (1u << 9), + ALC_CPUID_BIT_INVPCID = (1u << 10), + ALC_CPUID_BIT_RTM = (1u << 11), + ALC_CPUID_BIT_TSX = ALC_CPUID_BIT_RTM, + ALC_CPUID_BIT_PQM = (1u << 12), + ALC_CPUID_BIT_MPX = (1u << 14), + ALC_CPUID_BIT_PQE = (1u << 15), + ALC_CPUID_BIT_AVX512F = (1u << 16), + ALC_CPUID_BIT_AVX512DQ = (1u << 17), + ALC_CPUID_BIT_RDSEED = (1u << 18), + ALC_CPUID_BIT_ADX = (1u << 19), + ALC_CPUID_BIT_SMAP = (1u << 20), + ALC_CPUID_BIT_AVX512_IFMA = (1u << 21), + ALC_CPUID_BIT_CLFLUSHOPT = (1u << 22), + ALC_CPUID_BIT_CLWB = (1u << 24), + ALC_CPUID_BIT_TRACE = (1u << 25), + ALC_CPUID_BIT_AVX512PF = (1u << 26), + ALC_CPUID_BIT_AVX512ER = (1u << 27), + ALC_CPUID_BIT_AVX512CD = (1u << 28), + ALC_CPUID_BIT_SHA = (1u << 29), + ALC_CPUID_BIT_AVX512BW = (1u << 30), + ALC_CPUID_BIT_AVX512VL = (1u << 31), + /* ECX Values*/ + ALC_CPUID_BIT_PREFETCHWT1 = (1u << 0), + ALC_CPUID_BIT_AVX512_VBMI = (1u << 1), + ALC_CPUID_BIT_UMIP = (1u << 2), + ALC_CPUID_BIT_PKU = (1u << 3), + ALC_CPUID_BIT_OSPKE = (1u << 4), + ALC_CPUID_BIT_WAITPKG = (1u << 5), + ALC_CPUID_BIT_AVX512_VBMI2 = (1u << 6), + ALC_CPUID_BIT_SHSTK = (1u << 7), + ALC_CPUID_BIT_GFNI = (1u << 8), + ALC_CPUID_BIT_VAES = (1u << 9), + ALC_CPUID_BIT_VPCLMULQDQ = (1u << 10), + ALC_CPUID_BIT_AVX512_VNNI = (1u << 11), + ALC_CPUID_BIT_AVX512_BITALG = (1u << 
12), + ALC_CPUID_BIT_AVX512_VPOPCNTDQ = (1u << 14), + ALC_CPUID_BIT_RDPID = (1u << 22), + ALC_CPUID_BIT_CLDEMOTE = (1u << 25), + ALC_CPUID_BIT_MOVDIRI = (1u << 27), + ALC_CPUID_BIT_MOVDIR64B = (1u << 28), + ALC_CPUID_BIT_SGX_LC = (1u << 30), + /* EDX Values */ + ALC_CPUID_BIT_AVX512_4VNNIW = (1u << 2), + ALC_CPUID_BIT_AVX512_4FMAPS = (1u << 3), + ALC_CPUID_BIT_FSRM = (1u << 4), + ALC_CPUID_BIT_PCONFIG = (1u << 18), + ALC_CPUID_BIT_IBT = (1u << 20), + ALC_CPUID_BIT_IBRS_IBPB = (1u << 26), + ALC_CPUID_BIT_STIBP = (1u << 27), + ALC_CPUID_BIT_CAPABILITIES = (1u << 29), + ALC_CPUID_BIT_SSBD = (1u << 31), +}; +#define ALC_CPU_FAMILY_ZEN 0x17 +#define ALC_CPU_FAMILY_ZEN_PLUS 0x17 +#define ALC_CPU_FAMILY_ZEN2 0x17 +#define ALC_CPU_FAMILY_ZEN3 0x19 +#define ALC_CPU_FAMILY_ZEN4 0x19 +static inline uint32_t +__extract32(uint32_t value, int start, int length); +static inline uint16_t +alc_cpuid_get_family(uint32_t var); +static inline uint16_t +alc_cpuid_get_model(uint32_t var); +static inline uint16_t +alc_cpuid_get_stepping(uint32_t var); +/* ID return values */ +struct alc_cpuid_regs { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; +}; +typedef enum { + ALC_CPU_MFG_INTEL, + ALC_CPU_MFG_AMD, + ALC_CPU_MFG_OTHER, +} alc_cpu_mfg_t; +struct alc_cpu_mfg_info { + alc_cpu_mfg_t mfg_type; + uint16_t family; + uint16_t model; + uint16_t stepping; +}; +struct alc_cpu_features { + struct alc_cpu_mfg_info cpu_mfg_info; + struct alc_cpuid_regs available[ALC_CPUID_MAX]; + struct alc_cpuid_regs usable[ALC_CPUID_MAX]; +}; +static inline void __cpuid(struct alc_cpuid_regs *out); +static inline void __cpuid_1(uint32_t eax, struct alc_cpuid_regs *out); +static inline void __cpuid_2(uint32_t eax, uint32_t ecx, struct alc_cpuid_regs *out); +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#endif +#define INITIALIZED_MAGIC 0xdeadbeaf +static void +__get_mfg_info(struct alc_cpuid_regs* cpuid_regs, + struct alc_cpu_mfg_info* mfg_info); +static void 
+__init_cpu_features(void); +uint32_t +alc_cpu_has_avx512f(void); +uint32_t +alc_cpu_has_avx512dq(void); +uint32_t +alc_cpu_has_avx512bw(void); +uint32_t +alc_cpu_has_avx512er(void); +uint32_t +alc_cpu_has_avx512cd(void); +uint32_t +alc_cpu_has_avx512vl(void); +uint32_t +alc_cpu_has_avx512pf(void); +uint32_t +alc_cpu_has_avx512_ifma(void); +uint32_t +alc_cpu_has_avx512_vnni(void); +uint32_t +alc_cpu_has_avx512_bitalg(void); +uint32_t +alc_cpu_has_avx512_vbmi(void); +uint32_t +alc_cpu_has_avx512_vbmi2(void); +uint32_t +alc_cpu_has_avx512_vpopcntdq(void); +#endif //SL_CPUFEATURES_H diff --git a/LICENSE b/LICENSE index e5aa1c83..bd156b6b 100644 --- a/LICENSE +++ b/LICENSE @@ -5,7 +5,7 @@ Copyright (c) 2000-2011 The University of California Berkeley. All rights reserved. Copyright (c) 2006-2011 The University of Colorado Denver. All rights reserved. -Copyright (C) 2020-2022 Advanced Micro Devices, Inc. All rights +Copyright (C) 2020-2023 Advanced Micro Devices, Inc. All rights reserved. $COPYRIGHT$ diff --git a/README_ScaLAPACK_AMD b/README_ScaLAPACK_AMD index 5fca3ca3..a2319339 100644 --- a/README_ScaLAPACK_AMD +++ b/README_ScaLAPACK_AMD @@ -1,48 +1,22 @@ -AOCL-ScaLAPACK -=============================================================================== -ScaLAPACK, or Scalable LAPACK, is a library of high performance linear algebra -routines for distributed memory computers supporting MPI. +# AOCL-ScaLAPACK -AOCL-ScaLAPACK is the optimized version of ScaLAPACK for AMD EPYC family of -processors. +AOCL-ScaLAPACK is a library of high-performance linear algebra routines for +parallel distributed memory machines. It can be used to solve linear systems, +least squares problems, eigenvalue problems, and singular value problems. -=============================================================================== +AOCL-ScaLAPACK is forked from upstream Netlib ScaLAPACK GitHub +[repository](https://github.com/Reference-ScaLAPACK/scalapack). 
This fork has +ScaLAPACK optimized for AMD "Zen" core-based processors. It depends on external +libraries BLAS and LAPACK. For AMD CPUs, use of AOCL-BLIS and AOCL-libFLAME is +recommended. -1. Install MPI library and set the PATH and LD_LIBRARY_PATH environment - variables to point to installed binaries. - eg. export PATH=/bin:$PATH - eg. export LD_LIBRARY_PATH=/lib:$LD_LIBRARY_PATH +For detailed instructions on how to configure, build, install, and link against +AOCL-ScaLAPACK on AMD CPUs, please refer to the AOCL User Guide located on AMD +developer [portal](https://www.amd.com/en/developer/aocl.html). -2. Download AMD optimized versions of BLIS and libFLAME from following link - https://developer.amd.com/amd-aocl/ +For any issues or suggestions in the "amd" fork of ScaLAPACK, please email +toolchainsupport@amd.com. -3. Install latest CMAKE tool. - -4. Install AOCL-BLIS and AOCL-libFLAME libraries either using pre-built binaries or build - from source. - To build AOCL-BLIS and AOCL-libFLAME from source, clone from following github links - BLIS: https://github.com/amd/blis - libFLAME: https://github.com/amd/libflame - -5. Steps to build the AOCL-ScaLAPACK library and the test suite: - - a. Create a new directory. For example, build: - $ mkdir build - $ cd build - - b. Set PATH and LD_LIBRARY_PATH appropriately to the MPI installation. - - c. To Build the AOCL-ScaLAPACK library and the test suite, Run the below commands: - $ cmake .. -DBUILD_SHARED_LIBS=OFF -DBLAS_LIBRARIES="-fopenmp /libblis-mt.a" - -DLAPACK_LIBRARIES="/libflame.a" - -DCMAKE_C_COMPILER=mpicc -DCMAKE_Fortran_COMPILER=mpif90 - -DUSE_OPTIMIZED_LAPACK_BLAS=OFF [-D DENABLE_ILP64=ON] - $ make -j - - This command generates the AOCL-ScaLAPACK library in the 'build/lib' folder and test applications in the 'build/TESTING' folder. - -4. To Run the AOCL-ScaLAPACK test suite, Run the below script in the 'build/' folder: - cp -f ../scalapack_test.sh . 
- ./scalapack_test.sh - Test logs will be generated in 'ScalaPack_TestResults.txt'. +Also, please read the LICENSE file for information on copying and distributing +this software. diff --git a/REDIST/SRC/redist.h b/REDIST/SRC/redist.h index 0bdfe270..57d2331a 100644 --- a/REDIST/SRC/redist.h +++ b/REDIST/SRC/redist.h @@ -1,6 +1,7 @@ #ifdef ENABLE_ILP64 #include #endif +#include "../BLACS/SRC/Bdef.h" #ifdef T3D #define float double diff --git a/SRC/CMakeLists.txt b/SRC/CMakeLists.txt index 8887bfd0..0954e3f6 100644 --- a/SRC/CMakeLists.txt +++ b/SRC/CMakeLists.txt @@ -1,17 +1,17 @@ -##Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.## +##Copyright (C) 2021-2023, Advanced Micro Devices, Inc. All rights reserved.## set (ALLAUX pjlaenv.f pilaenvx.f piparmq.f pilaver.f pmpim2.f pmpcol.f) if(ENABLE_DTL) set (ALLAUX-C pbchkvect.c getpbbuf.c pcrot.c pslaiect.c pdlaiect.c pzrot.c - slamov.c clamov.c dlamov.c zlamov.c aocl_scalapack_progress.c + slamov.c clamov.c dlamov.c zlamov.c aocl_scalapack_progress.c get_aocl_scalapack_version.c aocl_dtl_trace_entry.c aocl_dtl_trace_exit.c) else(ENABLE_DTL) set (ALLAUX-C pbchkvect.c getpbbuf.c pcrot.c pslaiect.c pdlaiect.c pzrot.c - slamov.c clamov.c dlamov.c zlamov.c aocl_scalapack_progress.c - get_aocl_scalapack_version.c) + slamov.c clamov.c dlamov.c zlamov.c aocl_scalapack_progress.c + get_aocl_scalapack_version.c aocl_dtl_trace_entry.c aocl_dtl_trace_exit.c) ENDIF(ENABLE_DTL) set (SCLAUX diff --git a/SRC/aocl_dtl_trace_entry.c b/SRC/aocl_dtl_trace_entry.c index e20fc172..d3c5be8a 100644 --- a/SRC/aocl_dtl_trace_entry.c +++ b/SRC/aocl_dtl_trace_entry.c @@ -1,7 +1,7 @@ /* --------------------------------------------------------------------- * * -- AOCL ScaLAPACK routine -- -* Copyright (c) 2020-2022 Advanced Micro Devices, Inc.  All rights reserved. +* Copyright (c) 2020-2023 Advanced Micro Devices, Inc.  All rights reserved. 
* * --------------------------------------------------------------------- */ @@ -17,15 +17,18 @@ /* Customized for Fortran calls from Scalapack code */ -void aocl_dtl_log_entry_( char *buffer ) +void aocl_sl_dtl_log_entry_( const char *filename, const char *function_name, + unsigned int *line_number, const char *buffer ) { -#if AOCL_DTL_LOG_ENABLE - /* Capture the contents to the DTL log file */ - AOCL_DTL_LOG(AOCL_DTL_LEVEL_INFO, buffer); -#endif + DTL_Trace(AOCL_DTL_LEVEL_INFO, + TRACE_TYPE_LOG, + filename, + function_name, + *line_number, + buffer); } -void aocl_dtl_trace_entry_( const char * fileName, unsigned int * lineNumber, +void aocl_sl_dtl_trace_entry_( const char * fileName, unsigned int * lineNumber, const char * message ) { #if AOCL_DTL_TRACE_ENABLE diff --git a/SRC/aocl_dtl_trace_exit.c b/SRC/aocl_dtl_trace_exit.c index 0af64043..aaaa7afe 100644 --- a/SRC/aocl_dtl_trace_exit.c +++ b/SRC/aocl_dtl_trace_exit.c @@ -16,7 +16,7 @@ #include "../AOCL_DTL/aocldtl.h" #include "pxsyevx.h" -void aocl_dtl_trace_exit_( const char * fileName, unsigned int * lineNumber, +void aocl_sl_dtl_trace_exit_( const char * fileName, unsigned int * lineNumber, const char * message ) { #if AOCL_DTL_TRACE_ENABLE diff --git a/SRC/aocl_scalapack_progress.c b/SRC/aocl_scalapack_progress.c index 0ac27935..fb149394 100644 --- a/SRC/aocl_scalapack_progress.c +++ b/SRC/aocl_scalapack_progress.c @@ -20,14 +20,23 @@ void aocl_scalapack_set_progress_( aocl_scalapack_progress_callback func ) aocl_scalapack_progress_ptr_ = func; } -void aocl_scalapack_progress_(char* api, integer *lenapi, integer* progress, - integer* current_process, integer *total_processes) +integer aocl_scalapack_progress_(const char* const api, const integer *lenapi, const integer* progress, + const integer* current_process, const integer *total_processes) { - integer ret; - - if (aocl_scalapack_progress_ptr_ != NULL ) { + integer ret = 0; + if (aocl_scalapack_progress_ptr_ != NULL ) { ret = 
aocl_scalapack_progress_ptr_ ( api, lenapi, progress, current_process, total_processes); } - - return; + + return ret; +} +integer AOCL_SCALAPACK_PROGRESS(const char* const api, const integer* lenapi, const integer* progress, + const integer* current_process, const integer* total_processes) +{ + integer ret = 0; + if (aocl_scalapack_progress_ptr_ != NULL) { + ret = aocl_scalapack_progress_ptr_(api, lenapi, progress, current_process, total_processes); + } + + return ret; } diff --git a/SRC/aocl_scalapack_progress.h b/SRC/aocl_scalapack_progress.h index 5941d261..613ae21e 100644 --- a/SRC/aocl_scalapack_progress.h +++ b/SRC/aocl_scalapack_progress.h @@ -22,16 +22,28 @@ typedef unsigned long uinteger; #endif typedef integer ( *aocl_scalapack_progress_callback )( -char *api, -integer *lenapi, -integer *progress, -integer *current_process, -integer *total_processes +const char * const api, +const integer *lenapi, +const integer *progress, +const integer *current_process, +const integer *total_processes ); -integer aocl_scalapack_progress ( char* api, integer *lenapi, integer* progress, - integer* current_process, integer *total_processes ); +integer aocl_scalapack_progress_( + const char* const api, + const integer* lenapi, + const integer* progress, + const integer* current_process, + const integer* total_processes +); +integer AOCL_SCALAPACK_PROGRESS( + const char* const api, + const integer* lenapi, + const integer* progress, + const integer* current_process, + const integer* total_processes +); aocl_scalapack_progress_callback aocl_scalapack_progress_ptr_; diff --git a/SRC/get_aocl_scalapack_version.c b/SRC/get_aocl_scalapack_version.c index 9cd0dfe2..1d42b3d6 100644 --- a/SRC/get_aocl_scalapack_version.c +++ b/SRC/get_aocl_scalapack_version.c @@ -3,7 +3,7 @@ /* --------------------------------------------------------------------- * * -- AOCL ScaLAPACK routine -- -* Copyright (c) 2020-2022 Advanced Micro Devices, Inc.  All rights reserved. 
+* Copyright (c) 2020-2023 Advanced Micro Devices, Inc.  All rights reserved. * * --------------------------------------------------------------------- */ @@ -20,40 +20,43 @@ #define _VERSION_MAKE_STR(x) #x #ifdef __STDC__ -void get_aocl_scalapack_version_( char * version ) +void get_aocl_scalapack_version_( char * version, int *ver_str_len ) #else -void get_aocl_scalapack_version_( version ) +void get_aocl_scalapack_version_( version, ver_str_len ) char * version; + int *ver_str_len; #endif { #ifdef AOCL_SCALAPACK_VERSION - char slmainversion[] = "AOCL-ScaLAPACK 4.0 "; + char slmainversion[] = "AOCL-ScaLAPACK 4.1.0 "; char slversion[1000]; char scalapackversion[] = ", supports ScaLAPACK 2.2.0"; int length, i; length = 0; - for (i = 0; i 0 from PDTRTRI, then U is singular, * and the inverse is not computed. * CALL PDTRTRI( 'Upper', 'Non-unit', N, A, IA, JA, DESCA, INFO ) - IF( INFO.GT.0 ) - $ RETURN + IF( INFO.GT.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Define array descriptor for working array WORK * @@ -371,6 +425,10 @@ SUBROUTINE PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGETRI diff --git a/SRC/pdgetrs.f b/SRC/pdgetrs.f index 15ed7709..f30796a1 100644 --- a/SRC/pdgetrs.f +++ b/SRC/pdgetrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, $ IB, JB, DESCB, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER TRANS INTEGER IA, IB, INFO, JA, JB, N, NRHS @@ -177,13 +184,43 @@ SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, * .. 
Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TRANS, IA, IB, INFO, JA, JB, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDGETRS inputs:,TRANS:',A5,',IA:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',JB:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5,',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -232,13 +269,22 @@ SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGETRS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 .OR. NRHS.EQ.0 ) - $ RETURN + IF( N.EQ.0 .OR. 
NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL DESCSET( DESCIP, DESCA( M_ ) + DESCA( MB_ )*NPROW, 1, $ DESCA( MB_ ), 1, DESCA( RSRC_ ), MYCOL, ICTXT, @@ -284,6 +330,10 @@ SUBROUTINE PDGETRS( TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGETRS diff --git a/SRC/pdggqrf.f b/SRC/pdggqrf.f index 2f2487f3..49c9b6bf 100644 --- a/SRC/pdggqrf.f +++ b/SRC/pdggqrf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, $ DESCB, TAUB, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P * .. @@ -280,13 +287,44 @@ SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IB, INFO, JA, JB, LWORK, + $ M, N, P, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDGGQRF inputs:,IA:',I5,',IB:',I5,',INFO:',I5, + $ ',JA:',I5,',JB:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',P:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -342,8 +380,16 @@ SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGGQRF', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -363,6 +409,10 @@ SUBROUTINE PDGGQRF( N, M, P, A, IA, JA, DESCA, TAUA, B, IB, JB, CALL PDGERQF( N, P, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = DBLE( MAX( LWMIN, INT( WORK( 1 ) ) ) ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGGQRF diff --git a/SRC/pdggrqf.f b/SRC/pdggrqf.f index 78fd5eac..d042a07b 100644 --- a/SRC/pdggrqf.f +++ b/SRC/pdggrqf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, $ DESCB, TAUB, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, * and University of California, Berkeley. 
* May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IB, INFO, JA, JB, LWORK, M, N, P * .. @@ -280,13 +287,44 @@ SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC DBLE, INT, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IB, INFO, JA, JB, LWORK, + $ M, N, P, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDGGRQF inputs:,IA:',I5,',IB:',I5,',INFO:',I5, + $ ',JA:',I5,',JB:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',P:',I5,',NPROW:',I5, + $ ',NPCOL:',I5 ,',MYROW:',I5, + $ ',MYCOL:',I5,A5) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -342,8 +380,16 @@ SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDGGRQF', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -364,6 +410,10 @@ SUBROUTINE PDGGRQF( M, P, N, A, IA, JA, DESCA, TAUA, B, IB, JB, CALL PDGEQRF( P, N, B, IB, JB, DESCB, TAUB, WORK, LWORK, INFO ) WORK( 1 ) = DBLE( MAX( LWMIN, INT( WORK( 1 ) ) ) ) * +* +* Capture the subroutine exit in the trace file +* + 
AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDGGRQF diff --git a/SRC/pdhseqr.f b/SRC/pdhseqr.f index 6e0f7510..d7e8cb3b 100644 --- a/SRC/pdhseqr.f +++ b/SRC/pdhseqr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDHSEQR( JOB, COMPZ, N, ILO, IHI, H, DESCH, WR, WI, Z, $ DESCZ, WORK, LWORK, IWORK, LIWORK, INFO ) * @@ -9,6 +15,7 @@ SUBROUTINE PDHSEQR( JOB, COMPZ, N, ILO, IHI, H, DESCH, WR, WI, Z, * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -277,13 +284,44 @@ SUBROUTINE PDHSEQR( JOB, COMPZ, N, ILO, IHI, H, DESCH, WR, WI, Z, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Decode and check the input parameters. 
* INFO = 0 ICTXT = DESCH( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IHI, ILO, INFO, LWORK, LIWORK, + $ N, COMPZ, JOB, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDHSEQR inputs:,IHI:',I5,',ILO:',I5,',INFO:',I5, + $ ',LWORK:',I5,',LIWORK:',I5, + $ ',N:',I5,',COMPZ:',A5,',JOB:',A5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL IF( NPROW.EQ.-1 ) INFO = -(600+CTXT_) IF( INFO.EQ.0 ) THEN @@ -356,18 +394,30 @@ SUBROUTINE PDHSEQR( JOB, COMPZ, N, ILO, IHI, H, DESCH, WR, WI, Z, * Quick return in case of invalid argument. * CALL PXERBLA( ICTXT, 'PDHSEQR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * ELSE IF( N.EQ.0 ) THEN * * Quick return in case N = 0; nothing to do. * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * ELSE IF( LQUERY ) THEN * * Quick return in case of a workspace query. * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * ELSE @@ -422,6 +472,10 @@ SUBROUTINE PDHSEQR( JOB, COMPZ, N, ILO, IHI, H, DESCH, WR, WI, Z, $ 1, HRSRC, HCSRC ) END IF WI( ILO ) = ZERO +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -677,6 +731,10 @@ SUBROUTINE PDHSEQR( JOB, COMPZ, N, ILO, IHI, H, DESCH, WR, WI, Z, * WORK(1) = LWKOPT IWORK(1) = LIWKOPT +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDHSEQR diff --git a/SRC/pdlabad.f b/SRC/pdlabad.f index 4fd00d82..9834d5b7 100644 --- a/SRC/pdlabad.f +++ b/SRC/pdlabad.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLABAD( ICTXT, SMALL, LARGE ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLABAD( ICTXT, SMALL, LARGE ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER ICTXT DOUBLE PRECISION LARGE, SMALL @@ -53,8 +60,24 @@ SUBROUTINE PDLABAD( ICTXT, SMALL, LARGE ) * .. Intrinsic Functions .. INTRINSIC LOG10, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * If it looks like we're on a Cray, take the square root of * SMALL and LARGE to avoid overflow and underflow problems. * @@ -69,6 +92,10 @@ SUBROUTINE PDLABAD( ICTXT, SMALL, LARGE ) CALL DGAMN2D( ICTXT, 'All', ' ', 1, 1, LARGE, 1, IDUMM, $ IDUMM, -1, -1, IDUMM ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLABAD diff --git a/SRC/pdlabrd.f b/SRC/pdlabrd.f index c647f7c4..9acdcd19 100644 --- a/SRC/pdlabrd.f +++ b/SRC/pdlabrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLABRD( M, N, NB, A, IA, JA, DESCA, D, E, TAUQ, TAUP, $ X, IX, JX, DESCX, Y, IY, JY, DESCY, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLABRD( M, N, NB, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IX, IY, JA, JX, JY, M, N, NB * .. @@ -269,15 +276,49 @@ SUBROUTINE PDLABRD( M, N, NB, A, IA, JA, DESCA, D, E, TAUQ, TAUP, * .. 
Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( M.LE.0 .OR. N.LE.0 ) - $ RETURN + IF( M.LE.0 .OR. N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IX, IY, JA, JX, JY, M, N, + $ NB, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLABRD inputs:,IA:',I5,',IX:',I5,',IY:',I5, + $ ',JA:',I5,',JX:',I5,',JY:',I5, + $ ',M:',I5,',N:',I5,',NB:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) IPY = DESCA( MB_ ) + 1 @@ -487,6 +528,10 @@ SUBROUTINE PDLABRD( M, N, NB, A, IA, JA, DESCA, D, E, TAUQ, TAUP, 20 CONTINUE END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLABRD diff --git a/SRC/pdlacon.f b/SRC/pdlacon.f index 74b9eabd..88fa8cee 100644 --- a/SRC/pdlacon.f +++ b/SRC/pdlacon.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, $ EST, KASE ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IV, IX, JV, JX, KASE, N DOUBLE PRECISION EST @@ -180,18 +187,52 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, * .. Save statement .. SAVE * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. * ESTWORK( 1 ) = EST ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IV, IX, JV, JX, KASE, N, EST, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLACON inputs:,IV:',I5,',IX:',I5,',JV:',I5, + $ ',JX:',I5,',KASE:',I5,',N:',I5, + $ ',EST:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIVX, JJVX, IVXROW, IVXCOL ) - IF( MYCOL.NE.IVXCOL ) - $ RETURN + IF( MYCOL.NE.IVXCOL ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF IROFF = MOD( IX-1, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IVXROW, NPROW ) IF( MYROW.EQ.IVXROW ) @@ -204,6 +245,10 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, 10 
CONTINUE KASE = 1 JUMP = 1 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -241,6 +286,10 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, 30 CONTINUE KASE = 2 JUMP = 2 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * ................ ENTRY (JUMP = 2) @@ -275,6 +324,10 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, END IF KASE = 1 JUMP = 3 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * ................ ENTRY (JUMP = 3) @@ -315,6 +368,10 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, 100 CONTINUE KASE = 2 JUMP = 4 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * ................ ENTRY (JUMP = 4) @@ -356,6 +413,10 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, 130 CONTINUE KASE = 1 JUMP = 5 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * ................ ENTRY (JUMP = 5) @@ -381,6 +442,10 @@ SUBROUTINE PDLACON( N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, KASE = 0 * EST = ESTWORK( 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLACON diff --git a/SRC/pdlaconsb.f b/SRC/pdlaconsb.f index 1323a205..87403f5d 100644 --- a/SRC/pdlaconsb.f +++ b/SRC/pdlaconsb.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, $ LWORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER I, L, LWORK, M DOUBLE PRECISION H33, H43H34, H44 @@ -181,13 +188,42 @@ SUBROUTINE PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, * .. Intrinsic Functions .. 
INTRINSIC ABS, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) ULP = PDLAMCH( CONTXT, 'PRECISION' ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) I, L, LWORK, M, H33, H43H34, + $ H44, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLACONSB inputs:,I:',I5,',L:',I5,',LWORK:',I5, + $ ',M:',I5,',H33:',F9.4,',H43H34:',F9.4, + $ ',H44:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) @@ -212,6 +248,10 @@ SUBROUTINE PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, END IF IF( LWORK.LT.7*ISTR2 ) THEN CALL PXERBLA( CONTXT, 'PDLACONSB', 10 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF ISTR3 = 3*ISTR2 @@ -567,6 +607,10 @@ SUBROUTINE PDLACONSB( A, DESCA, I, L, M, H44, H33, H43H34, BUF, * CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, M, 1, L, L, -1, -1, -1 ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLACONSB diff --git a/SRC/pdlacp2.f b/SRC/pdlacp2.f index dc7b44c9..4b555358 100644 --- a/SRC/pdlacp2.f +++ b/SRC/pdlacp2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * @@ -5,6 +11,7 @@ SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, JA, JB, M, N @@ -166,14 +173,48 @@ SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* + IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, JA, JB, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLACP2 inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',JA:',I5,',JB:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) @@ -228,8 +269,13 @@ SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, IF( MYCOL.EQ.IACOL ) THEN * MP = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW ) - IF( MP.LE.0 ) - $ RETURN + IF( MP.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF IF( MYROW.EQ.IAROW ) $ MP = MP - IROFFA MYDIST = MOD( MYROW-IAROW+NPROW, NPROW ) @@ -326,8 +372,13 @@ SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, IF( MYROW.EQ.IAROW ) THEN * NQ = NUMROC( N+ICOFFA, NBA, MYCOL, IACOL, NPCOL ) - IF( NQ.LE.0 ) - $ RETURN + IF( NQ.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF IF( MYCOL.EQ.IACOL ) $ NQ = NQ - ICOFFA MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL ) @@ -398,6 +449,10 @@ SUBROUTINE PDLACP2( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLACP2 diff --git a/SRC/pdlacp3.f b/SRC/pdlacp3.f index 3b1aa0ed..6e31f36e 100644 --- a/SRC/pdlacp3.f +++ b/SRC/pdlacp3.f @@ -1,4 +1,11 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * -- ScaLAPACK routine (version 1.7) -- @@ -109,7 +116,7 @@ SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) * II (global input) INTEGER * By using REV 0 & 1, data can be sent out and returned again. * If REV=0, then II is destination row index for the node(s) -* receiving the replicated B. +* receiving the replicated B. * If II>=0,JJ>=0, then node (II,JJ) receives the data * If II=-1,JJ>=0, then all rows in column JJ receive the * data @@ -122,7 +129,7 @@ SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) * Similar description as II above * * REV (global input) INTEGER -* Use REV = 0 to send global A into locally replicated B +* Use REV = 0 to send global A into locally replicated B * (on node (II,JJ)). * Use REV <> 0 to send locally replicated B from node (II,JJ) * to its owner (which changes depending on its location in @@ -158,10 +165,31 @@ SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* - IF( M.LE.0 ) - $ RETURN +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* + IF( M.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) @@ -170,6 +198,19 @@ SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) JAFIRST = DESCA( CSRC_ ) * CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) I, II, JJ, LDB, M, REV, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLACP3 inputs:,I:',I5,',II:',I5,',JJ:',I5, + $ ',LDB:',I5,',M:',I5,',REV:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * IF( REV.EQ.0 ) THEN DO 20 IDI = 1, M @@ -306,6 +347,10 @@ SUBROUTINE PDLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV ) IF( IDJ.LE.IFIN ) $ GO TO 30 END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLACP3 diff --git a/SRC/pdlacpy.f b/SRC/pdlacpy.f index 741847b7..5b59733f 100644 --- a/SRC/pdlacpy.f +++ b/SRC/pdlacpy.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, JA, JB, M, N @@ -160,10 +167,31 @@ SUBROUTINE PDLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+M-1 ) JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) @@ -224,6 +252,10 @@ SUBROUTINE PDLACPY( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLACPY diff --git a/SRC/pdlaed0.f b/SRC/pdlaed0.f index a6368e1c..a01f1b09 100644 --- a/SRC/pdlaed0.f +++ b/SRC/pdlaed0.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER INFO, IQ, JQ, N * .. @@ -99,20 +106,58 @@ SUBROUTINE PDLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Test the input parameters. * CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) INFO, IQ, JQ, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAED0 inputs:,INFO:',I5,',IQ:',I5,',JQ:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF INFO = 0 IF( DESCQ( NB_ ).GT.N .OR. N.LT.2 ) $ INFO = -1 IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'PDLAED0', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -162,6 +207,10 @@ SUBROUTINE PDLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) $ WORK, INFO ) IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'DSTEQR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF IF( MYROW.NE.IQROW .OR. MYCOL.NE.IQCOL ) THEN @@ -228,6 +277,10 @@ SUBROUTINE PDLAED0( N, D, E, Q, IQ, JQ, DESCQ, WORK, IWORK, INFO ) * end while * 90 CONTINUE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAED0 diff --git a/SRC/pdlaed1.f b/SRC/pdlaed1.f index 413d149b..69b9b446 100644 --- a/SRC/pdlaed1.f +++ b/SRC/pdlaed1.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAED1( N, N1, D, ID, Q, IQ, JQ, DESCQ, RHO, WORK, $ IWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLAED1( N, N1, D, ID, Q, IQ, JQ, DESCQ, RHO, WORK, * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER ID, INFO, IQ, JQ, N, N1 DOUBLE PRECISION RHO @@ -136,16 +143,51 @@ SUBROUTINE PDLAED1( N, N1, D, ID, Q, IQ, JQ, DESCQ, RHO, WORK, * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Test the input parameters. 
* CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) ID, INFO, IQ, JQ, N, N1, RHO, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAED1 inputs:,ID:',I5,',INFO:',I5,',IQ:',I5, + $ ',JQ:',I5,',N:',I5,',N1:',I5, + $ ',RHO:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF INFO = 0 IF( NPROW.EQ.-1 ) THEN INFO = -( 600+CTXT_ ) @@ -158,13 +200,22 @@ SUBROUTINE PDLAED1( N, N1, D, ID, Q, IQ, JQ, DESCQ, RHO, WORK, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'PDLAED1', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * The following values are integer pointers which indicate * the portion of the workspace used by a particular array @@ -265,6 +316,10 @@ SUBROUTINE PDLAED1( N, N1, D, ID, Q, IQ, JQ, DESCQ, RHO, WORK, END IF * 20 CONTINUE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAED1 diff --git a/SRC/pdlaed2.f b/SRC/pdlaed2.f index 72d43515..5ed50eee 100644 --- a/SRC/pdlaed2.f +++ b/SRC/pdlaed2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAED2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ, $ RHO, Z, W, DLAMDA, Q2, LDQ2, QBUF, CTOT, PSM, $ NPCOL, INDX, INDXC, INDXP, INDCOL, COLTYP, NN, @@ -8,6 +14,7 @@ SUBROUTINE PDLAED2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ, * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
INTEGER DCOL, DROW, IB1, IB2, ICTXT, K, LDQ, LDQ2, N, $ N1, NB, NN, NN1, NN2, NPCOL @@ -177,15 +184,53 @@ SUBROUTINE PDLAED2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ, * .. Local Arrays .. INTEGER PTT( 4 ) * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL BLACS_PINFO( IAM, NPROCS ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DCOL, DROW, IB1, IB2, ICTXT, + $ K, LDQ, LDQ2, N, N1, + $ NB, NN, NN1, NN2, NPCOL, RHO, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAED2 inputs:,DCOL:',I5,',DROW:',I5, + $ ',IB1:',I5,',IB2:',I5,',ICTXT:',I5,',K:',I5, + $ ',LDQ:',I5,',LDQ2:',I5,',N:',I5, + $ ',N1:',I5,',NB:',I5,',NN:',I5,',NN1:',I5, + $ ',NN2:',I5,',NPCOL:',I5,',RHO:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NUMROC( N, NB, MYROW, DROW, NPROW ) * N2 = N - N1 @@ -448,6 +493,10 @@ SUBROUTINE PDLAED2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ, NN2 = IE2 - IB2 + 1 NN = MAX( IE1, IE2 ) - MIN( IB1, IB2 ) + 1 220 CONTINUE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAED2 diff --git a/SRC/pdlaed3.f b/SRC/pdlaed3.f index 9f1f6305..6e43c7dd 100644 --- a/SRC/pdlaed3.f +++ 
b/SRC/pdlaed3.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, $ W, Z, U, LDU, BUF, INDX, INDCOL, INDROW, $ INDXR, INDXC, CTOT, NPCOL, INFO ) @@ -7,6 +13,7 @@ SUBROUTINE PDLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER DCOL, DROW, ICTXT, INFO, K, LDU, N, NB, NPCOL DOUBLE PRECISION RHO @@ -148,18 +155,54 @@ SUBROUTINE PDLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, * .. Intrinsic Functions .. INTRINSIC MOD, SIGN, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters. 
* INFO = 0 * * Quick return if possible * - IF( K.EQ.0 ) - $ RETURN + IF( K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DCOL, DROW, ICTXT, INFO, K, + $ LDU, N, NB, NPCOL, RHO, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAED3 inputs:,DCOL:',I5,',DROW:',I5, + $ ',ICTXT:',I5,',INFO:',I5,',K:',I5,',LDU:',I5, + $ ',N:',I5,',NB:',I5,',NPCOL:',I5, + $ ',RHO:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * ROW = DROW COL = DCOL @@ -344,6 +387,10 @@ SUBROUTINE PDLAED3( ICTXT, K, N, NB, D, DROW, DCOL, RHO, DLAMDA, * 190 CONTINUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAED3 diff --git a/SRC/pdlaedz.f b/SRC/pdlaedz.f index df903c34..41df3748 100644 --- a/SRC/pdlaedz.f +++ b/SRC/pdlaedz.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAEDZ( N, N1, ID, Q, IQ, JQ, LDQ, DESCQ, Z, WORK ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLAEDZ( N, N1, ID, Q, IQ, JQ, LDQ, DESCQ, Z, WORK ) * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER ID, IQ, JQ, LDQ, N, N1 * .. @@ -46,15 +53,50 @@ SUBROUTINE PDLAEDZ( N, N1, ID, Q, IQ, JQ, LDQ, DESCQ, Z, WORK ) INTEGER NUMROC EXTERNAL NUMROC * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. 
Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ICTXT = DESCQ( CTXT_ ) NB = DESCQ( NB_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) ID, IQ, JQ, LDQ, N, N1, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAEDZ inputs:,ID:',I5,',IQ:',I5,',JQ:',I5, + $ ',LDQ:',I5,',N:',I5,',N1:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF CALL INFOG2L( ID, ID, DESCQ, NPROW, NPCOL, MYROW, MYCOL, IIQ, JJQ, $ IQROW, IQCOL ) N2 = N - N1 @@ -145,6 +187,10 @@ SUBROUTINE PDLAEDZ( N, N1, ID, Q, IQ, JQ, LDQ, DESCQ, Z, WORK ) CALL DGEBR2D( ICTXT, 'All', ' ', N, 1, Z, N, IQROW, IQCOL ) END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAEDZ diff --git a/SRC/pdlaevswp.f b/SRC/pdlaevswp.f index 7e6ed409..ba9cd9ac 100644 --- a/SRC/pdlaevswp.f +++ b/SRC/pdlaevswp.f @@ -1,4 +1,10 @@ * +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +* +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, $ WORK, LWORK ) @@ -8,6 +14,7 @@ SUBROUTINE PDLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, * and University of California, Berkeley. * April 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IZ, JZ, LDZI, LWORK, N * .. 
@@ -152,11 +159,45 @@ SUBROUTINE PDLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IZ, JZ, LDZI, LWORK, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAEVSWP inputs:,IZ:',I5,',JZ:',I5,',LDZI:',I5, + $ ',LWORK:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF IAM = MYROW + MYCOL*NPROW IAM = MYROW*NPCOL + MYCOL * @@ -279,6 +320,10 @@ SUBROUTINE PDLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY, 100 CONTINUE * 110 CONTINUE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAEVSWP diff --git a/SRC/pdlahqr.f b/SRC/pdlahqr.f index 7d857dd1..6f6fb7cd 100644 --- a/SRC/pdlahqr.f +++ b/SRC/pdlahqr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, $ ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK, $ ILWORK, INFO ) @@ -6,6 +12,7 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. LOGICAL WANTT, WANTZ INTEGER IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N @@ -279,14 +286,35 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SIGN, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * INFO = 0 * ITERMAX = 30*( IHI-ILO+1 ) * ITERMAX = 0 - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * NODE (IAFIRST,JAFIRST) OWNS A(1,1) * @@ -297,6 +325,21 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, JAFIRST = DESCA( CSRC_ ) LDZ = DESCZ( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) WANTT, WANTZ, IHI, IHIZ, ILO, + $ ILOZ, ILWORK, INFO, LWORK, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAHQR inputs:,WANTT:',L2,',WANTZ:',L2, + $ ',IHI:',I5,',IHIZ:',I5,',ILO:',I5, + $ ',ILOZ:',I5,',ILWORK:',I5,',INFO:',I5, + $ ',LWORK:',I5,',N:',I5,',NPROW:',I5, + $ 
',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NODE = MYROW*NPCOL + MYCOL NUM = NPROW*NPCOL LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) @@ -346,6 +389,10 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, $ -1, -1 ) IF( INFO.LT.0 ) THEN CALL PXERBLA( CONTXT, 'PDLAHQR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -373,6 +420,10 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, WR( ILO ) = ZERO END IF WI( ILO ) = ZERO +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -2002,6 +2053,10 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, * Failure to converge in remaining number of iterations * INFO = I +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * 430 CONTINUE @@ -2070,6 +2125,10 @@ SUBROUTINE PDLAHQR( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, 450 CONTINUE CALL DGSUM2D( CONTXT, 'All', ' ', N, 1, WR, N, -1, -1 ) CALL DGSUM2D( CONTXT, 'All', ' ', N, 1, WI, N, -1, -1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * END OF PDLAHQR diff --git a/SRC/pdlahrd.f b/SRC/pdlahrd.f index da6f7267..30b9c536 100644 --- a/SRC/pdlahrd.f +++ b/SRC/pdlahrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, $ DESCY, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IY, JA, JY, K, N, NB * .. @@ -158,15 +165,49 @@ SUBROUTINE PDLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( N.LE.1 ) - $ RETURN + IF( N.LE.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IY, JA, JY, K, N, NB, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAHRD inputs:,IA:',I5,',IY:',I5,',JA:',I5, + $ ',JY:',I5,',K:',I5,',N:',I5,',NB:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * IOFF = MOD( JA-1, DESCA( NB_ ) ) CALL INFOG2L( IA+K, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, @@ -280,6 +321,10 @@ SUBROUTINE PDLAHRD( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY, * CALL PDELSET( A, K+NB+IA-1, J, DESCA, EI ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAHRD diff --git a/SRC/pdlamch.f b/SRC/pdlamch.f index 9a3ebb6c..99efe269 100644 --- a/SRC/pdlamch.f +++ b/SRC/pdlamch.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* DOUBLE PRECISION FUNCTION PDLAMCH( ICTXT, CMACH ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ DOUBLE PRECISION FUNCTION PDLAMCH( ICTXT, CMACH ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER CMACH INTEGER ICTXT @@ -62,7 +69,23 @@ DOUBLE PRECISION FUNCTION PDLAMCH( ICTXT, CMACH ) DOUBLE PRECISION DLAMCH EXTERNAL DLAMCH, LSAME * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * TEMP = DLAMCH( CMACH ) IDUMM = 0 diff --git a/SRC/pdlamr1d.f b/SRC/pdlamr1d.f index 08db3c6f..6f9d7d14 100644 --- a/SRC/pdlamr1d.f +++ b/SRC/pdlamr1d.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) * and University of California, Berkeley. * October 15, 1999 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, IB, JA, JB, N * .. @@ -105,15 +112,42 @@ SUBROUTINE PDLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) INTEGER NUMROC EXTERNAL NUMROC * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * DO 10 I = 1, DLEN_ DESCAA( I ) = DESCA( I ) @@ -129,6 +163,18 @@ SUBROUTINE PDLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) CALL PDGEMR2D( 1, N, A, IA, JA, DESCAA, B, IB, JB, DESCBB, ICTXT ) * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, IB, JA, JB, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAMR1D inputs:,IA:',I5,',IB:',I5,',JA:',I5, + $ ',JB:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NQ = NUMROC( N, DESCB( NB_ ), MYCOL, 0, NPCOL ) * IF( MYROW.EQ.0 ) THEN @@ -137,6 +183,10 @@ SUBROUTINE PDLAMR1D( N, A, IA, JA, DESCA, B, IB, JB, DESCB ) CALL DGEBR2D( ICTXT, 'C', ' ', NQ, 1, B, NQ, 0, MYCOL ) END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAMR1D diff --git a/SRC/pdlamve.f b/SRC/pdlamve.f index 7686cc55..711db2e5 100644 --- a/SRC/pdlamve.f +++ b/SRC/pdlamve.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAMVE( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, DWORK ) * @@ -8,6 +14,7 @@ SUBROUTINE PDLAMVE( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -165,13 +172,42 @@ SUBROUTINE PDLAMVE( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Find underlying mesh properties. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, JA, JB, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAMVE inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',JA:',I5,',JB:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Decode input parameters. * UPPER = LSAME( UPLO, 'U' ) @@ -198,6 +234,10 @@ SUBROUTINE PDLAMVE( UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB ) END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAMVE diff --git a/SRC/pdlange.f b/SRC/pdlange.f index 98be0941..60310cc8 100644 --- a/SRC/pdlange.f +++ b/SRC/pdlange.f @@ -1,5 +1,12 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  
All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* DOUBLE PRECISION FUNCTION PDLANGE( NORM, M, N, A, IA, JA, DESCA, $ WORK ) + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -175,12 +182,40 @@ DOUBLE PRECISION FUNCTION PDLANGE( NORM, M, N, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) NORM, IA, JA, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLANGE inputs:,NORM:',A5,',IA:',I5,',JA:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ, $ IAROW, IACOL ) @@ -332,6 +367,10 @@ DOUBLE PRECISION FUNCTION PDLANGE( NORM, M, N, A, IA, JA, DESCA, * PDLANGE = VALUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLANGE diff --git a/SRC/pdlanhs.f b/SRC/pdlanhs.f index 2efe6a6f..6fedf905 100644 --- a/SRC/pdlanhs.f +++ b/SRC/pdlanhs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* DOUBLE PRECISION FUNCTION PDLANHS( NORM, N, A, IA, JA, DESCA, $ WORK ) * @@ -6,6 +12,7 @@ DOUBLE PRECISION FUNCTION PDLANHS( NORM, N, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER NORM INTEGER IA, JA, N @@ -169,12 +176,40 @@ DOUBLE PRECISION FUNCTION PDLANHS( NORM, N, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) NORM, IA, JA, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLANHS inputs:,NORM:',A5,',IA:',I5,',JA:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) @@ -733,6 +768,10 @@ DOUBLE PRECISION FUNCTION PDLANHS( NORM, N, A, IA, JA, DESCA, * PDLANHS = VALUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLANHS diff --git a/SRC/pdlansy.f b/SRC/pdlansy.f index 9d100b22..54063997 100644 --- a/SRC/pdlansy.f +++ b/SRC/pdlansy.f @@ -1,5 +1,12 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved.
+* +* +#include "SL_Context_fortran_include.h" +* DOUBLE PRECISION FUNCTION PDLANSY( NORM, UPLO, N, A, IA, JA, $ DESCA, WORK ) + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -193,12 +200,40 @@ DOUBLE PRECISION FUNCTION PDLANSY( NORM, UPLO, N, A, IA, JA, * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters and local indexes. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) NORM, UPLO, IA, JA, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLANSY inputs:,NORM:',A5,',UPLO:',A5, + $ ',IA:',I5,',JA:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, $ IIA, JJA, IAROW, IACOL ) * @@ -857,6 +892,10 @@ DOUBLE PRECISION FUNCTION PDLANSY( NORM, UPLO, N, A, IA, JA, * PDLANSY = VALUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLANSY diff --git a/SRC/pdlantr.f b/SRC/pdlantr.f index 9484dfbd..ab476569 100644 --- a/SRC/pdlantr.f +++ b/SRC/pdlantr.f @@ -1,5 +1,12 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* DOUBLE PRECISION FUNCTION PDLANTR( NORM, UPLO, DIAG, M, N, A, $ IA, JA, DESCA, WORK ) + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -191,12 +198,41 @@ DOUBLE PRECISION FUNCTION PDLANTR( NORM, UPLO, DIAG, M, N, A, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, NORM, UPLO, IA, JA, M, + $ N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLANTR inputs:,DIAG:',A5,',NORM:',A5, + $ ',UPLO:',A5,',IA:',I5,',JA:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * UDIAG = LSAME( DIAG, 'U' ) CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, @@ -1097,6 +1133,10 @@ DOUBLE PRECISION FUNCTION PDLANTR( NORM, UPLO, DIAG, M, N, A, * PDLANTR = VALUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLANTR diff --git a/SRC/pdlapiv.f b/SRC/pdlapiv.f index 50613de0..535b3d9d 100644 --- a/SRC/pdlapiv.f +++ b/SRC/pdlapiv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, $ DESCA, IPIV, IP, JP, DESCIP, IWORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, * and University of California, Berkeley. * November 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER*1 DIREC, PIVROC, ROWCOL INTEGER IA, IP, JA, JP, M, N @@ -222,19 +229,55 @@ SUBROUTINE PDLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, * .. Intrinsic Functions .. INTRINSIC MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIREC, PIVROC, ROWCOL, IA, IP, + $ JA, JP, M, N, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDLAPIV inputs:,DIREC:',A5,',PIVROC:',A5, + $ ',ROWCOL:',A5,',IA:',I5,',IP:',I5, + $ ',JA:',I5,',JP:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF ROWPVT = LSAME( ROWCOL, 'R' ) * * If we're pivoting the rows of sub( A ) * IF( ROWPVT ) THEN - IF( M.LE.1 .OR. N.LT.1 ) - $ RETURN + IF( M.LE.1 .OR. 
N.LT.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * If the pivot vector is already distributed correctly * @@ -293,8 +336,13 @@ SUBROUTINE PDLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, * Otherwise, we're pivoting the columns of sub( A ) * ELSE - IF( M.LT.1 .OR. N.LE.1 ) - $ RETURN + IF( M.LT.1 .OR. N.LE.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * If the pivot vector is already distributed correctly * @@ -349,6 +397,10 @@ SUBROUTINE PDLAPIV( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, END IF END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAPIV diff --git a/SRC/pdlapv2.f b/SRC/pdlapv2.f index a8e48204..cc137bf1 100644 --- a/SRC/pdlapv2.f +++ b/SRC/pdlapv2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, $ IP, JP, DESCIP ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIREC, ROWCOL INTEGER IA, IP, JA, JP, M, N @@ -167,15 +174,41 @@ SUBROUTINE PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * ROWPVT = LSAME( ROWCOL, 'R' ) IF( ROWPVT ) THEN - IF( M.LE.1 .OR. 
N.LT.1 ) - $ RETURN + IF( M.LE.1 .OR. N.LT.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF ELSE - IF( M.LT.1 .OR. N.LE.1 ) - $ RETURN + IF( M.LT.1 .OR. N.LE.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF END IF FORWRD = LSAME( DIREC, 'F' ) * @@ -188,6 +221,20 @@ SUBROUTINE PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIREC, ROWCOL, IA, IP, JA, JP, + $ M, N, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDLAPV2 inputs:,DIREC:',A5,',ROWCOL:',A5, + $ ',IA:',I5,',IP:',I5,',JA:',I5,',JP:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * If I'm applying pivots from beginning to end (e.g., repeating * pivoting done earlier). Thus this section computes P * sub( A ). * @@ -406,6 +453,10 @@ SUBROUTINE PDLAPV2( DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End PDLAPV2 diff --git a/SRC/pdlaqge.f b/SRC/pdlaqge.f index 9203c371..fb95e914 100644 --- a/SRC/pdlaqge.f +++ b/SRC/pdlaqge.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, $ AMAX, EQUED ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER EQUED INTEGER IA, JA, M, N @@ -177,12 +184,32 @@ SUBROUTINE PDLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * .. Intrinsic Functions .. INTRINSIC MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * IF( M.LE.0 .OR. N.LE.0 ) THEN EQUED = 'N' +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -190,6 +217,21 @@ SUBROUTINE PDLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) EQUED, IA, JA, M, N, AMAX, COLCND, + $ ROWCND, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDLAQGE inputs:,EQUED:',A5,',IA:',I5,',JA:',I5, + $ ',M:',I5,',N:',I5,',AMAX:',F9.4, + $ ',COLCND:',F9.4,',ROWCND:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) IROFF = MOD( IA-1, DESCA( MB_ ) ) @@ -263,6 +305,10 @@ SUBROUTINE PDLAQGE( M, N, A, IA, JA, DESCA, R, C, ROWCND, COLCND, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAQGE diff --git a/SRC/pdlaqr0.f b/SRC/pdlaqr0.f index 9153500d..0f34df04 100644 --- a/SRC/pdlaqr0.f +++ b/SRC/pdlaqr0.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  
All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* RECURSIVE SUBROUTINE PDLAQR0( WANTT, WANTZ, N, ILO, IHI, H, $ DESCH, WR, WI, ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, $ IWORK, LIWORK, INFO, RECLEVEL ) @@ -10,6 +16,7 @@ RECURSIVE SUBROUTINE PDLAQR0( WANTT, WANTZ, N, ILO, IHI, H, * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -290,10 +297,43 @@ RECURSIVE SUBROUTINE PDLAQR0( WANTT, WANTZ, N, ILO, IHI, H, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, INT, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F INFO = 0 ICTXT = DESCH( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IHI, IHIZ, ILO, ILOZ, INFO, + $ LIWORK, LWORK, N, RECLEVEL, + $ WANTT, WANTZ, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAQR0 inputs:,IHI:',I5,',IHIZ:',I5,',ILO:',I5, + $ ',ILOZ:',I5,',INFO:',I5,',LIWORK:',I5, + $ ',LWORK:',I5,',N:',I5,',RECLEVEL:',I5, + $ ',WANTT:',L2,',WANTZ:',L2, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL RECURSION = RECLEVEL .LT. 
RECMAX * @@ -302,6 +342,10 @@ RECURSIVE SUBROUTINE PDLAQR0( WANTT, WANTZ, N, ILO, IHI, H, IF( N.EQ.0 ) THEN WORK( 1 ) = ONE IWORK( 1 ) = 1 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -425,6 +469,10 @@ RECURSIVE SUBROUTINE PDLAQR0( WANTT, WANTZ, N, ILO, IHI, H, IF( LQUERY ) THEN WORK( 1 ) = DBLE( LWKOPT ) IWORK( 1 ) = LIWKOPT +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -922,6 +970,10 @@ RECURSIVE SUBROUTINE PDLAQR0( WANTT, WANTZ, N, ILO, IHI, H, IWORK( 2 ) = SWEEP IWORK( 3 ) = TOTNS END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAQR0 diff --git a/SRC/pdlaqr1.f b/SRC/pdlaqr1.f index 2226c757..09cfffec 100644 --- a/SRC/pdlaqr1.f +++ b/SRC/pdlaqr1.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, $ DESCA, WR, WI, ILOZ, IHIZ, Z, $ DESCZ, WORK, LWORK, IWORK, @@ -11,6 +17,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -301,13 +308,34 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, MIN, MOD, SIGN, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * INFO = 0 * ITERMAX = 30*( IHI-ILO+1 ) - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * NODE (IAFIRST,JAFIRST) OWNS A(1,1) * @@ -318,6 +346,21 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, JAFIRST = DESCA( CSRC_ ) LDZ = DESCZ( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) WANTT, WANTZ, IHI, IHIZ, ILO, + $ ILOZ, ILWORK, INFO, LWORK, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAQR1 inputs:,WANTT:',L2,',WANTZ:',L2, + $ ',IHI:',I5,',IHIZ:',I5,',ILO:',I5, + $ ',ILOZ:',I5,',ILWORK:',I5,',INFO:',I5, + $ ',LWORK:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NODE = MYROW*NPCOL + MYCOL NUM = NPROW*NPCOL LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) @@ -341,6 +384,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, IF( LWORK.EQ.-1 .OR. 
ILWORK.EQ.-1 ) THEN WORK( 1 ) = DBLE( LWKOPT ) IWORK( 1 ) = 3 +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSEIF( LWORK.LT.LWKOPT ) THEN INFO = -15 @@ -371,6 +418,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, IF( INFO.LT.0 ) THEN CALL PXERBLA( CONTXT, 'PDLAQR1', -INFO ) WORK( 1 ) = DBLE( LWKOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -400,6 +451,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, END IF WI( ILO ) = ZERO WORK( 1 ) = DBLE( LWKOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -415,6 +470,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, $ WORK( S2+1 ), NH, WORK( S3+1 ), 4*LDS*LDS, $ INFO ) WORK( 1 ) = DBLE( LWKOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -1326,7 +1385,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, $ ( MOD( ISTART-1, HBL ).LT.HBL-2 ) .AND. $ ( ICURROW( KI ).EQ.MYROW ) ) THEN IROW1 = MIN( K2( KI )+1, I-1 ) + 1 - CALL INFOG1L( IROW1, HBL, NPCOL, MYCOL, DESCA(CSRC_), + CALL INFOG1L( IROW1, HBL, NPCOL, MYCOL, DESCA(CSRC_), $ ITMP1, ITMP2 ) ITMP2 = NUMROC( I2, HBL, MYCOL, DESCA(CSRC_), NPCOL ) II = KROW( KI ) @@ -1373,7 +1432,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, * IROW1 = KROW( KI ) IROW2 = KP2ROW( KI ) - CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, + CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, $ DESCA(CSRC_), ICOL1, ICOL2 ) ICOL2 = NUMROC(I2,HBL,MYCOL,DESCA(CSRC_),NPCOL ) IF( ( MOD( K-1, HBL ).LT.HBL-2 ) .OR. 
@@ -1441,7 +1500,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, * IROW1 = KROW( KI ) + K - ISTART IROW2 = KP2ROW( KI ) + K - ISTART - CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, + CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, $ DESCA(CSRC_),ICOL1, ICOL2 ) ICOL2 = NUMROC(I2,HBL,MYCOL,DESCA(CSRC_),NPCOL ) IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND. @@ -1531,7 +1590,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, * IROW1 = KROW( KI ) + K - ISTART IROW2 = KP2ROW( KI ) + K - ISTART - CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, + CALL INFOG1L( ITMP1, HBL, NPCOL, MYCOL, $ DESCA(CSRC_), ICOL1, ICOL2 ) ICOL2 = NUMROC(I2,HBL,MYCOL,DESCA(CSRC_),NPCOL ) IF( ( MOD( K-1, HBL ).EQ.HBL-2 ) .AND. @@ -1972,9 +2031,9 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, CALL DGESD2D( CONTXT, LIHIH-LILOH+1, 1, $ A( ( ITMP1-1 )*LDA+LILOH ), $ LDA, MYROW, RIGHT ) - CALL INFOG1L( K, HBL, NPCOL, MYCOL, + CALL INFOG1L( K, HBL, NPCOL, MYCOL, $ DESCA(CSRC_), ITMP1, ITMP2 ) - ITMP2 = NUMROC( K+1, HBL, MYCOL, + ITMP2 = NUMROC( K+1, HBL, MYCOL, $ DESCA(CSRC_), NPCOL ) CALL DGERV2D( CONTXT, LIHIH-LILOH+1, 1, $ A( ( ITMP1-1 )*LDA+LILOH ), @@ -2090,7 +2149,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, IF( ( MOD( K1( KI )-1, HBL ).GE.HBL-2 ) .AND. $ ( ( MYCOL.EQ.ICURCOL( KI ) ) .OR. ( RIGHT.EQ. $ ICURCOL( KI ) ) ) .AND. ( NPCOL.GT.1 ) ) THEN - CALL INFOG1L( K2( KI )+1, HBL, NPCOL, MYCOL, + CALL INFOG1L( K2( KI )+1, HBL, NPCOL, MYCOL, $ DESCA(CSRC_), KCOL( KI ), ITMP2 ) ITMP2 = NUMROC( N, HBL, MYCOL, DESCA(CSRC_), NPCOL ) END IF @@ -2099,7 +2158,7 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, $ ICURCOL( KI ) ) ) .AND. 
( NPCOL.GT.1 ) ) THEN CALL INFOG1L( 1, HBL, NPCOL, MYCOL,DESCA(CSRC_),ITMP2, $ KP2COL( KI ) ) - KP2COL( KI ) = NUMROC( K2( KI )+3, HBL, MYCOL, + KP2COL( KI ) = NUMROC( K2( KI )+3, HBL, MYCOL, $ DESCA(CSRC_), NPCOL ) END IF K1( KI ) = K2( KI ) + 1 @@ -2131,6 +2190,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, * INFO = I WORK( 1 ) = DBLE( LWKOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * 430 CONTINUE @@ -2200,6 +2263,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, $ INFO ) IF( INFO.NE.0 ) THEN WORK( 1 ) = DBLE( LWKOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF IF( NODE.NE.0 ) THEN @@ -2239,6 +2306,10 @@ RECURSIVE SUBROUTINE PDLAQR1( WANTT, WANTZ, N, ILO, IHI, A, IWORK( 1 ) = TOTIT IWORK( 2 ) = TOTSW IWORK( 3 ) = TOTNS +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * END OF PDLAQR1 diff --git a/SRC/pdlaqr2.f b/SRC/pdlaqr2.f index bae5fb2b..c910d523 100644 --- a/SRC/pdlaqr2.f +++ b/SRC/pdlaqr2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA, $ ILOZ, IHIZ, Z, DESCZ, NS, ND, SR, SI, T, LDT, $ V, LDV, WR, WI, WORK, LWORK ) @@ -9,6 +15,7 @@ SUBROUTINE PDLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -241,12 +248,33 @@ SUBROUTINE PDLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. 
Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * INFO = 0 * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * NODE (IAFIRST,JAFIRST) OWNS A(1,1) * @@ -257,6 +285,23 @@ SUBROUTINE PDLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA, JAFIRST = DESCA( CSRC_ ) LDZ = DESCZ( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IHIZ, ILOZ, KBOT, KTOP, LDT, + $ LDV, LWORK, N, ND, NS, + $ NW, WANTT, WANTZ, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAQR2 inputs:,IHIZ:',I5,',ILOZ:',I5, + $ ',KBOT:',I5,',KTOP:',I5,',LDT:',I5,',LDV:',I5, + $ ',LWORK:',I5,',N:',I5,',ND:',I5, + $ ',NS:',I5,',NW:',I5,',WANTT:',L2, + $ ',WANTZ:',L2,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NODE = MYROW*NPCOL + MYCOL LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) diff --git a/SRC/pdlaqr3.f b/SRC/pdlaqr3.f index caa09756..0581919d 100644 --- a/SRC/pdlaqr3.f +++ b/SRC/pdlaqr3.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, $ DESCH, ILOZ, IHIZ, Z, DESCZ, NS, ND, $ SR, SI, V, DESCV, NH, T, DESCT, NV, @@ -12,6 +18,7 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. 
@@ -174,7 +181,7 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, * are stored in SR(KBOT-ND+1) through SR(KBOT) and * SI(KBOT-ND+1) through SI(KBOT), respectively. * -* V (global workspace) DOUBLE PRECISION array, dimension +* V (global workspace) DOUBLE PRECISION array, dimension * (DESCV(LLD_),*) * An NW-by-NW distributed work array. * @@ -184,7 +191,7 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, * NH (input) INTEGER scalar * The number of columns of T. NH.GE.NW. * -* T (global workspace) DOUBLE PRECISION array, dimension +* T (global workspace) DOUBLE PRECISION array, dimension * (DESCV(LLD_),*) * * DESCT (global and local input) INTEGER array of dimension DLEN_. @@ -194,7 +201,7 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, * The number of rows of work array WV available for * workspace. NV.GE.NW. * -* WV (global workspace) DOUBLE PRECISION array, dimension +* WV (global workspace) DOUBLE PRECISION array, dimension * (DESCW(LLD_),*) * * DESCW (global and local input) INTEGER array of dimension DLEN_. @@ -282,9 +289,42 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F ICTXT = DESCH( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IHIZ, ILOZ, KBOT, KTOP, LWORK, + $ N, ND, NH, NS, NV, + $ NW, LIWORK, RECLEVEL, WANTT, WANTZ, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAQR3 inputs:,IHIZ:',I5,',ILOZ:',I5, + $ ',KBOT:',I5,',KTOP:',I5,',LWORK:',I5, + $ ',N:',I5,',ND:',I5,',NH:',I5,',NS:',I5, + $ ',NV:',I5,',NW:',I5,',LIWORK:',I5, + $ ',RECLEVEL:',I5,',WANTT:',L2,',WANTZ:',L2, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL * * Extract local leading dimensions, blockfactors, offset for @@ -342,7 +382,7 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, $ .AND. RECLEVEL.LT.RECMAX ) THEN CALL PDLAQR0( .TRUE., .TRUE., JW+IROFFH, 1+IROFFH, $ JW+IROFFH, T, DESCT, SR, SI, 1, JW, V, DESCV, - $ WORK, -1, IWORK, LIWORK-NSEL, INFQR, + $ WORK, -1, IWORK, LIWORK-NSEL, INFQR, $ RECLEVEL+1 ) LWK3 = INT( WORK( 1 ) ) IWRK1 = IWORK( 1 ) @@ -410,18 +450,33 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, * IWORK(1:NSEL) is used as the array SELECT for PDTRORD. * IWORK( 1 ) = ILWKOPT + NSEL - IF( LQUERY ) - $ RETURN + IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Nothing to do for an empty active block ... NS = 0 ND = 0 - IF( KTOP.GT.KBOT ) - $ RETURN + IF( KTOP.GT.KBOT ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ... nor for an empty deflation window. 
* - IF( NW.LT.1 ) - $ RETURN + IF( NW.LT.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Machine constants. * @@ -457,6 +512,10 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, IF( KWTOP.GT.KTOP ) $ CALL PDELSET( H, KWTOP, KWTOP-1 , DESCH, ZERO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -489,6 +548,10 @@ RECURSIVE SUBROUTINE PDLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, CALL PDELSET( H, I+1, I+1, DESCH, DD ) END IF WORK( 1 ) = DBLE( LWKOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * diff --git a/SRC/pdlaqr4.f b/SRC/pdlaqr4.f index 105deaa7..c626b396 100644 --- a/SRC/pdlaqr4.f +++ b/SRC/pdlaqr4.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAQR4( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, $ ILOZ, IHIZ, Z, DESCZ, T, LDT, V, LDV, WORK, $ LWORK, INFO ) @@ -9,6 +15,7 @@ SUBROUTINE PDLAQR4( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -226,14 +233,35 @@ SUBROUTINE PDLAQR4( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * INFO = 0 * NH = IHI - ILO + 1 NZ = IHIZ - ILOZ + 1 - IF( N.EQ.0 .OR. 
NH.EQ.0 ) - $ RETURN + IF( N.EQ.0 .OR. NH.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * NODE (IAFIRST,JAFIRST) OWNS A(1,1) * @@ -244,6 +272,21 @@ SUBROUTINE PDLAQR4( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI, JAFIRST = DESCA( CSRC_ ) LDZ = DESCZ( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) WANTT, WANTZ, IHI, IHIZ, ILO, + $ ILOZ, INFO, LDT, LDV, LWORK, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAQR4 inputs:,WANTT:',L2,',WANTZ:',L2, + $ ',IHI:',I5,',IHIZ:',I5,',ILO:',I5, + $ ',ILOZ:',I5,',INFO:',I5,',LDT:',I5,',LDV:',I5, + $ ',LWORK:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NODE = MYROW*NPCOL + MYCOL LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) diff --git a/SRC/pdlaqr5.f b/SRC/pdlaqr5.f index 490c42fb..11d6a788 100644 --- a/SRC/pdlaqr5.f +++ b/SRC/pdlaqr5.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, $ SR, SI, H, DESCH, ILOZ, IHIZ, Z, DESCZ, WORK, $ LWORK, IWORK, LIWORK ) @@ -9,6 +15,7 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -69,7 +76,7 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * parts of the NSHFTS shifts of origin that define the * multi-shift QR sweep. 
* -* H (local input/output) DOUBLE PRECISION array of size +* H (local input/output) DOUBLE PRECISION array of size * (DESCH(LLD_),*) * On input H contains a Hessenberg matrix. On output a * multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied @@ -179,11 +186,44 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, EXTERNAL DGEMM, DLABAD, DLAMOV, DLAQR1, DLARFG, DLASET, $ DTRMM, DLAQR6 * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * INFO = 0 ICTXT = DESCH( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IHIZ, ILOZ, KACC22, KBOT, KTOP, + $ N, NSHFTS, LWORK, + $ LIWORK, WANTT, WANTZ, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAQR5 inputs:,IHIZ:',I5,',ILOZ:',I5, + $ ',KACC22:',I5,',KBOT:',I5,',KTOP:',I5, + $ ',N:',I5,',NSHFTS:',I5,',LWORK:',I5, + $ ',LIWORK:',I5,',WANTT:',L2,',WANTZ:',L2, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL LLDH = DESCH( LLD_ ) LLDZ = DESCZ( LLD_ ) @@ -193,14 +233,24 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * * If there are no shifts, then there is nothing to do. * - IF( .NOT. LQUERY .AND. NSHFTS.LT.2 ) - $ RETURN + IF( .NOT. LQUERY .AND. 
NSHFTS.LT.2 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * If the active block is empty or 1-by-1, then there * is nothing to do. * - IF( .NOT. LQUERY .AND. KTOP.GE.KBOT ) - $ RETURN + IF( .NOT. LQUERY .AND. KTOP.GE.KBOT ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Shuffle shifts into pairs of real shifts and pairs of * complex conjugate shifts assuming complex conjugate @@ -307,11 +357,21 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, $ MAX( HROWS*NB, HCOLS*NB ) WORK(1) = DBLE(LWKOPT) IWORK(1) = 5*NUMWIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Check if KTOP and KBOT are valid. * - IF( KTOP.LT.1 .OR. KBOT.GT.N ) RETURN + IF( KTOP.LT.1 .OR. KBOT.GT.N ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Create and chase NUMWIN chains of NBMPS bulges. @@ -941,6 +1001,12 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * * If we have no more windows, return. * - IF( ANMWIN.LT.1 ) RETURN + IF( ANMWIN.LT.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ELSE @@ -2247,6 +2313,12 @@ SUBROUTINE PDLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, * * If we have no more windows, return. * - IF( ANMWIN.LT.1 ) RETURN + IF( ANMWIN.LT.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Check for any more windows to bring over the border. diff --git a/SRC/pdlaqsy.f b/SRC/pdlaqsy.f index c2763ee9..58bfd7fc 100644 --- a/SRC/pdlaqsy.f +++ b/SRC/pdlaqsy.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, $ AMAX, EQUED ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. 
Scalar Arguments .. CHARACTER EQUED, UPLO INTEGER IA, JA, N @@ -179,12 +186,32 @@ SUBROUTINE PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * IF( N.LE.0 ) THEN EQUED = 'N' +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -192,6 +219,20 @@ SUBROUTINE PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) EQUED, UPLO, IA, JA, N, AMAX, + $ SCOND, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDLAQSY inputs:,EQUED:',A5,',UPLO:',A5, + $ ',IA:',I5,',JA:',I5,',N:',I5,',AMAX:',F9.4, + $ ',SCOND:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) LDA = DESCA( LLD_ ) @@ -352,6 +393,10 @@ SUBROUTINE PDLAQSY( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAQSY diff --git a/SRC/pdlared1d.f b/SRC/pdlared1d.f index 41a5fa5f..bd94f746 100644 --- a/SRC/pdlared1d.f +++ b/SRC/pdlared1d.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARED1D( N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLARED1D( N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK ) * and University of California, Berkeley. * December 12, 2005 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, JA, LWORK, N * .. @@ -138,12 +145,46 @@ SUBROUTINE PDLARED1D( N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK ) * .. Intrinsic Functions .. INTRINSIC MIN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL BLACS_GRIDINFO( DESC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, JA, LWORK, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLARED1D inputs:,IA:',I5,',JA:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NB = DESC( MB_ ) * * @@ -166,6 +207,10 @@ SUBROUTINE PDLARED1D( N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK ) 20 CONTINUE 30 CONTINUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARED1D diff --git a/SRC/pdlared2d.f b/SRC/pdlared2d.f index 
43099014..8d1042c3 100644 --- a/SRC/pdlared2d.f +++ b/SRC/pdlared2d.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARED2D( N, IA, JA, DESC, BYROW, BYALL, WORK, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLARED2D( N, IA, JA, DESC, BYROW, BYALL, WORK, LWORK ) * and University of California, Berkeley. * December 12, 2005 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, JA, LWORK, N * .. @@ -135,12 +142,46 @@ SUBROUTINE PDLARED2D( N, IA, JA, DESC, BYROW, BYALL, WORK, LWORK ) * .. Intrinsic Functions .. INTRINSIC MIN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL BLACS_GRIDINFO( DESC( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, JA, LWORK, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLARED2D inputs:,IA:',I5,',JA:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF MB = DESC( MB_ ) * DO 30 PROW = 0, NPROW - 1 @@ -163,6 +204,10 @@ SUBROUTINE PDLARED2D( N, IA, JA, DESC, BYROW, BYALL, WORK, LWORK ) 
20 CONTINUE 30 CONTINUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PSLARED2D diff --git a/SRC/pdlarf.f b/SRC/pdlarf.f index 41368d6d..bd3a4104 100644 --- a/SRC/pdlarf.f +++ b/SRC/pdlarf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, $ C, IC, JC, DESCC, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, M, N @@ -257,18 +264,52 @@ SUBROUTINE PDLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( M.LE.0 .OR. N.LE.0 ) - $ RETURN + IF( M.LE.0 .OR. N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters. 
* ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, IC, INCV, IV, JC, JV, + $ M, N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLARF inputs:,SIDE:',A5,',IC:',I5,',INCV:',I5, + $ ',IV:',I5,',JC:',I5,',JV:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Figure local indexes * CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC, @@ -805,6 +846,10 @@ SUBROUTINE PDLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARF diff --git a/SRC/pdlarfb.f b/SRC/pdlarfb.f index 2c3b426f..94887287 100644 --- a/SRC/pdlarfb.f +++ b/SRC/pdlarfb.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, $ JV, DESCV, T, C, IC, JC, DESCC, WORK ) * @@ -5,6 +11,7 @@ SUBROUTINE PDLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS, DIRECT, STOREV INTEGER IC, IV, JC, JV, K, M, N @@ -247,17 +254,53 @@ SUBROUTINE PDLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) - $ RETURN + IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, DIRECT, STOREV, + $ IC, IV, JC, JV, K, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLARFB inputs:,SIDE:',A5,',TRANS:',A5, + $ ',DIRECT:',A5,',STOREV:',A5,',IC:',I5, + $ ',IV:',I5,',JC:',I5,',JV:',I5,',K:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * IF( LSAME( TRANS, 'N' ) ) THEN TRANST = 'T' @@ -878,6 +921,10 @@ SUBROUTINE PDLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, IV, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARFB diff --git a/SRC/pdlarfg.f b/SRC/pdlarfg.f index 653dfa5d..a7567557 100644 --- a/SRC/pdlarfg.f +++ b/SRC/pdlarfg.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, $ TAU ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IAX, INCX, IX, JAX, JX, N DOUBLE PRECISION ALPHA @@ -166,12 +173,41 @@ SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, * .. Intrinsic Functions .. 
INTRINSIC ABS, SIGN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IAX, INCX, IX, JAX, JX, N, ALPHA, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLARFG inputs:,IAX:',I5,',INCX:',I5,',IX:',I5, + $ ',JAX:',I5,',JX:',I5,',N:',I5, + $ ',ALPHA:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * IF( INCX.EQ.DESCX( M_ ) ) THEN * @@ -180,8 +216,13 @@ SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, CALL INFOG2L( IX, JAX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIAX, JJAX, IXROW, IXCOL ) * - IF( MYROW.NE.IXROW ) - $ RETURN + IF( MYROW.NE.IXROW ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Broadcast X(IAX,JAX) across the process row. * @@ -203,8 +244,13 @@ SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, CALL INFOG2L( IAX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, $ IIAX, JJAX, IXROW, IXCOL ) * - IF( MYCOL.NE.IXCOL ) - $ RETURN + IF( MYCOL.NE.IXCOL ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Broadcast X(IAX,JAX) across the process column. 
* @@ -223,6 +269,10 @@ SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, * IF( N.LE.0 ) THEN TAU( INDXTAU ) = ZERO +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -274,6 +324,10 @@ SUBROUTINE PDLARFG( N, ALPHA, IAX, JAX, X, IX, JX, DESCX, INCX, END IF END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARFG diff --git a/SRC/pdlarft.f b/SRC/pdlarft.f index 9a3ae474..4980f230 100644 --- a/SRC/pdlarft.f +++ b/SRC/pdlarft.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARFT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, $ T, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLARFT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIRECT, STOREV INTEGER IV, JV, K, N @@ -197,15 +204,49 @@ SUBROUTINE PDLARFT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, * .. Intrinsic Functions .. INTRINSIC MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( N.LE.0 .OR. K.LE.0 ) - $ RETURN + IF( N.LE.0 .OR. 
K.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ICTXT = DESCV( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIRECT, STOREV, IV, JV, K, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLARFT inputs:,DIRECT:',A5,',STOREV:',A5, + $ ',IV:',I5,',JV:',I5,',K:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * FORWARD = LSAME( DIRECT, 'F' ) CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, @@ -531,6 +572,10 @@ SUBROUTINE PDLARFT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARFT diff --git a/SRC/pdlarz.f b/SRC/pdlarz.f index f45c1372..d0798568 100644 --- a/SRC/pdlarz.f +++ b/SRC/pdlarz.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, $ IC, JC, DESCC, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE INTEGER IC, INCV, IV, JC, JV, L, M, N @@ -266,18 +273,53 @@ SUBROUTINE PDLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( M.LE.0 .OR. N.LE.0 ) - $ RETURN + IF( M.LE.0 .OR. N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters. * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, IC, INCV, IV, JC, JV, + $ L, M, N, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDLARZ inputs:,SIDE:',A5,',IC:',I5,',INCV:',I5, + $ ',IV:',I5,',JC:',I5,',JV:',I5, + $ ',L:',I5,',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Figure local indexes * LEFT = LSAME( SIDE, 'L' ) @@ -907,6 +949,10 @@ SUBROUTINE PDLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARZ diff --git a/SRC/pdlarzb.f b/SRC/pdlarzb.f index 096fba5e..b6736725 100644 --- a/SRC/pdlarzb.f +++ b/SRC/pdlarzb.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, $ IV, JV, DESCV, T, C, IC, JC, DESCC, WORK ) * @@ -5,6 +11,7 @@ SUBROUTINE PDLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER DIRECT, SIDE, STOREV, TRANS INTEGER IC, IV, JC, JV, K, L, M, N @@ -252,18 +259,54 @@ SUBROUTINE PDLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, INTEGER ICEIL, NUMROC EXTERNAL ICEIL, LSAME, NUMROC * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) - $ RETURN + IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * ICTXT = DESCC( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIRECT, SIDE, STOREV, TRANS, + $ IC, IV, JC, JV, K, L, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLARZB inputs:,DIRECT:',A5,',SIDE:',A5, + $ ',STOREV:',A5,',TRANS:',A5,',IC:',I5, + $ ',IV:',I5,',JC:',I5,',JV:',I5,',K:',I5, + $ ',L:',I5,',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Check for currently supported options * INFO = 0 @@ -275,6 +318,10 @@ SUBROUTINE PDLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLARZB', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -604,6 +651,10 @@ SUBROUTINE PDLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, * END IF * +* +* Capture the 
subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARZB diff --git a/SRC/pdlarzt.f b/SRC/pdlarzt.f index 5baac6cd..affd629f 100644 --- a/SRC/pdlarzt.f +++ b/SRC/pdlarzt.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, $ T, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIRECT, STOREV INTEGER IV, JV, K, N @@ -209,13 +216,42 @@ SUBROUTINE PDLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, * .. Intrinsic Functions .. INTRINSIC MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCV( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIRECT, STOREV, IV, JV, K, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLARZT inputs:,DIRECT:',A5,',STOREV:',A5, + $ ',IV:',I5,',JV:',I5,',K:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Check for currently supported options * INFO = 0 @@ -227,6 +263,10 @@ SUBROUTINE PDLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLARZT', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -292,6 +332,10 @@ SUBROUTINE PDLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLARZT diff --git a/SRC/pdlascl.f b/SRC/pdlascl.f index 3925ed45..3f9d37fa 100644 --- a/SRC/pdlascl.f +++ b/SRC/pdlascl.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER TYPE INTEGER IA, INFO, JA, M, N @@ -161,13 +168,43 @@ SUBROUTINE PDLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ABS, MIN, MOD * .. +* .. 
LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TYPE, IA, INFO, JA, M, N, CFROM, + $ CTO, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDLASCL inputs:,TYPE:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',M:',I5,',N:',I5, + $ ',CFROM:',F9.4,',CTO:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * IF( NPROW.EQ.-1 ) THEN @@ -199,13 +236,22 @@ SUBROUTINE PDLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLASCL', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 .OR. M.EQ.0 ) - $ RETURN + IF( N.EQ.0 .OR. M.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get machine parameters * @@ -521,6 +567,10 @@ SUBROUTINE PDLASCL( TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, IF( .NOT.DONE ) $ GO TO 10 * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLASCL diff --git a/SRC/pdlase2.f b/SRC/pdlase2.f index a23e640b..ac5c292c 100644 --- a/SRC/pdlase2.f +++ b/SRC/pdlase2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, M, N @@ -154,14 +161,48 @@ SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* + IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, JA, M, N, ALPHA, BETA, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLASE2 inputs:,UPLO:',A5,',IA:',I5,',JA:',I5, + $ ',M:',I5,',N:',I5,',ALPHA:',F9.4, + $ ',BETA:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) @@ -212,8 +253,13 @@ SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) IF( MYCOL.EQ.IACOL ) THEN * MPA = NUMROC( M+IROFFA, MBA, MYROW, IAROW, NPROW ) - IF( MPA.LE.0 ) - $ RETURN + IF( MPA.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF IF( MYROW.EQ.IAROW ) $ MPA = MPA - IROFFA MYDIST = MOD( MYROW-IAROW+NPROW, NPROW ) @@ -321,8 +367,13 @@ SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) IF( MYROW.EQ.IAROW ) THEN * NQA = NUMROC( N+ICOFFA, NBA, MYCOL, IACOL, NPCOL ) - IF( NQA.LE.0 ) - $ RETURN + IF( NQA.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF IF( MYCOL.EQ.IACOL ) $ NQA = NQA - ICOFFA MYDIST = MOD( MYCOL-IACOL+NPCOL, NPCOL ) @@ -404,6 +455,10 @@ SUBROUTINE PDLASE2( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLASE2 diff --git a/SRC/pdlaset.f b/SRC/pdlaset.f index f297b82e..5b8d94b0 100644 --- a/SRC/pdlaset.f +++ b/SRC/pdlaset.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, M, N @@ -149,10 +156,31 @@ SUBROUTINE PDLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( M.LE.( DESCA( MB_ ) - MOD( IA-1, DESCA( MB_ ) ) ) .OR. $ N.LE.( DESCA( NB_ ) - MOD( JA-1, DESCA( NB_ ) ) ) ) THEN @@ -213,6 +241,10 @@ SUBROUTINE PDLASET( UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLASET diff --git a/SRC/pdlasmsub.f b/SRC/pdlasmsub.f index 1feda040..5c19aa7b 100644 --- a/SRC/pdlasmsub.f +++ b/SRC/pdlasmsub.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) * and University of California, Berkeley. 
* May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER I, K, L, LWORK DOUBLE PRECISION SMLNUM @@ -165,7 +172,23 @@ SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) @@ -174,6 +197,19 @@ SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) JAFIRST = DESCA( CSRC_ ) ULP = PDLAMCH( CONTXT, 'PRECISION' ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) I, K, L, LWORK, SMLNUM, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLASMSUB inputs:,I:',I5,',K:',I5,',L:',I5, + $ ',LWORK:',I5,',SMLNUM:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) @@ -197,6 +233,10 @@ SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) * * Error! 
* +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF CALL INFOG2L( I, I, DESCA, NPROW, NPCOL, MYROW, MYCOL, IROW1, @@ -363,6 +403,10 @@ SUBROUTINE PDLASMSUB( A, DESCA, I, L, K, SMLNUM, BUF, LWORK ) 50 CONTINUE CALL IGAMX2D( CONTXT, 'ALL', ' ', 1, 1, K, 1, ITMP1, ITMP2, -1, $ -1, -1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLASMSUB diff --git a/SRC/pdlasrt.f b/SRC/pdlasrt.f index 79cf342d..5c66662c 100644 --- a/SRC/pdlasrt.f +++ b/SRC/pdlasrt.f @@ -1,10 +1,17 @@ - SUBROUTINE PDLASRT( ID, N, D, Q, IQ, JQ, DESCQ, WORK, LWORK, +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* + SUBROUTINE PDLASRT( ID, N, D, Q, IQ, JQ, DESCQ, WORK, LWORK, $ IWORK, LIWORK, INFO ) * * -- ScaLAPACK auxiliary routine (version 2.0.2) -- * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER ID INTEGER INFO, IQ, JQ, LIWORK, LWORK, N @@ -101,18 +108,59 @@ SUBROUTINE PDLASRT( ID, N, D, Q, IQ, JQ, DESCQ, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN * - IF( N.EQ.0 ) - $ RETURN +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF +* + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ICTXT = DESCQ( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) ID, INFO, IQ, JQ, LIWORK, LWORK, + $ N, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDLASRT inputs:,ID:',A5,',INFO:',I5,',IQ:',I5, + $ ',JQ:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -141,6 +189,10 @@ SUBROUTINE PDLASRT( ID, N, D, Q, IQ, JQ, DESCQ, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDLASRT', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * diff --git a/SRC/pdlassq.f b/SRC/pdlassq.f index a6826a66..9b2aa4ab 100644 --- a/SRC/pdlassq.f +++ b/SRC/pdlassq.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
INTEGER IX, INCX, JX, N DOUBLE PRECISION SCALE, SUMSQ @@ -161,13 +168,42 @@ SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IX, INCX, JX, N, SCALE, SUMSQ, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLASSQ inputs:,IX:',I5,',INCX:',I5,',JX:',I5, + $ ',N:',I5,',SCALE:',F9.4,',SUMSQ:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Figure local indexes * CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, @@ -178,8 +214,13 @@ SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * * X is rowwise distributed. * - IF( MYROW.NE.IXROW ) - $ RETURN + IF( MYROW.NE.IXROW ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF ICOFF = MOD( JX, DESCX( NB_ ) ) NQ = NUMROC( N+ICOFF, DESCX( NB_ ), MYCOL, IXCOL, NPCOL ) IF( MYCOL.EQ.IXCOL ) @@ -218,8 +259,13 @@ SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * * X is columnwise distributed. 
* - IF( MYCOL.NE.IXCOL ) - $ RETURN + IF( MYCOL.NE.IXCOL ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF IROFF = MOD( IX, DESCX( MB_ ) ) NP = NUMROC( N+IROFF, DESCX( MB_ ), MYROW, IXROW, NPROW ) IF( MYROW.EQ.IXROW ) @@ -256,6 +302,10 @@ SUBROUTINE PDLASSQ( N, X, IX, JX, DESCX, INCX, SCALE, SUMSQ ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLASSQ diff --git a/SRC/pdlaswp.f b/SRC/pdlaswp.f index 0ba8ed17..519bfd8e 100644 --- a/SRC/pdlaswp.f +++ b/SRC/pdlaswp.f @@ -1,5 +1,10 @@ +* +* Copyright (c) 2020-23 Advanced Micro Devices, Inc.  All rights reserved. +* * -- ScaLAPACK routine -- -* Copyright (c) 2022 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, $ IPIV ) @@ -9,6 +14,7 @@ SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIREC, ROWCOL INTEGER IA, JA, K1, K2, N @@ -153,21 +159,48 @@ SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, LOGICAL LSAME EXTERNAL LSAME * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* -#ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_ENTRY(__FILE__, __LINE__, ' ') -#endif +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * IF( N.EQ.0 ) THEN -#ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') -#endif +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIREC, ROWCOL, IA, JA, K1, K2, + $ N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLASWP inputs:,DIREC:',A5,',ROWCOL:',A5, + $ ',IA:',I5,',JA:',I5,',K1:',I5,',K2:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5 , + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * IF( LSAME( ROWCOL, 'R' ) ) THEN IF( LSAME( DIREC, 'F' ) ) THEN @@ -211,9 +244,10 @@ SUBROUTINE PDLASWP( DIREC, ROWCOL, N, A, IA, JA, DESCA, K1, K2, END IF END IF * -#ifdef AOCL_DTL - CALL AOCL_DTL_TRACE_EXIT(__FILE__, __LINE__, ' ') -#endif +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End PDLASWP diff --git a/SRC/pdlatra.f b/SRC/pdlatra.f index 577eaae2..5df19afb 100644 --- a/SRC/pdlatra.f +++ b/SRC/pdlatra.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* DOUBLE PRECISION FUNCTION PDLATRA( N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ DOUBLE PRECISION FUNCTION PDLATRA( N, A, IA, JA, DESCA ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, JA, N * .. 
@@ -123,15 +130,47 @@ DOUBLE PRECISION FUNCTION PDLATRA( N, A, IA, JA, DESCA ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, JA, N, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDLATRA inputs:,IA:',I5,',JA:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * TRACE = ZERO IF( N.EQ.0 ) THEN PDLATRA = TRACE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -180,6 +219,10 @@ DOUBLE PRECISION FUNCTION PDLATRA( N, A, IA, JA, DESCA ) * PDLATRA = TRACE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLATRA diff --git a/SRC/pdlatrd.f b/SRC/pdlatrd.f index 5903bc99..1964e782 100644 --- a/SRC/pdlatrd.f +++ b/SRC/pdlatrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, $ IW, JW, DESCW, WORK ) * @@ -6,6 +12,7 @@ SUBROUTINE PDLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER UPLO INTEGER IA, IW, JA, JW, N, NB @@ -249,15 +256,49 @@ SUBROUTINE PDLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, * .. Intrinsic Functions .. INTRINSIC MIN * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IW, JA, JW, N, NB, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLATRD inputs:,UPLO:',A5,',IA:',I5,',IW:',I5, + $ ',JA:',I5,',JW:',I5,',N:',I5, + $ ',NB:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NQ = MAX( 1, NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), $ NPCOL ) ) CALL DESCSET( DESCD, 1, JA+N-1, 1, DESCA( NB_ ), MYROW, @@ -408,6 +449,10 @@ SUBROUTINE PDLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, END IF END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLATRD diff --git a/SRC/pdlatrs.f b/SRC/pdlatrs.f index e1f2755e..fc4f4ffb 100644 --- a/SRC/pdlatrs.f +++ b/SRC/pdlatrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, $ JA, DESCA, X, IX, JX, DESCX, SCALE, CNORM, $ WORK ) @@ -7,6 +13,7 @@ SUBROUTINE PDLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIAG, NORMIN, TRANS, UPLO INTEGER IA, IX, JA, JX, N @@ -47,17 +54,53 @@ SUBROUTINE PDLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, INFOG2L, $ PDTRSV * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, NORMIN, TRANS, UPLO, IA, + $ IX, JA, JX, N, SCALE, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLATRS inputs:,DIAG:',A5,',NORMIN:',A5, + $ ',TRANS:',A5,',UPLO:',A5,',IA:',I5, + $ ',IX:',I5,',JA:',I5,',JX:',I5,',N:',I5, + $ ',SCALE:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * ***** NO SCALING ***** Call PDTRSV for all cases ***** * @@ -80,6 +123,10 @@ SUBROUTINE PDLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, IA, $ LDX, MYROW, IXCOL ) END IF * +* +* 
Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLATRS diff --git a/SRC/pdlatrz.f b/SRC/pdlatrz.f index e4952f3c..c3180a9f 100644 --- a/SRC/pdlatrz.f +++ b/SRC/pdlatrz.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLATRZ( M, N, L, A, IA, JA, DESCA, TAU, WORK ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLATRZ( M, N, L, A, IA, JA, DESCA, TAU, WORK ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, JA, L, M, N * .. @@ -185,17 +192,50 @@ SUBROUTINE PDLATRZ( M, N, L, A, IA, JA, DESCA, TAU, WORK ) INTEGER NUMROC EXTERNAL NUMROC * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. 
N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, JA, L, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLATRZ inputs:,IA:',I5,',JA:',I5,',L:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * MP = NUMROC( IA+M-1, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), $ NPROW ) @@ -230,6 +270,10 @@ SUBROUTINE PDLATRZ( M, N, L, A, IA, JA, DESCA, TAU, WORK ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLATRZ diff --git a/SRC/pdlauu2.f b/SRC/pdlauu2.f index 4eb40902..a82aace1 100644 --- a/SRC/pdlauu2.f +++ b/SRC/pdlauu2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAUU2( UPLO, N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLAUU2( UPLO, N, A, IA, JA, DESCA ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N @@ -140,16 +147,49 @@ SUBROUTINE PDLAUU2( UPLO, N, A, IA, JA, DESCA ) DOUBLE PRECISION DDOT EXTERNAL DDOT, LSAME * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters and compute local indexes * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, JA, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDLAUU2 inputs:,UPLO:',A5,',IA:',I5,',JA:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, IIA, JJA, $ IAROW, IACOL ) * @@ -198,6 +238,10 @@ SUBROUTINE PDLAUU2( UPLO, N, A, IA, JA, DESCA ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAUU2 diff --git a/SRC/pdlauum.f b/SRC/pdlauum.f index d0d1db54..0825cc3d 100644 --- a/SRC/pdlauum.f +++ b/SRC/pdlauum.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAUUM( UPLO, N, A, IA, JA, DESCA ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLAUUM( UPLO, N, A, IA, JA, DESCA ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, JA, N @@ -139,12 +146,33 @@ SUBROUTINE PDLAUUM( UPLO, N, A, IA, JA, DESCA ) * .. Intrinsic Functions .. INTRINSIC MIN * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+N-1 ) IF( LSAME( UPLO, 'U' ) ) THEN @@ -208,6 +236,10 @@ SUBROUTINE PDLAUUM( UPLO, N, A, IA, JA, DESCA ) 20 CONTINUE END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAUUM diff --git a/SRC/pdlawil.f b/SRC/pdlawil.f index e8bc3a08..dd2920cc 100644 --- a/SRC/pdlawil.f +++ b/SRC/pdlawil.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER II, JJ, M DOUBLE PRECISION H33, H43H34, H44 @@ -131,12 +138,41 @@ SUBROUTINE PDLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * HBL = DESCA( MB_ ) CONTXT = DESCA( CTXT_ ) LDA = DESCA( LLD_ ) CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) II, JJ, M, H33, H43H34, H44, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDLAWIL inputs:,II:',I5,',JJ:',I5,',M:',I5, + $ ',H33:',F9.4,',H43H34:',F9.4,',H44:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF LEFT = MOD( MYCOL+NPCOL-1, NPCOL ) RIGHT = MOD( MYCOL+1, NPCOL ) UP = MOD( MYROW+NPROW-1, NPROW ) @@ -230,8 +266,13 @@ SUBROUTINE PDLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) V3( 1 ) = A( ( ICOL-2 )*LDA+IROW ) END IF END IF - IF( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) - $ RETURN + IF( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( MODKM1.GT.1 ) THEN CALL INFOG2L( M+2, M+2, DESCA, NPROW, NPCOL, MYROW, MYCOL, @@ -255,6 +296,10 @@ SUBROUTINE PDLAWIL( II, JJ, M, A, DESCA, H44, H33, H43H34, V ) V( 2 ) = V2 V( 3 ) = V3( 1 ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDLAWIL diff --git a/SRC/pdorg2l.f b/SRC/pdorg2l.f index 889ff58a..aedd4a33 100644 --- a/SRC/pdorg2l.f +++ b/SRC/pdorg2l.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. 
* May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -181,13 +188,42 @@ SUBROUTINE PDORG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORG2L inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -220,15 +256,28 @@ SUBROUTINE PDORG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORG2L', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ 
-271,6 +320,10 @@ SUBROUTINE PDORG2L( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORG2L diff --git a/SRC/pdorg2r.f b/SRC/pdorg2r.f index fbcb7b54..6d2d7a21 100644 --- a/SRC/pdorg2r.f +++ b/SRC/pdorg2r.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -182,13 +189,42 @@ SUBROUTINE PDORG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORG2R inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -221,15 +257,28 @@ SUBROUTINE PDORG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORG2R', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -274,6 +323,10 @@ SUBROUTINE PDORG2R( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORG2R diff --git a/SRC/pdorgl2.f b/SRC/pdorgl2.f index ec30d99b..ca797180 100644 --- a/SRC/pdorgl2.f +++ b/SRC/pdorgl2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -181,13 +188,42 @@ SUBROUTINE PDORGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORGL2 inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -220,15 +256,28 @@ SUBROUTINE PDORGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGL2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.LE.0 ) - 
$ RETURN + IF( M.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -280,6 +329,10 @@ SUBROUTINE PDORGL2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORGL2 diff --git a/SRC/pdorglq.f b/SRC/pdorglq.f index 8cb7ab73..02017a97 100644 --- a/SRC/pdorglq.f +++ b/SRC/pdorglq.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -184,13 +191,42 @@ SUBROUTINE PDORGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORGLQ inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -233,15 +269,28 @@ SUBROUTINE PDORGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGLQ', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.LE.0 ) - $ RETURN + IF( M.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IPW = DESCA( MB_ ) * DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA, DESCA( MB_ ) ) * DESCA( MB_ ), IA+K-1 ) @@ -326,6 +375,10 @@ SUBROUTINE PDORGLQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORGLQ diff --git a/SRC/pdorgql.f b/SRC/pdorgql.f index 978972f2..f423748a 100644 --- a/SRC/pdorgql.f +++ b/SRC/pdorgql.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -183,13 +190,42 @@ SUBROUTINE PDORGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORGQL inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -231,15 +267,28 @@ SUBROUTINE PDORGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGQL', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) 
THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IPW = DESCA( NB_ )*DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA+N-K, DESCA( NB_ ) )*DESCA( NB_ ), JA+N-1 ) @@ -293,6 +342,10 @@ SUBROUTINE PDORGQL( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORGQL diff --git a/SRC/pdorgqr.f b/SRC/pdorgqr.f index 5040bbd8..1f03c13c 100644 --- a/SRC/pdorgqr.f +++ b/SRC/pdorgqr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -185,13 +192,42 @@ SUBROUTINE PDORGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORGQR inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -234,15 +270,28 @@ SUBROUTINE PDORGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGQR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IPW = DESCA( NB_ )*DESCA( NB_ ) + 1 JN = MIN( ICEIL( JA, DESCA( NB_ ) ) * DESCA( NB_ ), JA+K-1 ) @@ -329,6 +378,10 @@ SUBROUTINE PDORGQR( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORGQR diff --git a/SRC/pdorgr2.f b/SRC/pdorgr2.f index d8dca581..499c0201 100644 --- a/SRC/pdorgr2.f +++ b/SRC/pdorgr2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -181,13 +188,42 @@ SUBROUTINE PDORGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORGR2 inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -220,15 +256,28 @@ SUBROUTINE PDORGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.LE.0 ) - 
$ RETURN + IF( M.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -274,6 +323,10 @@ SUBROUTINE PDORGR2( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORGR2 diff --git a/SRC/pdorgrq.f b/SRC/pdorgrq.f index 1e493c54..5e6fb586 100644 --- a/SRC/pdorgrq.f +++ b/SRC/pdorgrq.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, K, LWORK, M, N * .. @@ -184,13 +191,42 @@ SUBROUTINE PDORGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, K, LWORK, M, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORGRQ inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',K:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -233,15 +269,28 @@ SUBROUTINE PDORGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORGRQ', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.LE.0 ) - $ RETURN + IF( M.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IPW = DESCA( MB_ )*DESCA( MB_ ) + 1 IN = MIN( ICEIL( IA+M-K, DESCA( MB_ ) )*DESCA( MB_ ), IA+M-1 ) @@ -295,6 +344,10 @@ SUBROUTINE PDORGRQ( M, N, K, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORGRQ diff --git a/SRC/pdorm2l.f b/SRC/pdorm2l.f index ede69c4d..fd1d189c 100644 --- a/SRC/pdorm2l.f +++ b/SRC/pdorm2l.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -237,13 +244,44 @@ SUBROUTINE PDORM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORM2L inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -314,15 +352,28 @@ SUBROUTINE PDORM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORM2L', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture 
the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, @@ -425,6 +476,10 @@ SUBROUTINE PDORM2L( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORM2L diff --git a/SRC/pdorm2r.f b/SRC/pdorm2r.f index 93a92968..889d36bb 100644 --- a/SRC/pdorm2r.f +++ b/SRC/pdorm2r.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -237,13 +244,44 @@ SUBROUTINE PDORM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORM2R inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -315,15 +353,28 @@ SUBROUTINE PDORM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORM2R', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( DESCA( M_ ).EQ.1 ) THEN CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, @@ -429,6 +480,10 @@ SUBROUTINE PDORM2R( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORM2R diff --git a/SRC/pdormbr.f b/SRC/pdormbr.f index e58eb853..94e0b828 100644 --- a/SRC/pdormbr.f +++ b/SRC/pdormbr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS, VECT INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -309,13 +316,44 @@ SUBROUTINE PDORMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, VECT, IA, IC, INFO, + $ JA, JC, K, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORMBR inputs:,SIDE:',A5,',TRANS:',A5, + $ ',VECT:',A5,',IA:',I5,',IC:',I5,',INFO:',I5, + $ ',JA:',I5,',JC:',I5,',K:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -530,15 +568,28 @@ SUBROUTINE PDORMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMBR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* 
+* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( APPLYQ ) THEN * @@ -584,6 +635,10 @@ SUBROUTINE PDORMBR( VECT, SIDE, TRANS, M, N, K, A, IA, JA, DESCA, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMBR diff --git a/SRC/pdormhr.f b/SRC/pdormhr.f index ca94dc44..efa53e69 100644 --- a/SRC/pdormhr.f +++ b/SRC/pdormhr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMHR( SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMHR( SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, IHI, ILO, INFO, JA, JC, LWORK, M, N @@ -246,13 +253,44 @@ SUBROUTINE PDORMHR( SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, IHI, ILO, + $ INFO, JA, JC, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDORMHR inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',IHI:',I5,',ILO:',I5, + $ ',INFO:',I5,',JA:',I5,',JC:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -375,21 +413,38 @@ SUBROUTINE PDORMHR( SIDE, TRANS, M, N, ILO, IHI, A, IA, JA, DESCA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMHR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PDORMQR( SIDE, TRANS, MI, NI, NH, A, IAA, JAA, DESCA, TAU, $ C, ICC, JCC, DESCC, WORK, LWORK, IINFO ) * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMHR diff --git a/SRC/pdorml2.f b/SRC/pdorml2.f index 424b18b6..518c6a14 100644 --- a/SRC/pdorml2.f +++ b/SRC/pdorml2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -235,13 +242,44 @@ SUBROUTINE PDORML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORML2 inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -317,15 +355,28 @@ SUBROUTINE PDORML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORML2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture 
the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -383,6 +434,10 @@ SUBROUTINE PDORML2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORML2 diff --git a/SRC/pdormlq.f b/SRC/pdormlq.f index 69f4fee1..bc445bad 100644 --- a/SRC/pdormlq.f +++ b/SRC/pdormlq.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -245,13 +252,44 @@ SUBROUTINE PDORMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMLQ inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -355,15 +393,28 @@ SUBROUTINE PDORMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMLQ', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -444,6 +495,10 @@ SUBROUTINE PDORMLQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMLQ diff --git a/SRC/pdormql.f b/SRC/pdormql.f index 24030893..24e90cba 100644 --- a/SRC/pdormql.f +++ b/SRC/pdormql.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -245,13 +252,44 @@ SUBROUTINE PDORMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMQL inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -356,15 +394,28 @@ SUBROUTINE PDORMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMQL', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine 
exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -451,6 +502,10 @@ SUBROUTINE PDORMQL( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMQL diff --git a/SRC/pdormqr.f b/SRC/pdormqr.f index 65e8cd05..d69246e9 100644 --- a/SRC/pdormqr.f +++ b/SRC/pdormqr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -245,13 +252,44 @@ SUBROUTINE PDORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMQR inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -356,15 +394,28 @@ SUBROUTINE PDORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMQR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -443,6 +494,10 @@ SUBROUTINE PDORMQR( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMQR diff --git a/SRC/pdormr2.f b/SRC/pdormr2.f index b7fd1dcf..17d0aac3 100644 --- a/SRC/pdormr2.f +++ b/SRC/pdormr2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -234,13 +241,44 @@ SUBROUTINE PDORMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMR2 inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -312,15 +350,28 @@ SUBROUTINE PDORMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMR2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture 
the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -374,6 +425,10 @@ SUBROUTINE PDORMR2( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMR2 diff --git a/SRC/pdormr3.f b/SRC/pdormr3.f index ac77077f..bac4bb63 100644 --- a/SRC/pdormr3.f +++ b/SRC/pdormr3.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, L, LWORK, M, N @@ -237,13 +244,44 @@ SUBROUTINE PDORMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, L, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMR3 inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',L:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -317,15 +355,28 @@ SUBROUTINE PDORMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMR3', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. 
K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -383,6 +434,10 @@ SUBROUTINE PDORMR3( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMR3 diff --git a/SRC/pdormrq.f b/SRC/pdormrq.f index 1ea36953..2b425bf7 100644 --- a/SRC/pdormrq.f +++ b/SRC/pdormrq.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, $ C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, LWORK, M, N @@ -245,13 +252,44 @@ SUBROUTINE PDORMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMRQ inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -367,15 +405,28 @@ SUBROUTINE PDORMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMRQ', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -468,6 +519,10 @@ SUBROUTINE PDORMRQ( SIDE, TRANS, M, N, K, A, IA, JA, DESCA, TAU, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMRQ diff --git a/SRC/pdormrz.f b/SRC/pdormrz.f index 7bde2ab3..e21a91bc 100644 --- a/SRC/pdormrz.f +++ b/SRC/pdormrz.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS INTEGER IA, IC, INFO, JA, JC, K, L, LWORK, M, N @@ -250,13 +257,44 @@ SUBROUTINE PDORMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, IA, IC, INFO, JA, + $ JC, K, L, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMRZ inputs:,SIDE:',A5,',TRANS:',A5, + $ ',IA:',I5,',IC:',I5,',INFO:',I5,',JA:',I5, + $ ',JC:',I5,',K:',I5,',L:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -364,15 +402,28 @@ SUBROUTINE PDORMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMRZ', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the 
subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) @@ -471,6 +522,10 @@ SUBROUTINE PDORMRZ( SIDE, TRANS, M, N, K, L, A, IA, JA, DESCA, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMRZ diff --git a/SRC/pdormtr.f b/SRC/pdormtr.f index 6898b6c7..04182aec 100644 --- a/SRC/pdormtr.f +++ b/SRC/pdormtr.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDORMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, $ TAU, C, IC, JC, DESCC, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDORMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER SIDE, TRANS, UPLO INTEGER IA, IC, INFO, JA, JC, LWORK, M, N @@ -260,13 +267,44 @@ SUBROUTINE PDORMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) SIDE, TRANS, UPLO, IA, IC, INFO, + $ JA, JC, LWORK, M, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDORMTR inputs:,SIDE:',A5,',TRANS:',A5, + $ ',UPLO:',A5,',IA:',I5,',IC:',I5,',INFO:',I5, + $ ',JA:',I5,',JC:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -402,15 +440,28 @@ SUBROUTINE PDORMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDORMTR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( UPPER ) THEN * @@ -430,6 +481,10 @@ SUBROUTINE PDORMTR( SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDORMTR diff --git a/SRC/pdpbsv.f b/SRC/pdpbsv.f index 68eacc84..577c6d3d 100644 --- a/SRC/pdpbsv.f +++ b/SRC/pdpbsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, $ WORK, LWORK, INFO ) * @@ -8,6 +14,7 @@ SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * and University of California, Berkeley. * November 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER BW, IB, INFO, JA, LWORK, N, NRHS @@ -382,8 +389,24 @@ SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * .. External Subroutines .. EXTERNAL PDPBTRF, PDPBTRS, PXERBLA * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PDPBTRF and PDPBTRS. 
@@ -405,11 +428,29 @@ SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, CALL PXERBLA( ICTXT, $ 'PDPBSV', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, BW, IB, INFO, JA, LWORK, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDPBSV inputs:,UPLO:',A5,',BW:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * * Size needed for AF in factorization * @@ -427,6 +468,10 @@ SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PDPBSV', -INFO ) ENDIF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -440,9 +485,17 @@ SUBROUTINE PDPBSV( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPBSV diff --git a/SRC/pdpbtrf.f b/SRC/pdpbtrf.f index 6367cac6..131f4874 100644 --- a/SRC/pdpbtrf.f +++ b/SRC/pdpbtrf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, $ LWORK, INFO ) * @@ -5,6 +11,7 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER UPLO INTEGER BW, INFO, JA, LAF, LWORK, N @@ -388,8 +395,24 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters * INFO = 0 @@ -421,6 +444,19 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, MBW2 = BW*BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, BW, INFO, JA, LAF, LWORK, + $ N, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPBTRF inputs:,UPLO:',A5,',BW:',I5,',INFO:',I5, + $ ',JA:',I5,',LAF:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -471,12 +507,20 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDPBTRF, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*BW ) ) THEN INFO = -( 6*100+4 ) CALL PXERBLA( ICTXT, 'PDPBTRF, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -491,6 +535,10 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDPBTRF: auxiliary storage error ', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -505,6 +553,10 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, INFO = -10 CALL PXERBLA( ICTXT, 'PDPBTRF: worksize error ', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -560,13 +612,22 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBTRF', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1467,6 +1528,10 @@ SUBROUTINE PDPBTRF( UPLO, N, BW, A, JA, DESCA, AF, LAF, WORK, END IF * * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPBTRF diff --git a/SRC/pdpbtrs.f b/SRC/pdpbtrs.f index a09a0b93..784dfaed 100644 --- a/SRC/pdpbtrs.f +++ b/SRC/pdpbtrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, $ AF, LAF, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * and University of California, Berkeley. * April 3, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER UPLO INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS @@ -391,8 +398,24 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters * INFO = 0 @@ -450,6 +473,21 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, BW, IB, INFO, JA, LAF, + $ LWORK, N, NRHS, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDPBTRS inputs:,UPLO:',A5,',BW:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',LAF:',I5, + $ ',LWORK:',I5,',N:',I5,',NRHS:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -518,12 +556,20 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, INFO = -( 2 ) CALL PXERBLA( ICTXT, 'PDPBTRS, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*BW ) ) THEN INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, 'PDPBTRS, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -537,6 +583,10 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, INFO = -14 CALL PXERBLA( ICTXT, 'PDPBTRS: worksize error', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -606,16 +656,30 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBTRS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file * - IF( NRHS.EQ.0 ) - $ RETURN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF +* + IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -728,6 +792,10 @@ SUBROUTINE PDPBTRS( UPLO, N, BW, NRHS, A, JA, DESCA, B, IB, DESCB, WORK( 1 ) = WORK_SIZE_MIN * * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPBTRS diff --git a/SRC/pdpbtrsv.f b/SRC/pdpbtrsv.f index a4cdffc4..5ce87c46 100644 --- a/SRC/pdpbtrsv.f +++ b/SRC/pdpbtrsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, $ IB, DESCB, AF, LAF, WORK, LWORK, INFO ) * @@ -5,6 +11,7 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER TRANS, UPLO INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS @@ -405,8 +412,24 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters * INFO = 0 @@ -468,6 +491,21 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, MBW2 = BW*BW * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) TRANS, UPLO, BW, IB, INFO, JA, + $ LAF, LWORK, N, NRHS, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDPBTRSV inputs:,TRANS:',A5,',UPLO:',A5, + $ ',BW:',I5,',IB:',I5,',INFO:',I5,',JA:',I5, + $ ',LAF:',I5,',LWORK:',I5,',N:',I5, + $ ',NRHS:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -547,6 +585,10 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, CALL PXERBLA( ICTXT, $ 'PDPBTRSV, D&C alg.: only 1 block per proc', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -554,6 +596,10 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, INFO = -( 8*100+4 ) CALL PXERBLA( ICTXT, 'PDPBTRSV, D&C alg.: NB too small', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -567,6 +613,10 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, 
BW, NRHS, A, JA, DESCA, B, INFO = -14 CALL PXERBLA( ICTXT, 'PDPBTRSV: worksize error', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -638,16 +688,30 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPBTRSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file * - IF( NRHS.EQ.0 ) - $ RETURN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF +* + IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1504,6 +1568,10 @@ SUBROUTINE PDPBTRSV( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B, WORK( 1 ) = WORK_SIZE_MIN * * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPBTRSV diff --git a/SRC/pdpocon.f b/SRC/pdpocon.f index 9930469d..f338027a 100644 --- a/SRC/pdpocon.f +++ b/SRC/pdpocon.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, $ LWORK, IWORK, LIWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LIWORK, LWORK, N @@ -203,13 +210,44 @@ SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, LIWORK, + $ LWORK, N, ANORM, RCOND, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDPOCON inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',ANORM:',F9.4,',RCOND:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -273,8 +311,16 @@ SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOCON', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -283,11 +329,23 @@ SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, RCOND = ZERO IF( N.EQ.0 ) THEN RCOND = ONE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( ANORM.EQ.ZERO ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( N.EQ.1 ) THEN RCOND = ONE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -398,6 +456,10 @@ SUBROUTINE PDPOCON( UPLO, N, A, IA, JA, DESCA, ANORM, RCOND, WORK, 
CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOCON diff --git a/SRC/pdpoequ.f b/SRC/pdpoequ.f index 6254daea..0f394dbc 100644 --- a/SRC/pdpoequ.f +++ b/SRC/pdpoequ.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, N DOUBLE PRECISION AMAX, SCOND @@ -178,13 +185,42 @@ SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, N, AMAX, SCOND, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPOEQU inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',N:',I5,',AMAX:',F9.4,',SCOND:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. 
* INFO = 0 @@ -198,6 +234,10 @@ SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOEQU', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -206,6 +246,10 @@ SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, IF( N.EQ.0 ) THEN SCOND = ONE AMAX = ZERO +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -329,6 +373,10 @@ SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, * CALL IGAMN2D( ICTXT, 'All', ALLCTOP, 1, 1, INFO, 1, II, JJ, -1, $ -1, MYCOL ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * ELSE @@ -350,6 +398,10 @@ SUBROUTINE PDPOEQU( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOEQU diff --git a/SRC/pdporfs.f b/SRC/pdporfs.f index f827625d..cb4600de 100644 --- a/SRC/pdporfs.f +++ b/SRC/pdporfs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, $ DESCAF, B, IB, JB, DESCB, X, IX, JX, DESCX, $ FERR, BERR, WORK, LWORK, IWORK, LIWORK, INFO ) @@ -7,6 +13,7 @@ SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, @@ -297,13 +304,48 @@ SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* .. Initialize EST + EST = 0.0 +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IAF, IB, INFO, IX, + $ JA, JAF, JB, JX, LIWORK, + $ LWORK, N, NRHS, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDPORFS inputs:,UPLO:',A5,',IA:',I5,',IAF:',I5, + $ ',IB:',I5,',INFO:',I5,',IX:',I5, + $ ',JA:',I5,',JAF:',I5,',JB:',I5, + $ ',JX:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. 
* INFO = 0 @@ -420,8 +462,16 @@ SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPORFS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -436,6 +486,10 @@ SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -854,6 +908,10 @@ SUBROUTINE PDPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPORFS diff --git a/SRC/pdposv.f b/SRC/pdposv.f index 304e0617..25e140f9 100644 --- a/SRC/pdposv.f +++ b/SRC/pdposv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS @@ -190,13 +197,43 @@ SUBROUTINE PDPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, INFO, JA, JB, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDPOSV inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',JB:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -239,6 +276,10 @@ SUBROUTINE PDPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -256,6 +297,10 @@ SUBROUTINE PDPOSV( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOSV diff --git a/SRC/pdposvx.f b/SRC/pdposvx.f index 0e75f09a..4908f8ad 100644 --- a/SRC/pdposvx.f +++ b/SRC/pdposvx.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, $ IAF, JAF, DESCAF, EQUED, SR, SC, B, IB, JB, $ DESCB, X, IX, JX, DESCX, RCOND, FERR, BERR, @@ -8,6 +14,7 @@ SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, * and University of California, Berkeley. * December 31, 1998 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER EQUED, FACT, UPLO INTEGER IA, IAF, IB, INFO, IX, JA, JAF, JB, JX, LIWORK, @@ -387,13 +394,48 @@ SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, * .. Intrinsic Functions .. INTRINSIC ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*512 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) EQUED, FACT, UPLO, IA, IAF, + $ IB, INFO, IX, JA, JAF, JB, JX, LIWORK, + $ LWORK, N, NRHS, + $ RCOND, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDPOSVX inputs:,EQUED:',A5,',FACT:',A5, + $ ',UPLO:',A5,',IA:',I5,',IAF:',I5,',IB:',I5, + $ ',INFO:',I5,',IX:',I5,',JA:',I5, + $ ',JAF:',I5,',JB:',I5,',JX:',I5,',LIWORK:',I5, + $ ',LWORK:',I5,',N:',I5,',NRHS:',I5, + $ ',RCOND:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -546,8 +588,16 @@ SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOSVX', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -601,6 +651,10 @@ SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, IF( INFO.NE.0 ) THEN IF( INFO.GT.0 ) $ RCOND = ZERO +* +* 
Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF END IF @@ -618,6 +672,10 @@ SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, * IF( RCOND.LT.PDLAMCH( ICTXT, 'Epsilon' ) ) THEN INFO = IA + N +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -661,6 +719,10 @@ SUBROUTINE PDPOSVX( FACT, UPLO, N, NRHS, A, IA, JA, DESCA, AF, * WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOSVX diff --git a/SRC/pdpotf2.f b/SRC/pdpotf2.f index fbd98551..324025c0 100644 --- a/SRC/pdpotf2.f +++ b/SRC/pdpotf2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N @@ -164,13 +171,41 @@ SUBROUTINE PDPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) DOUBLE PRECISION DDOT EXTERNAL LSAME, DDOT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPOTF2 inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. * INFO = 0 @@ -199,13 +234,22 @@ SUBROUTINE PDPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOTF2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Compute local information * @@ -344,6 +388,10 @@ SUBROUTINE PDPOTF2( UPLO, N, A, IA, JA, DESCA, INFO ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOTF2 diff --git a/SRC/pdpotrf.f b/SRC/pdpotrf.f index 8f9cd118..01422d8a 100644 --- a/SRC/pdpotrf.f +++ b/SRC/pdpotrf.f @@ -1,6 +1,9 @@ +* +* Copyright (c) 2022-23 Advanced Micro Devices, Inc.  All rights reserved. +* * -- ScaLAPACK routine -- -* Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. -* June 20, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * @@ -9,6 +12,7 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * and University of California, Berkeley. 
* May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N @@ -153,11 +157,18 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) INTEGER I, ICOFF, ICTXT, IROFF, J, JB, JN, MYCOL, $ MYROW, NPCOL, NPROW * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. @@ -174,13 +185,41 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPOTRF inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -214,19 +253,30 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOTRF', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * #ifdef AOCL_PROGRESS - LSTAGE = 7 - API_NAME = 'PDPOTRF' +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN CURRENT_RANK = MYCOL+MYROW*NPCOL TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF #endif * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) @@ -272,9 +322,19 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA +* #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at 
application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block @@ -341,9 +401,19 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA +* #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block @@ -378,6 +448,10 @@ SUBROUTINE PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) CALL PB_TOPSET( ICTXT, 'Broadcast', 'Columnwise', COLBTOP ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOTRF diff --git a/SRC/pdpotri.f b/SRC/pdpotri.f index fd287cdd..54b2ebd0 100644 --- a/SRC/pdpotri.f +++ b/SRC/pdpotri.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOTRI( UPLO, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDPOTRI( UPLO, N, A, IA, JA, DESCA, INFO ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, N @@ -143,13 +150,41 @@ SUBROUTINE PDPOTRI( UPLO, N, A, IA, JA, DESCA, INFO ) * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPOTRI inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -182,25 +217,43 @@ SUBROUTINE PDPOTRI( UPLO, N, A, IA, JA, DESCA, INFO ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOTRI', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Invert the triangular Cholesky factor U or L. * CALL PDTRTRI( UPLO, 'Non-unit', N, A, IA, JA, DESCA, INFO ) * - IF( INFO.GT.0 ) - $ RETURN + IF( INFO.GT.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Form inv(U)*inv(U)' or inv(L)'*inv(L). 
* CALL PDLAUUM( UPLO, N, A, IA, JA, DESCA ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOTRI diff --git a/SRC/pdpotrs.f b/SRC/pdpotrs.f index 36c82e76..ce75ea8b 100644 --- a/SRC/pdpotrs.f +++ b/SRC/pdpotrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS @@ -171,13 +178,43 @@ SUBROUTINE PDPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, INFO, JA, JB, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDPOTRS inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',INFO:',I5,',JA:',I5,',JB:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. 
* INFO = 0 @@ -221,13 +258,22 @@ SUBROUTINE PDPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPOTRS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 .OR. NRHS.EQ.0 ) - $ RETURN + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( UPPER ) THEN * @@ -257,6 +303,10 @@ SUBROUTINE PDPOTRS( UPLO, N, NRHS, A, IA, JA, DESCA, B, IB, JB, $ ONE, A, IA, JA, DESCA, B, IB, JB, DESCB ) END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPOTRS diff --git a/SRC/pdptsv.f b/SRC/pdptsv.f index 25682bdd..8dc7be43 100644 --- a/SRC/pdptsv.f +++ b/SRC/pdptsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, $ LWORK, INFO ) * @@ -8,6 +14,7 @@ SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, * and University of California, Berkeley. * November 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IB, INFO, JA, LWORK, N, NRHS * .. @@ -383,8 +390,24 @@ SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, * .. External Subroutines .. EXTERNAL PDPTTRF, PDPTTRS, PXERBLA * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Note: to avoid duplication, most error checking is not performed * in this routine and is left to routines * PDPTTRF and PDPTTRS. @@ -409,11 +432,28 @@ SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, CALL PXERBLA( ICTXT, $ 'PDPTSV', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ENDIF * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IB, INFO, JA, LWORK, N, NRHS, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPTSV inputs:,IB:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',N:',I5,',NRHS:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * * Size needed for AF in factorization * @@ -430,6 +470,10 @@ SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, IF( INFO .LT. 0 ) THEN CALL PXERBLA( ICTXT, 'PDPTSV', -INFO ) ENDIF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -443,9 +487,17 @@ SUBROUTINE PDPTSV( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDPTSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPTSV diff --git a/SRC/pdpttrf.f b/SRC/pdpttrf.f index 3266faeb..09e99034 100644 --- a/SRC/pdpttrf.f +++ b/SRC/pdpttrf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, * and University of California, Berkeley. * April 3, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER INFO, JA, LAF, LWORK, N * .. @@ -384,8 +391,24 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters * INFO = 0 @@ -421,6 +444,18 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) INFO, JA, LAF, LWORK, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPTTRF inputs:,INFO:',I5,',JA:',I5,',LAF:',I5, + $ ',LWORK:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -451,12 +486,20 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDPTTRF, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*INT_ONE ) ) THEN INFO = -( 5*100+4 ) CALL PXERBLA( ICTXT, 'PDPTTRF, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -471,6 +514,10 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, AF( 1 ) = LAF_MIN CALL PXERBLA( ICTXT, 'PDPTTRF: auxiliary storage error ', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -485,6 +532,10 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, INFO = -9 CALL PXERBLA( ICTXT, 'PDPTTRF: worksize error ', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -536,13 +587,22 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPTTRF', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1010,6 +1070,10 @@ SUBROUTINE PDPTTRF( N, D, E, JA, DESCA, AF, LAF, WORK, LWORK, END IF * * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPTTRF diff --git a/SRC/pdpttrs.f b/SRC/pdpttrs.f index 2e542331..9821b9c1 100644 --- a/SRC/pdpttrs.f +++ b/SRC/pdpttrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, $ LAF, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, * and University of California, Berkeley. * April 3, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS * .. 
@@ -398,8 +405,24 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, * .. Intrinsic Functions .. INTRINSIC DBLE, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters * INFO = 0 @@ -465,6 +488,19 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IB, INFO, JA, LAF, LWORK, N, + $ NRHS, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDPTTRS inputs:,IB:',I5,',INFO:',I5,',JA:',I5, + $ ',LAF:',I5,',LWORK:',I5,',N:',I5, + $ ',NRHS:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -513,12 +549,20 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, INFO = -( 1 ) CALL PXERBLA( ICTXT, 'PDPTTRS, D&C alg.: only 1 block per proc' $ , -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * IF( ( JA+N-1.GT.NB ) .AND. 
( NB.LT.2*INT_ONE ) ) THEN INFO = -( 5*100+4 ) CALL PXERBLA( ICTXT, 'PDPTTRS, D&C alg.: NB too small', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -532,6 +576,10 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, INFO = -12 CALL PXERBLA( ICTXT, 'PDPTTRS: worksize error', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -597,16 +645,30 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPTTRS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file * - IF( NRHS.EQ.0 ) - $ RETURN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF +* + IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -741,6 +803,10 @@ SUBROUTINE PDPTTRS( N, NRHS, D, E, JA, DESCA, B, IB, DESCB, AF, WORK( 1 ) = WORK_SIZE_MIN * * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPTTRS diff --git a/SRC/pdpttrsv.f b/SRC/pdpttrsv.f index 876e667c..d10d7610 100644 --- a/SRC/pdpttrsv.f +++ b/SRC/pdpttrsv.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, $ AF, LAF, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, * and University of California, Berkeley. * April 3, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. 
CHARACTER UPLO INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS @@ -409,8 +416,24 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, * .. Intrinsic Functions .. INTRINSIC ICHAR, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Test the input parameters * INFO = 0 @@ -476,6 +499,20 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, * * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IB, INFO, JA, LAF, LWORK, + $ N, NRHS, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDPTTRSV inputs:,UPLO:',A5,',IB:',I5,',INFO:',I5, + $ ',JA:',I5,',LAF:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NP = NPROW*NPCOL * * @@ -533,6 +570,10 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, CALL PXERBLA( ICTXT, $ 'PDPTTRSV, D&C alg.: only 1 block per proc', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -540,6 +581,10 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, INFO = -( 7*100+4 ) CALL PXERBLA( ICTXT, 'PDPTTRSV, D&C alg.: NB too small', $ -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -553,6 +598,10 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, INFO = -14 CALL 
PXERBLA( ICTXT, 'PDPTTRSV: worksize error', -INFO ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -620,16 +669,30 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, * IF( INFO.LT.0 ) THEN CALL PXERBLA( ICTXT, 'PDPTTRSV', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file * - IF( NRHS.EQ.0 ) - $ RETURN + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF +* + IF( NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * Adjust addressing into matrix space to properly get into @@ -1081,6 +1144,10 @@ SUBROUTINE PDPTTRSV( UPLO, N, NRHS, D, E, JA, DESCA, B, IB, DESCB, WORK( 1 ) = WORK_SIZE_MIN * * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDPTTRSV diff --git a/SRC/pdrot.f b/SRC/pdrot.f index cbbfb3e8..35d715dd 100644 --- a/SRC/pdrot.f +++ b/SRC/pdrot.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, $ INCY, CS, SN, WORK, LWORK, INFO ) * @@ -9,6 +15,7 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -200,12 +207,43 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, * .. Local Functions .. INTEGER ICEIL * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) N, IX, JX, INCX, IY, JY, INCY, + $ LWORK, INFO, CS, SN, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDROT inputs:,N:',I5,',IX:',I5,',JX:',I5, + $ ',INCX:',I5,',IY:',I5,',JY:',I5,',INCY:',I5, + $ ',LWORK:',I5,',INFO:',I5, + $ ',CS:',F9.4,',SN:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL * * Test and decode parameters @@ -268,7 +306,7 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, IF( LEFT ) THEN RSRC1 = INDXG2P( IX, MB, MYROW, DESCX(RSRC_), NPROW ) RSRC2 = INDXG2P( IY, MB, MYROW, DESCY(RSRC_), NPROW ) - CSRC = INDXG2P( JX, NB, MYCOL, DESCX(CSRC_), NPCOL ) + CSRC = INDXG2P( JX, NB, MYCOL, DESCX(CSRC_), NPCOL ) ICOFFXY = MOD( JX - 1, NB ) XYCOLS = NUMROC( N+ICOFFXY, NB, MYCOL, CSRC, NPCOL ) IF( ( MYROW.EQ.RSRC1 .OR. MYROW.EQ.RSRC2 ) .AND. @@ -281,7 +319,7 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, ELSEIF( RIGHT ) THEN CSRC1 = INDXG2P( JX, NB, MYCOL, DESCX(CSRC_), NPCOL ) CSRC2 = INDXG2P( JY, NB, MYCOL, DESCY(CSRC_), NPCOL ) - RSRC = INDXG2P( IX, MB, MYROW, DESCX(RSRC_), NPROW ) + RSRC = INDXG2P( IX, MB, MYROW, DESCX(RSRC_), NPROW ) IROFFXY = MOD( IX - 1, MB ) XYROWS = NUMROC( N+IROFFXY, MB, MYROW, RSRC, NPROW ) IF( ( MYCOL.EQ.CSRC1 .OR. MYCOL.EQ.CSRC2 ) .AND. 
@@ -300,15 +338,27 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDROT', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSEIF( LQUERY ) THEN WORK( 1 ) = DBLE(MNWRK) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -328,6 +378,10 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, CALL DROT( N, X((JX-1)*LLDX+IX), 1, Y((JY-1)*LLDY+IY), $ 1, CS, SN ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -435,6 +489,10 @@ SUBROUTINE PDROT( N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, * Store minimum workspace requirements in WORK-array and return * WORK( 1 ) = DBLE(MNWRK) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDROT diff --git a/SRC/pdrscl.f b/SRC/pdrscl.f index 4096d1e8..7ec8d109 100644 --- a/SRC/pdrscl.f +++ b/SRC/pdrscl.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDRSCL( N, SA, SX, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDRSCL( N, SA, SX, IX, JX, DESCX, INCX ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IX, INCX, JX, N DOUBLE PRECISION SA @@ -141,17 +148,50 @@ SUBROUTINE PDRSCL( N, SA, SX, IX, JX, DESCX, INCX ) * .. Intrinsic Functions .. INTRINSIC ABS * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. 
Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IX, INCX, JX, N, SA, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDRSCL inputs:,IX:',I5,',INCX:',I5,',JX:',I5, + $ ',N:',I5,',SA:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Quick return if possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get machine parameters * @@ -198,6 +238,10 @@ SUBROUTINE PDRSCL( N, SA, SX, IX, JX, DESCX, INCX ) IF( .NOT.DONE ) $ GO TO 10 * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDRSCL diff --git a/SRC/pdstedc.f b/SRC/pdstedc.f index 1d0f49ec..3ad94f38 100644 --- a/SRC/pdstedc.f +++ b/SRC/pdstedc.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSTEDC( COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, $ IWORK, LIWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSTEDC( COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, * and University of California, Berkeley. * March 13, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER COMPZ INTEGER INFO, IQ, JQ, LIWORK, LWORK, N @@ -145,15 +152,51 @@ SUBROUTINE PDSTEDC( COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Test the input parameters. * CALL BLACS_GRIDINFO( DESCQ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) COMPZ, INFO, IQ, JQ, LIWORK, + $ LWORK, N, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDSTEDC inputs:,COMPZ:',A5,',INFO:',I5, + $ ',IQ:',I5,',JQ:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF LDQ = DESCQ( LLD_ ) NB = DESCQ( NB_ ) NP = NUMROC( N, NB, MYROW, DESCQ( RSRC_ ), NPROW ) @@ -192,8 +235,16 @@ SUBROUTINE PDSTEDC( COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCQ( CTXT_ ), 'PDSTEDC', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -261,6 +312,10 @@ SUBROUTINE PDSTEDC( COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, $ WORK( 1 ) = DBLE( LWMIN ) IF( LIWORK.GT.0 ) $ IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSTEDC diff --git 
a/SRC/pdstein.f b/SRC/pdstein.f index 91d6e743..21024133 100644 --- a/SRC/pdstein.f +++ b/SRC/pdstein.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, $ ICLUSTR, GAP, INFO ) @@ -7,6 +13,7 @@ SUBROUTINE PDSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, * and University of California, Berkeley. * November 15, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER INFO, IZ, JZ, LIWORK, LWORK, M, N DOUBLE PRECISION ORFAC @@ -295,12 +302,48 @@ SUBROUTINE PDSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) INFO, IZ, JZ, LIWORK, LWORK, + $ M, N, ORFAC, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDSTEIN inputs:,INFO:',I5,',IZ:',I5,',JZ:',I5, + $ ',LIWORK:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',ORFAC:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF SELF = MYROW*NPCOL + MYCOL * * Make sure that we belong to this context (before calling PCHK1MAT) @@ -376,8 +419,16 @@ SUBROUTINE PDSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCZ( CTXT_ ), 'PDSTEIN', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -396,8 +447,13 @@ SUBROUTINE PDSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, ORFAC, Z, IZ, * * Quick return if possible * - IF( N.EQ.0 .OR. M.EQ.0 ) - $ RETURN + IF( N.EQ.0 .OR. M.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( ORFAC.GE.ZERO ) THEN TMPFAC = ORFAC diff --git a/SRC/pdsyev.f b/SRC/pdsyev.f index 8ccf3ac0..f0ccd441 100644 --- a/SRC/pdsyev.f +++ b/SRC/pdsyev.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  
All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, $ Z, IZ, JZ, DESCZ, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER JOBZ, UPLO INTEGER IA, INFO, IZ, JA, JZ, LWORK, N @@ -248,15 +255,15 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, * .. * .. Local Scalars .. LOGICAL LOWER, WANTZ - INTEGER CONTEXTC, CSRC_A, I, IACOL, IAROW, ICOFFA, - $ IINFO, INDD, INDD2, INDE, INDE2, INDTAU, - $ INDWORK, INDWORK2, IROFFA, IROFFZ, ISCALE, - $ IZROW, J, K, LDC, LLWORK, LWMIN, MB_A, MB_Z, + INTEGER CONTEXTC, CSRC_A, I, IACOL, IAROW, ICOFFA, + $ IINFO, INDD, INDD2, INDE, INDE2, INDTAU, + $ INDWORK, INDWORK2, IROFFA, IROFFZ, ISCALE, + $ IZROW, J, K, LDC, LLWORK, LWMIN, MB_A, MB_Z, $ MYCOL, MYPCOLC, MYPROWC, MYROW, NB, NB_A, NB_Z, - $ NP, NPCOL, NPCOLC, NPROCS, NPROW, NPROWC, NQ, - $ NRC, QRMEM, RSRC_A, RSRC_Z, SIZEMQRLEFT, + $ NP, NPCOL, NPCOLC, NPROCS, NPROW, NPROWC, NQ, + $ NRC, QRMEM, RSRC_A, RSRC_Z, SIZEMQRLEFT, $ SIZESYTRD - DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, $ SMLNUM * .. * .. Local Arrays .. @@ -278,19 +285,62 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD, SQRT, INT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Quick return * - IF( N.EQ.0 ) RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Test the input arguments. * CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) INFO = 0 +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) JOBZ, UPLO, IA, INFO, IZ, JA, + $ JZ, LWORK, N, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDSYEV inputs:,JOBZ:',A5,',UPLO:',A5,',IA:',I5, + $ ',INFO:',I5,',IZ:',I5,',JA:',I5, + $ ',JZ:',I5,',LWORK:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * WANTZ = LSAME( JOBZ, 'V' ) IF( NPROW.EQ.-1 ) THEN @@ -452,9 +502,17 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, IF( INFO.NE.0 ) THEN CALL PXERBLA( DESCA( CTXT_ ), 'PDSYEV', -INFO ) IF( WANTZ ) CALL BLACS_GRIDEXIT( CONTEXTC ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LWORK .EQ. 
-1 ) THEN IF( WANTZ ) CALL BLACS_GRIDEXIT( CONTEXTC ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -491,7 +549,7 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, 10 CONTINUE IF( LSAME( UPLO, 'U') ) THEN DO 20 I=1,N-1 - CALL PDELGET( 'A', ' ', WORK(INDE2+I-1), A, + CALL PDELGET( 'A', ' ', WORK(INDE2+I-1), A, $ I+IA-1, I+JA, DESCA ) 20 CONTINUE ELSE @@ -511,7 +569,7 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, * to matrix Q. * CALL DSTEQR2( 'I', N, WORK( INDD2 ), WORK( INDE2 ), - $ WORK( INDWORK ), LDC, NRC, WORK( INDWORK2 ), + $ WORK( INDWORK ), LDC, NRC, WORK( INDWORK2 ), $ INFO ) * CALL PDGEMR2D( N, N, WORK( INDWORK ), 1, 1, DESCQR, Z, IA, JA, @@ -567,11 +625,15 @@ SUBROUTINE PDSYEV( JOBZ, UPLO, N, A, IA, JA, DESCA, W, * DO 50 I = 1, J IF( INFO.EQ.0 .AND. ( WORK( I+INDTAU )-WORK( I+INDE ) - $ .NE. ZERO ) )THEN + $ .NE. ZERO ) )THEN INFO = N+1 END IF 50 CONTINUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYEV diff --git a/SRC/pdsyevd.f b/SRC/pdsyevd.f index 5d4463eb..120b7d36 100644 --- a/SRC/pdsyevd.f +++ b/SRC/pdsyevd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, $ DESCZ, WORK, LWORK, IWORK, LIWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSYEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, * and University of California, Berkeley. * March 14, 2000 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER JOBZ, UPLO INTEGER IA, INFO, IZ, JA, JZ, LIWORK, LWORK, N @@ -188,20 +195,62 @@ SUBROUTINE PDSYEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Quick return * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Test the input arguments. * ICTXT = DESCZ( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) JOBZ, UPLO, IA, INFO, IZ, JA, + $ JZ, LIWORK, LWORK, N, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDSYEVD inputs:,JOBZ:',A5,',UPLO:',A5, + $ ',IA:',I5,',INFO:',I5,',IZ:',I5,',JA:',I5, + $ ',JZ:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * INFO = 0 IF( NPROW.EQ.-1 ) THEN @@ -272,8 +321,16 @@ SUBROUTINE PDSYEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYEVD', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -346,6 +403,10 @@ SUBROUTINE PDSYEVD( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, CALL DSCAL( N, ONE / SIGMA, W, 1 ) 
END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYEVD diff --git a/SRC/pdsyevr.f b/SRC/pdsyevr.f index 984b9904..9ad360d8 100644 --- a/SRC/pdsyevr.f +++ b/SRC/pdsyevr.f @@ -1,8 +1,15 @@ - SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* + SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, $ DESCA, VL, VU, IL, IU, M, NZ, W, Z, IZ, $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, $ INFO ) + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * -- ScaLAPACK routine (version 2.0.2) -- @@ -25,14 +32,14 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * * PDSYEVR computes selected eigenvalues and, optionally, eigenvectors * of a real symmetric matrix A distributed in 2D blockcyclic format -* by calling the recommended sequence of ScaLAPACK routines. +* by calling the recommended sequence of ScaLAPACK routines. * * First, the matrix A is reduced to real symmetric tridiagonal form. * Then, the eigenproblem is solved using the parallel MRRR algorithm. * Last, if eigenvectors have been computed, a backtransformation is done. * * Upon successful completion, each processor stores a copy of all computed -* eigenvalues in W. The eigenvector matrix Z is stored in +* eigenvalues in W. The eigenvector matrix Z is stored in * 2D blockcyclic format distributed over all processors. * * Note that subsets of eigenvalues/vectors can be selected by @@ -67,7 +74,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * A (local input/workspace) 2D block cyclic DOUBLE PRECISION array, * global dimension (N, N), * local dimension ( LLD_A, LOCc(JA+N-1) ), -* (see Notes below for more detailed explanation of 2d arrays) +* (see Notes below for more detailed explanation of 2d arrays) * * On entry, the symmetric matrix A. 
If UPLO = 'U', only the * upper triangular part of A is used to define the elements of @@ -81,7 +88,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * * IA (global input) INTEGER * A's global row index, which points to the beginning of the -* submatrix which is to be operated on. +* submatrix which is to be operated on. * It should be set to 1 when operating on a full matrix. * * JA (global input) INTEGER @@ -91,17 +98,17 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * * DESCA (global and local input) INTEGER array of dimension DLEN=9. * The array descriptor for the distributed matrix A. -* The descriptor stores details about the 2D block-cyclic +* The descriptor stores details about the 2D block-cyclic * storage, see the notes below. * If DESCA is incorrect, PDSYEVR cannot guarantee * correct error reporting. * Also note the array alignment requirements specified below. * -* VL (global input) DOUBLE PRECISION +* VL (global input) DOUBLE PRECISION * If RANGE='V', the lower bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * -* VU (global input) DOUBLE PRECISION +* VU (global input) DOUBLE PRECISION * If RANGE='V', the upper bound of the interval to be searched * for eigenvalues. Not referenced if RANGE = 'A' or 'I'. * @@ -122,7 +129,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * Total number of eigenvectors computed. 0 <= NZ <= M. * The number of columns of Z that are filled. * If JOBZ .NE. 'V', NZ is not referenced. -* If JOBZ .EQ. 'V', NZ = M +* If JOBZ .EQ. 
'V', NZ = M * * W (global output) DOUBLE PRECISION array, dimension (N) * Upon successful exit, the first M entries contain the selected @@ -131,7 +138,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * Z (local output) DOUBLE PRECISION array, * global dimension (N, N), * local dimension ( LLD_Z, LOCc(JZ+N-1) ) -* (see Notes below for more detailed explanation of 2d arrays) +* (see Notes below for more detailed explanation of 2d arrays) * If JOBZ = 'V', then on normal exit the first M columns of Z * contain the orthonormal eigenvectors of the matrix * corresponding to the selected eigenvalues. @@ -189,8 +196,8 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * these values is returned in the first entry of the * corresponding work arrays, and no error message is issued by * PXERBLA. -* Note that in a workspace query, for performance the optimal -* workspace LWOPT is returned rather than the minimum necessary +* Note that in a workspace query, for performance the optimal +* workspace LWOPT is returned rather than the minimum necessary * WORKSPACE LWMIN. For very small matrices, LWOPT >> LWMIN. * * IWORK (local workspace) INTEGER array @@ -203,7 +210,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * Let NNP = MAX( N, NPROW*NPCOL + 1, 4 ). Then: * LIWORK >= 12*NNP + 2*N when the eigenvectors are desired * LIWORK >= 10*NNP + 2*N when only the eigenvalues have to be computed -* +* * If LIWORK = -1, then LIWORK is global input and a workspace * query is assumed; the routine only calculates the minimum * and optimal size for all work arrays. Each of these @@ -226,8 +233,8 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * and memory location. * * Let A be a generic term for any 2D block cyclicly distributed array. -* Such a global array has an associated description vector DESCA, -* or DESCZ for the descriptor of Z, etc. +* Such a global array has an associated description vector DESCA, +* or DESCZ for the descriptor of Z, etc. 
* The length of a ScaLAPACK descriptor is nine. * In the following comments, the character _ should be read as * "of the global array". @@ -273,7 +280,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A * -* PDSYEVR assumes IEEE 754 standard compliant arithmetic. +* PDSYEVR assumes IEEE 754 standard compliant arithmetic. * * Alignment requirements * ====================== @@ -281,9 +288,9 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * The distributed submatrices A(IA:*, JA:*) and Z(IZ:IZ+M-1,JZ:JZ+N-1) * must satisfy the following alignment properties: * -* 1.Identical (quadratic) dimension: +* 1.Identical (quadratic) dimension: * DESCA(M_) = DESCZ(M_) = DESCA(N_) = DESCZ(N_) -* 2.Quadratic conformal blocking: +* 2.Quadratic conformal blocking: * DESCA(MB_) = DESCA(NB_) = DESCZ(MB_) = DESCZ(NB_) * DESCA(RSRC_) = DESCZ(RSRC_) * 3.MOD( IA-1, MB_A ) = MOD( IZ-1, MB_Z ) = 0 @@ -338,8 +345,24 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*512 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* INFO = 0 @@ -366,7 +389,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, *********************************************************************** * * Set up pointers into the WORK array -* +* *********************************************************************** INDTAU = 1 INDD = INDTAU + N @@ -382,6 +405,24 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * *********************************************************************** CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) JOBZ, RANGE, UPLO, IA, IL, INFO, + $ IU, IZ, JA, JZ, LIWORK, LWORK, + $ M, N, NZ, VL, VU, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDSYEVR inputs:,JOBZ:',A5,',RANGE:',A5, + $ ',UPLO:',A5,',IA:',I5,',IL:',I5,',INFO:',I5, + $ ',IU:',I5,',IZ:',I5,',JA:',I5, + $ ',JZ:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NZ:',I5,',VL:',F9.4, + $ ',VU:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW * NPCOL @@ -407,11 +448,11 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * Take upper bound for VALEIG case MZ = N END IF -* +* NB = DESCA( NB_ ) IF ( WANTZ ) THEN NP00 = NUMROC( N, NB, 0, 0, NPROW ) - MQ00 = NUMROC( MZ, NB, 0, 0, NPCOL ) + MQ00 = NUMROC( MZ, NB, 0, 0, NPCOL ) INDRW = INDWORK + MAX(18*N, NP00*MQ00 + 2*NB*NB) LWMIN = INDRW - 1 + (ICEIL(MZ, NPROCS) + 2)*N ELSE @@ -436,7 +477,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, *********************************************************************** NNP = MAX( N, NPROCS+1, 4 ) IF ( WANTZ ) THEN - LIWMIN = 12*NNP + 2*N + LIWMIN = 12*NNP + 2*N ELSE 
LIWMIN = 10*NNP + 2*N END IF @@ -444,12 +485,12 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, *********************************************************************** * * Set up pointers into the IWORK array -* +* *********************************************************************** * Pointer to eigenpair distribution over processors - INDILU = LIWMIN - 2*NPROCS + 1 - SIZE2 = INDILU - 2*N - + INDILU = LIWMIN - 2*NPROCS + 1 + SIZE2 = INDILU - 2*N + *********************************************************************** * @@ -486,9 +527,9 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, INFO = -( 800+NB_ ) END IF IF( WANTZ ) THEN - IAROW = INDXG2P( 1, DESCA( NB_ ), MYROW, + IAROW = INDXG2P( 1, DESCA( NB_ ), MYROW, $ DESCA( RSRC_ ), NPROW ) - IZROW = INDXG2P( 1, DESCA( NB_ ), MYROW, + IZROW = INDXG2P( 1, DESCA( NB_ ), MYROW, $ DESCZ( RSRC_ ), NPROW ) IF( IAROW.NE.IZROW ) THEN INFO = -19 @@ -548,8 +589,16 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYEVR', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -565,6 +614,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, M = 0 WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -593,6 +646,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, IF (IINFO .NE. 0) THEN CALL PXERBLA( ICTXT, 'PDSYNTRD', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -602,7 +659,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * *********************************************************************** OFFSET = 0 - IF( IA.EQ.1 .AND. JA.EQ.1 .AND. + IF( IA.EQ.1 .AND. JA.EQ.1 .AND. $ DESCA( RSRC_ ).EQ.0 .AND. 
DESCA( CSRC_ ).EQ.0 ) $ THEN CALL PDLARED1D( N, IA, JA, DESCA, WORK( INDD ), WORK( INDD2 ), @@ -638,16 +695,16 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * SET IIL, IIU * *********************************************************************** - IF ( ALLEIG ) THEN + IF ( ALLEIG ) THEN IIL = 1 IIU = N ELSE IF ( INDEIG ) THEN IIL = IL IIU = IU ELSE IF ( VALEIG ) THEN - CALL DLARRC('T', N, VLL, VUU, WORK( INDD2 ), + CALL DLARRC('T', N, VLL, VUU, WORK( INDD2 ), $ WORK( INDE2 + OFFSET ), SAFMIN, EIGCNT, IIL, IIU, INFO) -* Refine upper bound N that was taken +* Refine upper bound N that was taken MZ = EIGCNT IIL = IIL + 1 ENDIF @@ -659,6 +716,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, END IF WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF @@ -684,7 +745,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, MYIU = IWORK(INDILU+NPROCS+MYPROC) - ZOFFSET = MAX(0, MYIL - IIL - 1) + ZOFFSET = MAX(0, MYIL - IIL - 1) FIRST = ( MYIL .EQ. IIL ) @@ -703,10 +764,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, DOU = MYIU - MYIL + 1 CALL DSTEGR2( JOBZ, 'I', N, WORK( INDD2 ), $ WORK( INDE2+OFFSET ), VLL, VUU, MYIL, MYIU, - $ IM, W( 1 ), WORK( INDRW ), N, + $ IM, W( 1 ), WORK( INDRW ), N, $ MYIU - MYIL + 1, - $ IWORK( 1 ), WORK( INDWORK ), SIZE1, - $ IWORK( 2*N+1 ), SIZE2, + $ IWORK( 1 ), WORK( INDWORK ), SIZE1, + $ IWORK( 2*N+1 ), SIZE2, $ DOL, DOU, ZOFFSET, IINFO ) * DSTEGR2 zeroes out the entire W array, so we can't just give * it the part of W we need. So here we copy the W entries into @@ -719,6 +780,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, END IF IF (IINFO .NE. 0) THEN CALL PXERBLA( ICTXT, 'DSTEGR2', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF ELSEIF ( WANTZ .AND. 
NPROCS.EQ.1 ) THEN @@ -731,21 +796,25 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, DOU = MYIU - IIL + 1 CALL DSTEGR2( JOBZ, 'I', N, WORK( INDD2 ), $ WORK( INDE2+OFFSET ), VLL, VUU, IIL, IIU, - $ IM, W( 1 ), WORK( INDRW ), N, + $ IM, W( 1 ), WORK( INDRW ), N, $ N, - $ IWORK( 1 ), WORK( INDWORK ), SIZE1, + $ IWORK( 1 ), WORK( INDWORK ), SIZE1, $ IWORK( 2*N+1 ), SIZE2, DOL, DOU, $ ZOFFSET, IINFO ) ENDIF IF (IINFO .NE. 0) THEN CALL PXERBLA( ICTXT, 'DSTEGR2', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF ELSEIF ( WANTZ ) THEN * * Compute representations in parallel. * Share eigenvalue computation for root between all processors -* Then compute the eigenvectors. +* Then compute the eigenvectors. * IINFO = 0 * Part 1. compute root representations and root eigenvalues @@ -754,20 +823,24 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, DOU = MYIU - IIL + 1 CALL DSTEGR2A( JOBZ, 'I', N, WORK( INDD2 ), $ WORK( INDE2+OFFSET ), VLL, VUU, IIL, IIU, - $ IM, W( 1 ), WORK( INDRW ), N, - $ N, WORK( INDWORK ), SIZE1, - $ IWORK( 2*N+1 ), SIZE2, DOL, + $ IM, W( 1 ), WORK( INDRW ), N, + $ N, WORK( INDWORK ), SIZE1, + $ IWORK( 2*N+1 ), SIZE2, DOL, $ DOU, NEEDIL, NEEDIU, $ INDERR, NSPLIT, PIVMIN, SCALE, WL, WU, $ IINFO ) ENDIF IF (IINFO .NE. 0) THEN CALL PXERBLA( ICTXT, 'DSTEGR2A', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * The second part of parallel MRRR, the representation tree -* construction begins. Upon successful completion, the +* construction begins. Upon successful completion, the * eigenvectors have been computed. This is indicated by * the flag FINISH. * @@ -780,17 +853,17 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * * There are currently two ways to communicate eigenvalue information * using the BLACS. -* 1.) BROADCAST +* 1.) BROADCAST * 2.) POINT2POINT between collaborators (those processors working * jointly on a cluster. 
* For efficiency, BROADCAST has been disabled. -* At a later stage, other more efficient communication algorithms +* At a later stage, other more efficient communication algorithms * might be implemented, e. g. group or tree-based communication. * DOBCST = .FALSE. IF(DOBCST) THEN * First gather everything on the first processor. -* Then use BROADCAST-based communication +* Then use BROADCAST-based communication DO 45 I = 2, NPROCS IF (MYPROC .EQ. (I - 1)) THEN DSTROW = 0 @@ -803,25 +876,25 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, LENGTHI = 0 ENDIF IWORK(2) = LENGTHI - CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, + CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, $ DSTROW, DSTCOL ) IF (( STARTI.GE.1 ) .AND. ( LENGTHI.GE.1 )) THEN LENGTHI2 = 2*LENGTHI * Copy eigenvalues into communication buffer CALL DCOPY(LENGTHI,W( STARTI ),1, - $ WORK( INDD ), 1) + $ WORK( INDD ), 1) * Copy uncertainties into communication buffer CALL DCOPY(LENGTHI,WORK( IINDERR+STARTI-1 ),1, - $ WORK( INDD+LENGTHI ), 1) + $ WORK( INDD+LENGTHI ), 1) * send buffer - CALL DGESD2D( ICTXT, LENGTHI2, + CALL DGESD2D( ICTXT, LENGTHI2, $ 1, WORK( INDD ), LENGTHI2, $ DSTROW, DSTCOL ) END IF ELSE IF (MYPROC .EQ. 0) THEN SRCROW = (I-1) / NPCOL SRCCOL = MOD(I-1, NPCOL) - CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, + CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, $ SRCROW, SRCCOL ) STARTI = IWORK(1) LENGTHI = IWORK(2) @@ -832,10 +905,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, $ WORK(INDD), LENGTHI2, SRCROW, SRCCOL ) * copy eigenvalues from communication buffer CALL DCOPY( LENGTHI, WORK(INDD), 1, - $ W( STARTI ), 1) + $ W( STARTI ), 1) * copy uncertainties (errors) from communication buffer CALL DCOPY(LENGTHI,WORK(INDD+LENGTHI),1, - $ WORK( IINDERR+STARTI-1 ), 1) + $ WORK( IINDERR+STARTI-1 ), 1) END IF END IF 45 CONTINUE @@ -843,10 +916,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, LENGTHI2 = LENGTHI * 2 IF (MYPROC .EQ. 
0) THEN * Broadcast eigenvalues and errors to all processors - CALL DCOPY(LENGTHI,W ,1, WORK( INDD ), 1) + CALL DCOPY(LENGTHI,W ,1, WORK( INDD ), 1) CALL DCOPY(LENGTHI,WORK( IINDERR ),1, - $ WORK( INDD+LENGTHI ), 1) - CALL DGEBS2D( ICTXT, 'A', ' ', LENGTHI2, 1, + $ WORK( INDD+LENGTHI ), 1) + CALL DGEBS2D( ICTXT, 'A', ' ', LENGTHI2, 1, $ WORK(INDD), LENGTHI2 ) ELSE SRCROW = 0 @@ -855,15 +928,15 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, $ WORK(INDD), LENGTHI2, SRCROW, SRCCOL ) CALL DCOPY( LENGTHI, WORK(INDD), 1, W, 1) CALL DCOPY(LENGTHI,WORK(INDD+LENGTHI),1, - $ WORK( IINDERR ), 1) + $ WORK( IINDERR ), 1) END IF ELSE * * Enable point2point communication between collaborators * -* Find collaborators of MYPROC +* Find collaborators of MYPROC IF( (NPROCS.GT.1).AND.(MYIL.GT.0) ) THEN - CALL PMPCOL( MYPROC, NPROCS, IIL, NEEDIL, NEEDIU, + CALL PMPCOL( MYPROC, NPROCS, IIL, NEEDIL, NEEDIU, $ IWORK(INDILU), IWORK(INDILU+NPROCS), $ COLBRT, FRSTCL, LASTCL ) ELSE @@ -872,34 +945,34 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, IF(COLBRT) THEN * If the processor collaborates with others, -* communicate information. +* communicate information. DO 47 IPROC = FRSTCL, LASTCL IF (MYPROC .EQ. IPROC) THEN STARTI = DOL IWORK(1) = STARTI LENGTHI = MYIU - MYIL + 1 IWORK(2) = LENGTHI - + IF ((STARTI.GE.1) .AND. (LENGTHI.GE.1)) THEN * Copy eigenvalues into communication buffer CALL DCOPY(LENGTHI,W( STARTI ),1, - $ WORK(INDD), 1) + $ WORK(INDD), 1) * Copy uncertainties into communication buffer CALL DCOPY(LENGTHI, $ WORK( IINDERR+STARTI-1 ),1, - $ WORK(INDD+LENGTHI), 1) + $ WORK(INDD+LENGTHI), 1) ENDIF - DO 46 I = FRSTCL, LASTCL + DO 46 I = FRSTCL, LASTCL IF(I.EQ.MYPROC) GOTO 46 DSTROW = I/ NPCOL DSTCOL = MOD(I, NPCOL) - CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, + CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, $ DSTROW, DSTCOL ) IF ((STARTI.GE.1) .AND. 
(LENGTHI.GE.1)) THEN LENGTHI2 = 2*LENGTHI * send buffer - CALL DGESD2D( ICTXT, LENGTHI2, + CALL DGESD2D( ICTXT, LENGTHI2, $ 1, WORK(INDD), LENGTHI2, $ DSTROW, DSTCOL ) END IF @@ -907,7 +980,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, ELSE SRCROW = IPROC / NPCOL SRCCOL = MOD(IPROC, NPCOL) - CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, + CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, $ SRCROW, SRCCOL ) RSTARTI = IWORK(1) RLENGTHI = IWORK(2) @@ -918,10 +991,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, $ SRCROW, SRCCOL ) * copy eigenvalues from communication buffer CALL DCOPY( RLENGTHI, WORK(INDE), 1, - $ W( RSTARTI ), 1) + $ W( RSTARTI ), 1) * copy uncertainties (errors) from communication buffer CALL DCOPY(RLENGTHI,WORK(INDE+RLENGTHI),1, - $ WORK( IINDERR+RSTARTI-1 ), 1) + $ WORK( IINDERR+RSTARTI-1 ), 1) END IF END IF 47 CONTINUE @@ -934,17 +1007,17 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * is constructed in parallel from top to bottom, * on level at a time, until all eigenvectors * have been computed. -* +* 100 CONTINUE IF ( MYIL.GT.0 ) THEN CALL DSTEGR2B( JOBZ, N, WORK( INDD2 ), - $ WORK( INDE2+OFFSET ), + $ WORK( INDE2+OFFSET ), $ IM, W( 1 ), WORK( INDRW ), N, N, - $ IWORK( 1 ), WORK( INDWORK ), SIZE1, - $ IWORK( 2*N+1 ), SIZE2, DOL, + $ IWORK( 1 ), WORK( INDWORK ), SIZE1, + $ IWORK( 2*N+1 ), SIZE2, DOL, $ DOU, NEEDIL, NEEDIU, INDWLC, $ PIVMIN, SCALE, WL, WU, - $ VSTART, FINISH, + $ VSTART, FINISH, $ MAXCLS, NDEPTH, PARITY, ZOFFSET, IINFO ) IINDWLC = INDWORK + INDWLC - 1 IF(.NOT.FINISH) THEN @@ -958,7 +1031,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, LASTCL = MYPROC ENDIF * -* Check if this processor collaborates, i.e. +* Check if this processor collaborates, i.e. * communication is needed. 
* IF(COLBRT) THEN @@ -976,23 +1049,23 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, * Copy eigenvalues into communication buffer CALL DCOPY(LENGTHI, $ WORK( IINDWLC+STARTI-1 ),1, - $ WORK(INDD), 1) + $ WORK(INDD), 1) * Copy uncertainties into communication buffer CALL DCOPY(LENGTHI, $ WORK( IINDERR+STARTI-1 ),1, - $ WORK(INDD+LENGTHI), 1) + $ WORK(INDD+LENGTHI), 1) ENDIF - - DO 146 I = FRSTCL, LASTCL + + DO 146 I = FRSTCL, LASTCL IF(I.EQ.MYPROC) GOTO 146 DSTROW = I/ NPCOL DSTCOL = MOD(I, NPCOL) - CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, + CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, $ DSTROW, DSTCOL ) IF ((STARTI.GE.1).AND.(LENGTHI.GE.1)) THEN LENGTHI2 = 2*LENGTHI * send buffer - CALL DGESD2D( ICTXT, LENGTHI2, + CALL DGESD2D( ICTXT, LENGTHI2, $ 1, WORK(INDD), LENGTHI2, $ DSTROW, DSTCOL ) END IF @@ -1000,7 +1073,7 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, ELSE SRCROW = IPROC / NPCOL SRCCOL = MOD(IPROC, NPCOL) - CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, + CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, $ SRCROW, SRCCOL ) RSTARTI = IWORK(1) RLENGTHI = IWORK(2) @@ -1011,19 +1084,23 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, $ SRCROW, SRCCOL ) * copy eigenvalues from communication buffer CALL DCOPY(RLENGTHI, WORK(INDE), 1, - $ WORK( IINDWLC+RSTARTI-1 ), 1) + $ WORK( IINDWLC+RSTARTI-1 ), 1) * copy uncertainties (errors) from communication buffer CALL DCOPY(RLENGTHI,WORK(INDE+RLENGTHI),1, - $ WORK( IINDERR+RSTARTI-1 ), 1) + $ WORK( IINDERR+RSTARTI-1 ), 1) END IF END IF 147 CONTINUE ENDIF - GOTO 100 + GOTO 100 ENDIF ENDIF IF (IINFO .NE. 
0) THEN CALL PXERBLA( ICTXT, 'DSTEGR2B', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -1055,17 +1132,17 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, LENGTHI = 0 ENDIF IWORK(2) = LENGTHI - CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, + CALL IGESD2D( ICTXT, 2, 1, IWORK, 2, $ DSTROW, DSTCOL ) IF ((STARTI.GE.1).AND.(LENGTHI.GE.1)) THEN - CALL DGESD2D( ICTXT, LENGTHI, + CALL DGESD2D( ICTXT, LENGTHI, $ 1, W( STARTI ), LENGTHI, $ DSTROW, DSTCOL ) ENDIF ELSE IF (MYPROC .EQ. 0) THEN SRCROW = (I-1) / NPCOL SRCCOL = MOD(I-1, NPCOL) - CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, + CALL IGERV2D( ICTXT, 2, 1, IWORK, 2, $ SRCROW, SRCCOL ) STARTI = IWORK(1) LENGTHI = IWORK(2) @@ -1100,12 +1177,16 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, CALL DLASRT2( 'I', M, W, IWORK( NPROCS+2 ), IINFO ) IF (IINFO.NE.0) THEN CALL PXERBLA( ICTXT, 'DLASRT2', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF *********************************************************************** * -* TRANSFORM Z FROM 1D WORKSPACE INTO 2D BLOCKCYCLIC STORAGE +* TRANSFORM Z FROM 1D WORKSPACE INTO 2D BLOCKCYCLIC STORAGE * *********************************************************************** IF ( WANTZ ) THEN @@ -1127,12 +1208,12 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, 180 CONTINUE IF ( FIRST ) THEN - CALL PDLAEVSWP(N, WORK( INDRW ), N, Z, IZ, JZ, - $ DESCZ, IWORK( 1 ), IWORK( NPROCS+M+2 ), WORK( INDWORK ), + CALL PDLAEVSWP(N, WORK( INDRW ), N, Z, IZ, JZ, + $ DESCZ, IWORK( 1 ), IWORK( NPROCS+M+2 ), WORK( INDWORK ), $ INDRW - INDWORK ) ELSE - CALL PDLAEVSWP(N, WORK( INDRW + N ), N, Z, IZ, JZ, - $ DESCZ, IWORK( 1 ), IWORK( NPROCS+M+2 ), WORK( INDWORK ), + CALL PDLAEVSWP(N, WORK( INDRW + N ), N, Z, IZ, JZ, + $ DESCZ, IWORK( 1 ), IWORK( NPROCS+M+2 ), WORK( INDWORK ), $ INDRW - INDWORK ) END IF * @@ -1151,6 +1232,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, END IF IF 
(IINFO.NE.0) THEN CALL PXERBLA( ICTXT, 'PDORMTR', -IINFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -1160,6 +1245,10 @@ SUBROUTINE PDSYEVR( JOBZ, RANGE, UPLO, N, A, IA, JA, WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYEVR diff --git a/SRC/pdsyevx.f b/SRC/pdsyevx.f index 708fa07c..4592c77c 100644 --- a/SRC/pdsyevx.f +++ b/SRC/pdsyevx.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, $ VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL, @@ -8,6 +14,7 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M, @@ -351,7 +358,7 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, * required for optimal performance for all work arrays. Each of * these values is returned in the first entry of the * corresponding work arrays, and no error message is issued by -* PXERBLA. +* PXERBLA. * * IWORK (local workspace) INTEGER array * On return, IWORK(1) contains the amount of integer workspace @@ -508,17 +515,63 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*512 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F QUICKRETURN = ( N.EQ.0 ) * * Test the input arguments. * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) JOBZ, RANGE, UPLO, IA, IL, INFO, + $ IU, IZ, JA, JZ, LIWORK, LWORK, + $ M, N, NZ, ABSTOL, + $ ORFAC, VL, VU, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDSYEVX inputs:,JOBZ:',A5,',RANGE:',A5, + $ ',UPLO:',A5,',IA:',I5,',IL:',I5,',INFO:',I5, + $ ',IU:',I5,',IZ:',I5,',JA:',I5, + $ ',JZ:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NZ:',I5,',ABSTOL:',F9.4, + $ ',ORFAC:',F9.4,',VL:',F9.4, + $ ',VU:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF INFO = 0 * WANTZ = LSAME( JOBZ, 'V' ) @@ -739,13 +792,25 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYEVX', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F IF( QUICKRETURN ) THEN IF( WANTZ ) THEN NZ = 0 @@ -754,6 +819,10 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, 
DESCA, VL, M = 0 WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -971,6 +1040,10 @@ SUBROUTINE PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, WORK( 1 ) = DBLE( LWOPT ) IWORK( 1 ) = LIWMIN * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYEVX diff --git a/SRC/pdsygs2.f b/SRC/pdsygs2.f index 31cef2ec..06ca0ca1 100644 --- a/SRC/pdsygs2.f +++ b/SRC/pdsygs2.f @@ -1,4 +1,10 @@ * +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +* +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, INFO ) @@ -8,6 +14,7 @@ SUBROUTINE PDSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, N @@ -188,16 +195,52 @@ SUBROUTINE PDSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, INTEGER INDXG2P EXTERNAL LSAME, INDXG2P * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, IBTYPE, INFO, + $ JA, JB, N, NPROW, NPCOL, MYROW, MYCOL, + $ eos_str + 102 FORMAT('PDSYGS2 inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',IBTYPE:',I5,',INFO:',I5,',JA:',I5, + $ ',JB:',I5,',N:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. * INFO = 0 @@ -251,13 +294,22 @@ SUBROUTINE PDSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYGS2', -INFO ) CALL BLACS_EXIT( ICTXT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 .OR. ( MYROW.NE.IAROW .OR. MYCOL.NE.IACOL ) ) - $ RETURN + IF( N.EQ.0 .OR. ( MYROW.NE.IAROW .OR. 
MYCOL.NE.IACOL ) ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Compute local information * @@ -414,6 +466,10 @@ SUBROUTINE PDSYGS2( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYGS2 diff --git a/SRC/pdsygst.f b/SRC/pdsygst.f index 85bb5cc4..abec3670 100644 --- a/SRC/pdsygst.f +++ b/SRC/pdsygst.f @@ -1,4 +1,10 @@ * +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +* +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, INFO ) @@ -8,6 +14,7 @@ SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, N @@ -196,10 +203,32 @@ SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, INTEGER ICEIL, INDXG2P EXTERNAL LSAME, ICEIL, INDXG2P * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
+* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * @@ -208,6 +237,21 @@ SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, IBTYPE, INFO, + $ JA, JB, N, SCALE, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDSYGST inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',IBTYPE:',I5,',INFO:',I5,',JA:',I5, + $ ',JB:',I5,',N:',I5,',SCALE:',F9.4, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -268,13 +312,22 @@ SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYGST', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( IBTYPE.EQ.1 ) THEN IF( UPPER ) THEN @@ -433,6 +486,10 @@ SUBROUTINE PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYGST diff --git a/SRC/pdsygvx.f b/SRC/pdsygvx.f index 5e898bb3..4e598119 100644 --- a/SRC/pdsygvx.f +++ b/SRC/pdsygvx.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 
2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, $ DESCA, B, IB, JB, DESCB, VL, VU, IL, IU, $ ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, @@ -9,6 +15,7 @@ SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, * and University of California, Berkeley. * October 15, 1999 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER JOBZ, RANGE, UPLO INTEGER IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ, @@ -525,16 +532,59 @@ SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, INT, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*576 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) JOBZ, RANGE, UPLO, IA, IB, IBTYPE, + $ IL, INFO, IU, IZ, JA, JB, JZ, + $ LIWORK, LWORK, M, + $ N, NZ, ABSTOL, ORFAC, VL, VU, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDSYGVX inputs:,JOBZ:',A5,',RANGE:',A5, + $ ',UPLO:',A5,',IA:',I5,',IB:',I5,',IBTYPE:',I5, + $ 
',IL:',I5,',INFO:',I5,',IU:',I5, + $ ',IZ:',I5,',JA:',I5,',JB:',I5, + $ ',JZ:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',NZ:',I5,',ABSTOL:',F9.4, + $ ',ORFAC:',F9.4,',VL:',F9.4, + $ ',VU:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -746,8 +796,16 @@ SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYGVX ', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -759,6 +817,10 @@ SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, WORK( 1 ) = DBLE( LWOPT ) IFAIL( 1 ) = INFO INFO = IERRNPD +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -812,6 +874,10 @@ SUBROUTINE PDSYGVX( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, * IWORK( 1 ) = LIWMIN WORK( 1 ) = DBLE( LWOPT ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYGVX diff --git a/SRC/pdsyngst.f b/SRC/pdsyngst.f index 65512785..43ca40c7 100644 --- a/SRC/pdsyngst.f +++ b/SRC/pdsyngst.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, WORK, LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * and University of California, Berkeley. * October 15, 1999 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, IB, IBTYPE, INFO, JA, JB, LWORK, N @@ -236,9 +243,40 @@ SUBROUTINE PDSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, IB, IBTYPE, INFO, + $ JA, JB, LWORK, N, SCALE, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDSYNGST inputs:,UPLO:',A5,',IA:',I5,',IB:',I5, + $ ',IBTYPE:',I5,',INFO:',I5,',JA:',I5, + $ ',JB:',I5,',LWORK:',I5,',N:',I5, + $ ',SCALE:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF SCALE = 1.0D0 * NB = DESCA( MB_ ) @@ -316,20 +354,37 @@ SUBROUTINE PDSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYNGST', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * IF( IBTYPE.NE.1 .OR. UPPER .OR. 
LWORK.LT.LWOPT ) THEN CALL PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, $ DESCB, SCALE, INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -419,5 +474,9 @@ SUBROUTINE PDSYNGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, * WORK( 1 ) = DBLE( LWOPT ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END diff --git a/SRC/pdsyntrd.f b/SRC/pdsyntrd.f index 3ba66029..c143b613 100644 --- a/SRC/pdsyntrd.f +++ b/SRC/pdsyntrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYNTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSYNTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N @@ -286,16 +293,51 @@ SUBROUTINE PDSYNTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, INT, MAX, MIN, MOD, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, LWORK, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDSYNTRD inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',LWORK:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -354,15 +396,28 @@ SUBROUTINE PDSYNTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYNTRD', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * ONEPMIN = N*N + 3*N + 1 @@ -545,6 +600,10 @@ SUBROUTINE PDSYNTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * WORK( 1 ) = DBLE( TTLWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYNTRD diff --git a/SRC/pdsytd2.f b/SRC/pdsytd2.f index b316c7fe..5ae69ce1 100644 --- a/SRC/pdsytd2.f +++ b/SRC/pdsytd2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced 
Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYTD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSYTD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N @@ -240,13 +247,42 @@ SUBROUTINE PDSYTD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * .. Intrinsic Functions .. INTRINSIC DBLE * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, LWORK, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDSYTD2 inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',LWORK:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -277,15 +313,28 @@ SUBROUTINE PDSYTD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYTD2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if 
possible * - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Compute local information * @@ -457,6 +506,10 @@ SUBROUTINE PDSYTD2( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYTD2 diff --git a/SRC/pdsytrd.f b/SRC/pdsytrd.f index f1597cd3..0dc5a882 100644 --- a/SRC/pdsytrd.f +++ b/SRC/pdsytrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDSYTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N @@ -251,13 +258,42 @@ SUBROUTINE PDSYTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * .. Intrinsic Functions .. INTRINSIC DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, LWORK, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDSYTRD inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',LWORK:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -307,15 +343,28 @@ SUBROUTINE PDSYTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYTRD', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * CALL PB_TOPGET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPGET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) @@ -418,6 +467,10 @@ SUBROUTINE PDSYTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYTRD diff --git a/SRC/pdsyttrd.f b/SRC/pdsyttrd.f index ac98ed6e..4d506ea0 100644 --- a/SRC/pdsyttrd.f +++ b/SRC/pdsyttrd.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, $ LWORK, INFO ) * @@ -5,6 +11,7 @@ SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER UPLO INTEGER IA, INFO, JA, LWORK, N @@ -456,10 +463,32 @@ SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * .. * * +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * This is just to keep ftnchek and toolpack/1 happy IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* - $ RSRC_.LT.0 )RETURN + $ RSRC_.LT.0 )THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * @@ -483,6 +512,19 @@ SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) UPLO, IA, INFO, JA, LWORK, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDSYTTRD inputs:,UPLO:',A5,',IA:',I5,',INFO:',I5, + $ ',JA:',I5,',LWORK:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF * SAFMAX = SQRT( PDLAMCH( ICTXT, 'O' ) ) / N SAFMIN = SQRT( PDLAMCH( ICTXT, 'S' ) ) @@ -566,13 +608,22 @@ SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, 
E, TAU, WORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYTTRD', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * * @@ -656,6 +707,10 @@ SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDSYTTRD', -INFO ) WORK( 1 ) = DBLE( LWMIN ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -1190,6 +1245,10 @@ SUBROUTINE PDSYTTRD( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, * * WORK( 1 ) = DBLE( LWMIN ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDSYTTRD diff --git a/SRC/pdtrcon.f b/SRC/pdtrcon.f index 28f253c3..21d1b3b1 100644 --- a/SRC/pdtrcon.f +++ b/SRC/pdtrcon.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, $ WORK, LWORK, IWORK, LIWORK, INFO ) * @@ -7,6 +13,7 @@ SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, * May 25, 2001 * * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIAG, NORM, UPLO INTEGER IA, JA, INFO, LIWORK, LWORK, N @@ -217,13 +224,44 @@ SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, NORM, UPLO, IA, JA, INFO, + $ LIWORK, LWORK, N, RCOND, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDTRCON inputs:,DIAG:',A5,',NORM:',A5, + $ ',UPLO:',A5,',IA:',I5,',JA:',I5,',INFO:',I5, + $ ',LIWORK:',I5,',LWORK:',I5,',N:',I5, + $ ',RCOND:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -302,8 +340,16 @@ SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRCON', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -311,6 +357,10 @@ SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, * IF( N.EQ.0 ) THEN RCOND = ONE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -419,6 +469,10 @@ SUBROUTINE PDTRCON( NORM, UPLO, DIAG, N, A, IA, JA, DESCA, RCOND, CALL PB_TOPSET( ICTXT, 'Combine', 'Columnwise', COLCTOP ) CALL PB_TOPSET( ICTXT, 'Combine', 'Rowwise', ROWCTOP ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDTRCON diff --git a/SRC/pdtrord.f b/SRC/pdtrord.f index 38705743..1f63fb76 100644 --- a/SRC/pdtrord.f +++ b/SRC/pdtrord.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, $ DESCT, Q, IQ, JQ, DESCQ, WR, WI, M, WORK, LWORK, $ IWORK, LIWORK, INFO ) @@ -9,6 +15,7 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver * May 1 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -319,7 +326,7 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, $ PITRAF, PDW, WINEIG, WINSIZ, LLDQ, $ RSRC, CSRC, ILILO, ILIHI, ILSEL, IRSRC, $ ICSRC, IPIW, IPW1, IPW2, IPW3, TIHI, TILO, - $ LIHI, WINDOW, LILO, LSEL, BUFFER, + $ LIHI, WINDOW, LILO, LSEL, INT_BUFFER, $ NMWIN2, BUFFLEN, LROWS, LCOLS, ILOC2, JLOC2, $ WNEICR, WINDOW0, RSRC4, CSRC4, LIHI4, RSRC3, $ CSRC3, RSRC2, CSRC2, LIHIC, LIHI1, ILEN4, @@ -354,12 +361,43 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, * .. Local Functions .. INTEGER ICEIL * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters. 
* ICTXT = DESCT( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) COMPQ, INFO, LIWORK, LWORK, + $ M, N, IT, JT, IQ, + $ JQ, NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDTRORD inputs:,COMPQ:',A5,',INFO:',I5, + $ ',LIWORK:',I5,',LWORK:',I5,',M:',I5, + $ ',N:',I5,',IT:',I5,',JT:',I5,',IQ:',I5, + $ ',JQ:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL * * Test if grid is O.K., i.e., the context is valid. @@ -532,10 +570,18 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, IF( INFO.NE.0 .AND. .NOT.LQUERY ) THEN M = 0 CALL PXERBLA( ICTXT, 'PDTRORD', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSEIF( LQUERY ) THEN WORK( 1 ) = DBLE(LWMIN) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -988,41 +1034,41 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, * BUFFLEN = 0. * IF( MYROW.EQ.RSRC .AND. MYCOL.EQ.CSRC ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN IF( BUFFLEN.NE.0 ) THEN DO 180 INDX = 1, ILEN - WORK( BUFFER+INDX-1 ) = + WORK( INT_BUFFER+INDX-1 ) = $ DBLE( IWORK(IPIW+INDX-1) ) 180 CONTINUE CALL DLAMOV( 'All', DLEN, 1, WORK( IPW2 ), - $ DLEN, WORK(BUFFER+ILEN), DLEN ) + $ DLEN, WORK(INT_BUFFER+ILEN), DLEN ) IF( NPCOL.GT.1 .AND. DIR.EQ.1 ) THEN CALL DGEBS2D( ICTXT, 'Row', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN ) + $ WORK(INT_BUFFER), BUFFLEN ) END IF IF( NPROW.GT.1 .AND. DIR.EQ.2 ) THEN CALL DGEBS2D( ICTXT, 'Col', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN ) + $ WORK(INT_BUFFER), BUFFLEN ) END IF END IF ELSEIF( MYROW.EQ.RSRC .OR. MYCOL.EQ.CSRC ) THEN IF( NPCOL.GT.1 .AND. DIR.EQ.1 .AND. 
MYROW.EQ.RSRC ) $ THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN IF( BUFFLEN.NE.0 ) THEN CALL DGEBR2D( ICTXT, 'Row', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN, RSRC, CSRC ) + $ WORK(INT_BUFFER), BUFFLEN, RSRC, CSRC ) END IF END IF IF( NPROW.GT.1 .AND. DIR.EQ.2 .AND. MYCOL.EQ.CSRC ) $ THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN IF( BUFFLEN.NE.0 ) THEN CALL DGEBR2D( ICTXT, 'Col', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN, RSRC, CSRC ) + $ WORK(INT_BUFFER), BUFFLEN, RSRC, CSRC ) END IF END IF IF((NPCOL.GT.1.AND.DIR.EQ.1.AND.MYROW.EQ.RSRC).OR. @@ -1031,10 +1077,10 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, IF( BUFFLEN.NE.0 ) THEN DO 190 INDX = 1, ILEN IWORK(IPIW+INDX-1) = - $ INT(WORK( BUFFER+INDX-1 )) + $ INT(WORK( INT_BUFFER+INDX-1 )) 190 CONTINUE CALL DLAMOV( 'All', DLEN, 1, - $ WORK( BUFFER+ILEN ), DLEN, + $ WORK( INT_BUFFER+ILEN ), DLEN, $ WORK( IPW2 ), DLEN ) END IF END IF @@ -1079,7 +1125,7 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, * Compute amount of work space necessary for performing * matrix-matrix multiplications. * - PDW = BUFFER + PDW = INT_BUFFER IPW3 = PDW + NWIN*NWIN ELSE FLOPS = 0 @@ -2259,107 +2305,107 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, * Broadcast the orthogonal transformations. * IF( MYROW.EQ.RSRC1 .AND. MYCOL.EQ.CSRC1 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN IF( (NPROW.GT.1 .AND. DIR.EQ.2) .OR. $ (NPCOL.GT.1 .AND. DIR.EQ.1) ) THEN DO 370 INDX = 1, ILEN - WORK( BUFFER+INDX-1 ) = + WORK( INT_BUFFER+INDX-1 ) = $ DBLE( IWORK(IPIW+INDX-1) ) 370 CONTINUE CALL DLAMOV( 'All', DLEN, 1, WORK( IPW3 ), - $ DLEN, WORK(BUFFER+ILEN), DLEN ) + $ DLEN, WORK(INT_BUFFER+ILEN), DLEN ) END IF IF( NPCOL.GT.1 .AND. DIR.EQ.1 ) THEN CALL DGEBS2D( ICTXT, 'Row', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN ) + $ WORK(INT_BUFFER), BUFFLEN ) END IF IF( NPROW.GT.1 .AND. 
DIR.EQ.2 ) THEN CALL DGEBS2D( ICTXT, 'Col', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN ) + $ WORK(INT_BUFFER), BUFFLEN ) END IF ELSEIF( MYROW.EQ.RSRC1 .OR. MYCOL.EQ.CSRC1 ) THEN IF( NPCOL.GT.1 .AND. DIR.EQ.1 .AND. $ MYROW.EQ.RSRC1 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN CALL DGEBR2D( ICTXT, 'Row', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN, RSRC1, CSRC1 ) + $ WORK(INT_BUFFER), BUFFLEN, RSRC1, CSRC1 ) END IF IF( NPROW.GT.1 .AND. DIR.EQ.2 .AND. $ MYCOL.EQ.CSRC1 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN CALL DGEBR2D( ICTXT, 'Col', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN, RSRC1, CSRC1 ) + $ WORK(INT_BUFFER), BUFFLEN, RSRC1, CSRC1 ) END IF IF( (NPCOL.GT.1.AND.DIR.EQ.1.AND.MYROW.EQ.RSRC1) $ .OR. (NPROW.GT.1.AND.DIR.EQ.2.AND. $ MYCOL.EQ.CSRC1) ) THEN DO 380 INDX = 1, ILEN IWORK(IPIW+INDX-1) = - $ INT( WORK( BUFFER+INDX-1 ) ) + $ INT( WORK( INT_BUFFER+INDX-1 ) ) 380 CONTINUE CALL DLAMOV( 'All', DLEN, 1, - $ WORK( BUFFER+ILEN ), DLEN, + $ WORK( INT_BUFFER+ILEN ), DLEN, $ WORK( IPW3 ), DLEN ) END IF END IF IF( RSRC1.NE.RSRC4 ) THEN IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN IF( NPCOL.GT.1 .AND. DIR.EQ.1 ) THEN DO 390 INDX = 1, ILEN - WORK( BUFFER+INDX-1 ) = + WORK( INT_BUFFER+INDX-1 ) = $ DBLE( IWORK(IPIW+INDX-1) ) 390 CONTINUE CALL DLAMOV( 'All', DLEN, 1, WORK( IPW3 ), - $ DLEN, WORK(BUFFER+ILEN), DLEN ) + $ DLEN, WORK(INT_BUFFER+ILEN), DLEN ) CALL DGEBS2D( ICTXT, 'Row', TOP, BUFFLEN, - $ 1, WORK(BUFFER), BUFFLEN ) + $ 1, WORK(INT_BUFFER), BUFFLEN ) END IF ELSEIF( MYROW.EQ.RSRC4 .AND. DIR.EQ.1 .AND. 
$ NPCOL.GT.1 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN - CALL DGEBR2D( ICTXT, 'Row', TOP, BUFFLEN, - $ 1, WORK(BUFFER), BUFFLEN, RSRC4, CSRC4 ) + CALL DGEBR2D( ICTXT, 'Row', TOP, BUFFLEN, 1, + $ WORK(INT_BUFFER), BUFFLEN, RSRC4, CSRC4 ) DO 400 INDX = 1, ILEN IWORK(IPIW+INDX-1) = - $ INT( WORK( BUFFER+INDX-1 ) ) + $ INT( WORK( INT_BUFFER+INDX-1 ) ) 400 CONTINUE CALL DLAMOV( 'All', DLEN, 1, - $ WORK( BUFFER+ILEN ), DLEN, + $ WORK( INT_BUFFER+ILEN ), DLEN, $ WORK( IPW3 ), DLEN ) END IF END IF IF( CSRC1.NE.CSRC4 ) THEN IF( MYROW.EQ.RSRC4 .AND. MYCOL.EQ.CSRC4 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN IF( NPROW.GT.1 .AND. DIR.EQ.2 ) THEN DO 395 INDX = 1, ILEN - WORK( BUFFER+INDX-1 ) = + WORK( INT_BUFFER+INDX-1 ) = $ DBLE( IWORK(IPIW+INDX-1) ) 395 CONTINUE CALL DLAMOV( 'All', DLEN, 1, WORK( IPW3 ), - $ DLEN, WORK(BUFFER+ILEN), DLEN ) + $ DLEN, WORK(INT_BUFFER+ILEN), DLEN ) CALL DGEBS2D( ICTXT, 'Col', TOP, BUFFLEN, - $ 1, WORK(BUFFER), BUFFLEN ) + $ 1, WORK(INT_BUFFER), BUFFLEN ) END IF ELSEIF( MYCOL.EQ.CSRC4 .AND. DIR.EQ.2 .AND. $ NPROW.GT.1 ) THEN - BUFFER = PDTRAF + INT_BUFFER = PDTRAF BUFFLEN = DLEN + ILEN CALL DGEBR2D( ICTXT, 'Col', TOP, BUFFLEN, 1, - $ WORK(BUFFER), BUFFLEN, RSRC4, CSRC4 ) + $ WORK(INT_BUFFER), BUFFLEN, RSRC4, CSRC4 ) DO 402 INDX = 1, ILEN IWORK(IPIW+INDX-1) = - $ INT( WORK( BUFFER+INDX-1 ) ) + $ INT( WORK( INT_BUFFER+INDX-1 ) ) 402 CONTINUE CALL DLAMOV( 'All', DLEN, 1, - $ WORK( BUFFER+ILEN ), DLEN, + $ WORK( INT_BUFFER+ILEN ), DLEN, $ WORK( IPW3 ), DLEN ) END IF END IF @@ -2390,7 +2436,7 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, IF( ((MYCOL.EQ.CSRC1.OR.MYCOL.EQ.CSRC4).AND.DIR.EQ.2) $ .OR. ((MYROW.EQ.RSRC1.OR.MYROW.EQ.RSRC4).AND. $ DIR.EQ.1)) THEN - IPW4 = BUFFER + IPW4 = INT_BUFFER IF( DIR.EQ.2 ) THEN IF( WANTQ ) THEN QROWS = NUMROC( N, NB, MYROW, DESCQ( RSRC_ ), @@ -3457,6 +3503,10 @@ SUBROUTINE PDTRORD( COMPQ, SELECT, PARA, N, T, IT, JT, * * Return to calling program. 
* +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDTRORD diff --git a/SRC/pdtrrfs.f b/SRC/pdtrrfs.f index 98c13e7b..3995563e 100644 --- a/SRC/pdtrrfs.f +++ b/SRC/pdtrrfs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, $ B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, $ BERR, WORK, LWORK, IWORK, LIWORK, INFO ) @@ -7,6 +13,7 @@ SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIAG, TRANS, UPLO INTEGER INFO, IA, IB, IX, JA, JB, JX, LIWORK, LWORK, @@ -281,13 +288,46 @@ SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, TRANS, UPLO, INFO, IA, + $ IB, IX, JA, JB, JX, LIWORK, LWORK, + $ N, NRHS, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDTRRFS inputs:,DIAG:',A5,',TRANS:',A5, + $ ',UPLO:',A5,',INFO:',I5,',IA:',I5,',IB:',I5, + $ ',IX:',I5,',JA:',I5,',JB:',I5, + $ ',JX:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',N:',I5,',NRHS:',I5,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters. * INFO = 0 @@ -402,8 +442,16 @@ SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, END IF IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRRFS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -418,6 +466,10 @@ SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, FERR( JJ ) = ZERO BERR( JJ ) = ZERO 10 CONTINUE +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -789,6 +841,10 @@ SUBROUTINE PDTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, WORK( 1 ) = DBLE( LWMIN ) IWORK( 1 ) = LIWMIN * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDTRRFS diff --git a/SRC/pdtrsen.f b/SRC/pdtrsen.f index c65ea911..cce59220 100644 --- a/SRC/pdtrsen.f +++ b/SRC/pdtrsen.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRSEN( JOB, COMPQ, SELECT, PARA, N, T, IT, JT, $ DESCT, Q, IQ, JQ, DESCQ, WR, WI, M, S, SEP, WORK, LWORK, $ IWORK, LIWORK, INFO ) @@ -10,6 +16,7 @@ SUBROUTINE PDTRSEN( JOB, COMPQ, SELECT, PARA, N, T, IT, JT, * Univ. of Colorado Denver and University of California, Berkeley. * January, 2012 * + USE LINK_TO_C_GLOBALS IMPLICIT NONE * * .. Scalar Arguments .. @@ -379,12 +386,45 @@ SUBROUTINE PDTRSEN( JOB, COMPQ, SELECT, PARA, N, T, IT, JT, * .. Intrinsic Functions .. INTRINSIC MAX, MIN, SQRT * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*448 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCT( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) COMPQ, JOB, INFO, LIWORK, LWORK, + $ M, N, IT, JT, + $ IQ, JQ, S, SEP, NPROW, NPCOL, MYROW, + $ MYCOL, eos_str + 102 FORMAT('PDTRSEN inputs:,COMPQ:',A5,',JOB:',A5, + $ ',INFO:',I5,',LIWORK:',I5,',LWORK:',I5, + $ ',M:',I5,',N:',I5,',IT:',I5,',JT:',I5, + $ ',IQ:',I5,',JQ:',I5,',S:',F9.4, + $ ',SEP:',F9.4,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF NPROCS = NPROW*NPCOL * * Test if grid is O.K., i.e., the context is valid @@ -615,10 +655,18 @@ SUBROUTINE PDTRSEN( JOB, COMPQ, SELECT, PARA, N, T, IT, JT, S = ONE SEP = ZERO CALL PXERBLA( ICTXT, 'PDTRSEN', -INFO ) +* +* Capture the subroutine exit in the trace file +* + 
AOCL_DTL_TRACE_EXIT_F RETURN ELSEIF( LQUERY ) THEN WORK( 1 ) = DBLE(LWMIN) IWORK( 1 ) = LIWMIN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -703,6 +751,10 @@ SUBROUTINE PDTRSEN( JOB, COMPQ, SELECT, PARA, N, T, IT, JT, * 50 CONTINUE * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDTRSEN diff --git a/SRC/pdtrti2.f b/SRC/pdtrti2.f index 6d8ba7e5..ac243b32 100644 --- a/SRC/pdtrti2.f +++ b/SRC/pdtrti2.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRTI2( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDTRTI2( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIAG, UPLO INTEGER IA, INFO, JA, N @@ -147,13 +154,42 @@ SUBROUTINE PDTRTI2( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) LOGICAL LSAME EXTERNAL LSAME * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, UPLO, IA, INFO, JA, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDTRTI2 inputs:,DIAG:',A5,',UPLO:',A5, + $ ',IA:',I5,',INFO:',I5,',JA:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -173,6 +209,10 @@ SUBROUTINE PDTRTI2( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRTI2', -INFO ) CALL BLACS_ABORT( ICTXT, 1 ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * diff --git a/SRC/pdtrtri.f b/SRC/pdtrtri.f index 719a0ab0..88fc4b00 100644 --- a/SRC/pdtrtri.f +++ b/SRC/pdtrtri.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * * -- ScaLAPACK routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIAG, UPLO INTEGER IA, INFO, JA, N @@ -160,13 +167,42 @@ SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* .. LOG variables declaration .. +* .. 
+* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*320 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. * +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, UPLO, IA, INFO, JA, N, + $ NPROW, NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDTRTRI inputs:,DIAG:',A5,',UPLO:',A5, + $ ',IA:',I5,',INFO:',I5,',JA:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test input parameters * INFO = 0 @@ -210,13 +246,22 @@ SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRTRI', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN + IF( N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Check for singularity if non-unit. 
* @@ -265,8 +310,13 @@ SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) 30 CONTINUE CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUMMY, $ IDUMMY, -1, -1, MYCOL ) - IF( INFO.NE.0 ) - $ RETURN + IF( INFO.NE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF END IF * * Use blocked code @@ -346,6 +396,10 @@ SUBROUTINE PDTRTRI( UPLO, DIAG, N, A, IA, JA, DESCA, INFO ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End PDTRTRI diff --git a/SRC/pdtrtrs.f b/SRC/pdtrtrs.f index 132640af..1e5ae630 100644 --- a/SRC/pdtrtrs.f +++ b/SRC/pdtrtrs.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, $ B, IB, JB, DESCB, INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. CHARACTER DIAG, TRANS, UPLO INTEGER IA, IB, INFO, JA, JB, N, NRHS @@ -190,13 +197,44 @@ SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*384 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) DIAG, TRANS, UPLO, IA, IB, INFO, + $ JA, JB, N, NRHS, NPROW, NPCOL, + $ MYROW, MYCOL, eos_str + 102 FORMAT('PDTRTRS inputs:,DIAG:',A5,',TRANS:',A5, + $ ',UPLO:',A5,',IA:',I5,',IB:',I5,',INFO:',I5, + $ ',JA:',I5,',JB:',I5,',N:',I5, + $ ',NRHS:',I5,',NPROW:',I5,',NPCOL:',I5, + $ ',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test input parameters * INFO = 0 @@ -261,13 +299,22 @@ SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTRTRS', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( N.EQ.0 .OR. NRHS.EQ.0 ) - $ RETURN + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * * Check for singularity if non-unit. * @@ -317,8 +364,13 @@ SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, 30 CONTINUE CALL IGAMX2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, IDUM, IDUM, $ -1, -1, MYCOL ) - IF( INFO.NE.0 ) - $ RETURN + IF( INFO.NE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF END IF * * Solve A * x = b or A' * x = b. 
@@ -326,6 +378,10 @@ SUBROUTINE PDTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, IA, JA, DESCA, CALL PDTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, IA, JA, $ DESCA, B, IB, JB, DESCB ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDTRTRS diff --git a/SRC/pdtzrzf.f b/SRC/pdtzrzf.f index f72342fb..9c9727ef 100644 --- a/SRC/pdtzrzf.f +++ b/SRC/pdtzrzf.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. +* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) * @@ -6,6 +12,7 @@ SUBROUTINE PDTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * and University of California, Berkeley. * May 25, 2001 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IA, INFO, JA, LWORK, M, N * .. @@ -211,13 +218,42 @@ SUBROUTINE PDTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, MAX, MIN, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IA, INFO, JA, LWORK, M, N, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDTZRZF inputs:,IA:',I5,',INFO:',I5,',JA:',I5, + $ ',LWORK:',I5,',M:',I5,',N:',I5, + $ ',NPROW:',I5,',NPCOL:',I5,',MYROW:',I5, + $ ',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Test the input parameters * INFO = 0 @@ -256,15 +292,28 @@ SUBROUTINE PDTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * IF( INFO.NE.0 ) THEN CALL PXERBLA( ICTXT, 'PDTZRZF', -INFO ) +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN ELSE IF( LQUERY ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * * Quick return if possible * - IF( M.EQ.0 .OR. N.EQ.0 ) - $ RETURN + IF( M.EQ.0 .OR. N.EQ.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * IF( M.EQ.N ) THEN * @@ -327,6 +376,10 @@ SUBROUTINE PDTZRZF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * WORK( 1 ) = DBLE( LWMIN ) * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDTZRZF diff --git a/SRC/pdzsum1.f b/SRC/pdzsum1.f index 09e5f6f3..23950091 100644 --- a/SRC/pdzsum1.f +++ b/SRC/pdzsum1.f @@ -1,3 +1,9 @@ +* +* Copyright (c) 2023 Advanced Micro Devices, Inc.  All rights reserved. 
+* +* +#include "SL_Context_fortran_include.h" +* SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX ) * * -- ScaLAPACK auxiliary routine (version 1.7) -- @@ -5,6 +11,7 @@ SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX ) * and University of California, Berkeley. * May 1, 1997 * + USE LINK_TO_C_GLOBALS * .. Scalar Arguments .. INTEGER IX, INCX, JX, N DOUBLE PRECISION ASUM @@ -163,16 +170,49 @@ SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX ) * .. Intrinsic Functions .. INTRINSIC ABS, MOD * .. +* .. LOG variables declaration .. +* .. +* BUFFER size: Function name and Process grid info (128 Bytes) + +* Variable names + Variable values(num_vars *10) + CHARACTER BUFFER*256 + CHARACTER*2, PARAMETER :: eos_str = '' // C_NULL_CHAR * .. Executable Statements .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* +* +* Capture the subroutine entry in the trace file +* + AOCL_DTL_TRACE_ENTRY_F * ICTXT = DESCX( CTXT_ ) CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * +* Update the log buffer with the scalar arguments details, +* MPI process grid information and write to the log file +* + IF( SCALAPACK_CONTEXT%IS_LOG_ENABLED.EQ.1 ) THEN + WRITE(BUFFER,102) IX, INCX, JX, N, ASUM, NPROW, + $ NPCOL, MYROW, MYCOL, eos_str + 102 FORMAT('PDZSUM1 inputs:,IX:',I5,',INCX:',I5,',JX:',I5, + $ ',N:',I5,',ASUM:',F9.4,',NPROW:',I5, + $ ',NPCOL:',I5,',MYROW:',I5,',MYCOL:',I5,A1) + AOCL_DTL_LOG_ENTRY_F + END IF +* * Quick return if possible * ASUM = ZERO - IF( N.LE.0 ) - $ RETURN + IF( N.LE.0 ) THEN +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F + RETURN + END IF * LDX = DESCX( LLD_ ) CALL INFOG2L( IX, JX, DESCX, NPROW, NPCOL, MYROW, MYCOL, IIX, JJX, @@ -182,6 +222,10 @@ SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX ) IF( MYROW.EQ.IXROW .AND. 
MYCOL.EQ.IXCOL ) THEN ASUM = ABS( X( IIX+(JJX-1)*LDX ) ) END IF +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN END IF * @@ -217,6 +261,10 @@ SUBROUTINE PDZSUM1( N, ASUM, X, IX, JX, DESCX, INCX ) * END IF * +* +* Capture the subroutine exit in the trace file +* + AOCL_DTL_TRACE_EXIT_F RETURN * * End of PDZSUM1 diff --git a/SRC/psgeqrf.f b/SRC/psgeqrf.f index 6f19f1fe..cfcfdcf2 100644 --- a/SRC/psgeqrf.f +++ b/SRC/psgeqrf.f @@ -1,9 +1,13 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. * June 20, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PSGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -171,11 +175,18 @@ SUBROUTINE PSGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Scalars .. LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP @@ -197,6 +208,12 @@ SUBROUTINE PSGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC MIN, MOD, REAL * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. 
* * Get grid parameters @@ -262,10 +279,12 @@ SUBROUTINE PSGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, JB = JN - JA + 1 * #ifdef AOCL_PROGRESS +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN CURRENT_RANK = MYCOL+MYROW*NPCOL TOTAL_MPI_PROCESSES = NPROW*NPCOL - LSTAGE = 7 - API_NAME = 'PSGEQRF' + END IF #endif * * @@ -295,10 +314,18 @@ SUBROUTINE PSGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) -#endif +* Update the progress and callback if progress is enabled * + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF +#endif * * Compute the QR factorization of the current block * A(i:ia+m-1,j:j+jb-1) diff --git a/SRC/psgerfs.f b/SRC/psgerfs.f index 792d0782..5a9de957 100644 --- a/SRC/psgerfs.f +++ b/SRC/psgerfs.f @@ -302,6 +302,10 @@ SUBROUTINE PSGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, * .. * .. Executable Statements .. * +* +* .. Initialize EST + EST = (0.0, 0.0) + * Get grid parameters * ICTXT = DESCA( CTXT_ ) diff --git a/SRC/psgetrf.f b/SRC/psgetrf.f index ba9be56d..3732d156 100644 --- a/SRC/psgetrf.f +++ b/SRC/psgetrf.f @@ -1,8 +1,11 @@ * -- ScaLAPACK routine -- -* Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. -* June 10, 2022 +* Copyright (c) 2020-23 Advanced Micro Devices, Inc.  All rights reserved. 
+* +#include "SL_Context_fortran_include.h" * SUBROUTINE PSGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -150,11 +153,18 @@ SUBROUTINE PSGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) INTEGER I, ICOFF, ICTXT, IINFO, IN, IROFF, J, JB, JN, $ MN, MYCOL, MYROW, NPCOL, NPROW * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. @@ -170,6 +180,12 @@ SUBROUTINE PSGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. 
* * Get grid parameters @@ -230,10 +246,12 @@ SUBROUTINE PSGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) JB = JN - JA + 1 * #ifdef AOCL_PROGRESS - CURRENT_RANK = MYCOL+MYROW*NPCOL - TOTAL_MPI_PROCESSES = NPROW*NPCOL - LSTAGE = 7 - API_NAME = 'PSGETRF' +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN + CURRENT_RANK = MYCOL+MYROW*NPCOL + TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF #endif * * Factor diagonal and subdiagonal blocks and test for exact @@ -271,10 +289,20 @@ SUBROUTINE PSGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * +* * Factor diagonal and subdiagonal blocks and test for exact * singularity. * diff --git a/SRC/psporfs.f b/SRC/psporfs.f index 38401354..34a228c5 100644 --- a/SRC/psporfs.f +++ b/SRC/psporfs.f @@ -298,6 +298,8 @@ SUBROUTINE PSPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, INTRINSIC ABS, ICHAR, MAX, MIN, MOD, REAL * .. * .. Executable Statements .. +* .. Initialize EST + EST = 0.0 * * Get grid parameters * diff --git a/SRC/pspotrf.f b/SRC/pspotrf.f index 360d25a5..461ae517 100644 --- a/SRC/pspotrf.f +++ b/SRC/pspotrf.f @@ -1,8 +1,12 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. 
* June 20, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -152,12 +156,18 @@ SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) CHARACTER COLBTOP, ROWBTOP INTEGER I, ICOFF, ICTXT, IROFF, J, JB, JN, MYCOL, $ MYROW, NPCOL, NPROW -* +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Arrays .. INTEGER IDUM1( 1 ), IDUM2( 1 ) * .. @@ -174,6 +184,12 @@ SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. 
* * Get grid parameters @@ -223,10 +239,12 @@ SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) $ RETURN * #ifdef AOCL_PROGRESS - LSTAGE = 7 - API_NAME = 'PSPOTRF' - CURRENT_RANK = MYCOL+MYROW*NPCOL - TOTAL_MPI_PROCESSES = NPROW*NPCOL +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN + CURRENT_RANK = MYCOL+MYROW*NPCOL + TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF #endif * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) @@ -272,9 +290,19 @@ SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) DO 10 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA +* #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block @@ -341,9 +369,19 @@ SUBROUTINE PSPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) DO 20 J = JN+1, JA+N-1, DESCA( NB_ ) JB = MIN( N-J+JA, DESCA( NB_ ) ) I = IA + J - JA +* #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. 
+* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block diff --git a/SRC/pxsyevx.h b/SRC/pxsyevx.h index dcc1e323..b0487052 100644 --- a/SRC/pxsyevx.h +++ b/SRC/pxsyevx.h @@ -9,7 +9,7 @@ * NOCHANGE indicates that fortran will be calling, and that it expects * the name called by fortran to be identical to that compiled by the C * (RS6K's do this). UPCASE says it expects C routines called by fortran - * to be in all upcase (CRAY wants this). + * to be in all upcase (CRAY wants this). */ #define ADD_ 0 @@ -49,7 +49,7 @@ #if (F77_CALL_C == UPCASE) /* * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine + * routine call a C routine * following Fortran to C interface: * FORTRAN CALL C DECLARATION * call pdgemm(...) void PDGEMM(...) @@ -64,18 +64,17 @@ #define pslachkieee_ PSLACHKIEEE #define pslaiect_ PSLAIECT -#define get_aocl_scalapack_version_ GET_AOCL_SCALAPACK_VERSION -#define aocl_scalapack_progress_ AOCL_SCALAPACK_PROGRESS -#define aocl_dtl_trace_entry_ AOCL_DTL_TRACE_ENTRY -#define aocl_dtl_trace_exit_ AOCL_DTL_TRACE_EXIT -#define aocl_dtl_log_entry_ AOCL_DTL_LOG_ENTRY -#define aocl_dtl_log_exit_ AOCL_DTL_LOG_EXIT +#define get_aocl_scalapack_version_ GET_AOCL_SCALAPACK_VERSION +#define aocl_sl_dtl_trace_entry_ AOCL_SL_DTL_TRACE_ENTRY +#define aocl_sl_dtl_trace_exit_ AOCL_SL_DTL_TRACE_EXIT +#define aocl_sl_dtl_log_entry_ AOCL_SL_DTL_LOG_ENTRY +#define aocl_dtl_log_exit_ AOCL_DTL_LOG_EXIT #endif #if (F77_CALL_C == NOCHANGE) /* * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine + * routine call a C routine * for following Fortran to C interface: * FORTRAN CALL C DECLARATION * call pdgemm(...) void pdgemm(...) 
@@ -90,10 +89,12 @@ #define pslachkieee_ pslachkieee #define pslaiect_ pslaiect -#define get_aocl_scalapack_version_ get_aocl_scalapack_version -#define aocl_scalapack_progress_ aocl_scalapack_progress -#define aocl_dtl_trace_entry_ aocl_dtl_trace_entry -#define aocl_dtl_trace_exit_ aocl_dtl_trace_exit -#define aocl_dtl_log_entry_ aocl_dtl_log_entry -#define aocl_dtl_log_exit_ aocl_dtl_log_exit +#define get_aocl_scalapack_version_ get_aocl_scalapack_version +#define aocl_scalapack_progress_ aocl_scalapack_progress +#define aocl_dtl_trace_entry_ aocl_dtl_trace_entry +#define aocl_dtl_trace_exit_ aocl_dtl_trace_exit +#define aocl_dtl_log_entry_ aocl_dtl_log_entry +#define aocl_dtl_log_exit_ aocl_dtl_log_exit +#define aocl_scalapack_init_ aocl_scalapack_init + #endif diff --git a/SRC/pzgeqrf.f b/SRC/pzgeqrf.f index 9490d51e..b1847de3 100644 --- a/SRC/pzgeqrf.f +++ b/SRC/pzgeqrf.f @@ -1,9 +1,13 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. * June 20, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -171,11 +175,18 @@ SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif * .. +* .. * .. Local Scalars .. 
LOGICAL LQUERY CHARACTER COLBTOP, ROWBTOP @@ -198,6 +209,12 @@ SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, MIN, MOD * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. * * Get grid parameters @@ -263,10 +280,12 @@ SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, JB = JN - JA + 1 * #ifdef AOCL_PROGRESS +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN CURRENT_RANK = MYCOL+MYROW*NPCOL TOTAL_MPI_PROCESSES = NPROW*NPCOL - LSTAGE = 7 - API_NAME = 'PZGEQRF' + END IF #endif * * Compute the QR factorization of the first block A(ia:ia+m-1,ja:jn) @@ -295,8 +314,17 @@ SUBROUTINE PZGEQRF( M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Compute the QR factorization of the current block diff --git a/SRC/pzgerfs.f b/SRC/pzgerfs.f index 9bbf0459..c22a7b89 100644 --- a/SRC/pzgerfs.f +++ b/SRC/pzgerfs.f @@ -309,6 +309,9 @@ SUBROUTINE PZGERFS( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, * .. * .. Executable Statements .. * +* .. Initialize EST + EST = (0.0, 0.0) +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) diff --git a/SRC/pzgetrf.f b/SRC/pzgetrf.f index 4ed09c8a..550d9ffc 100644 --- a/SRC/pzgetrf.f +++ b/SRC/pzgetrf.f @@ -1,8 +1,12 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. 
* June 10, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PZGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -150,10 +154,17 @@ SUBROUTINE PZGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) INTEGER I, ICOFF, ICTXT, IINFO, IN, IROFF, J, JB, JN, $ MN, MYCOL, MYROW, NPCOL, NPROW * +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif +* .. * * .. * .. Local Arrays .. @@ -171,6 +182,12 @@ SUBROUTINE PZGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) * .. Intrinsic Functions .. INTRINSIC MIN, MOD * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. 
* * Get grid parameters @@ -231,10 +248,12 @@ SUBROUTINE PZGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) JB = JN - JA + 1 * #ifdef AOCL_PROGRESS - CURRENT_RANK = MYCOL+MYROW*NPCOL - TOTAL_MPI_PROCESSES = NPROW*NPCOL - LSTAGE = 7 - API_NAME = 'PZGETRF' +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN + CURRENT_RANK = MYCOL+MYROW*NPCOL + TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF #endif * * Factor diagonal and subdiagonal blocks and test for exact @@ -272,10 +291,20 @@ SUBROUTINE PZGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO ) I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * +* * Factor diagonal and subdiagonal blocks and test for exact * singularity. * diff --git a/SRC/pzporfs.f b/SRC/pzporfs.f index 7d76c0d7..ec756d7f 100644 --- a/SRC/pzporfs.f +++ b/SRC/pzporfs.f @@ -306,6 +306,8 @@ SUBROUTINE PZPORFS( UPLO, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) * .. * .. Executable Statements .. +* .. Initialize EST + EST = (0.0, 0.0) * * Get grid parameters * diff --git a/SRC/pzpotrf.f b/SRC/pzpotrf.f index cc00003d..750fde3e 100644 --- a/SRC/pzpotrf.f +++ b/SRC/pzpotrf.f @@ -1,8 +1,12 @@ * -- ScaLAPACK routine -- * Copyright (c) 2020-22 Advanced Micro Devices, Inc.  All rights reserved. 
* June 20, 2022 +* +#include "SL_Context_fortran_include.h" * SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) +* + USE LINK_TO_C_GLOBALS * * -- ScaLAPACK routine (version 1.7) -- * University of Tennessee, Knoxville, Oak Ridge National Laboratory, @@ -148,11 +152,17 @@ SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) PARAMETER ( ONE = 1.0D+0 ) COMPLEX*16 CONE PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) -* +* .. #ifdef AOCL_PROGRESS - INTEGER TOTAL_MPI_PROCESSES, LSTAGE, CURRENT_RANK - CHARACTER*7 API_NAME +* .. AOCL Progress variables .. + INTEGER TOTAL_MPI_PROCESSES, CURRENT_RANK, PROGRESS +* +* .. Declaring 'API NAME' and its length as const objects +* .. API_NAME string terminated with 'NULL' character. + CHARACTER*8, PARAMETER :: API_NAME = FUNCTION_NAME // C_NULL_CHAR + INTEGER, PARAMETER :: LEN_API_NAME = 8 #endif +* .. * .. Local Scalars .. LOGICAL UPPER CHARACTER COLBTOP, ROWBTOP @@ -175,8 +185,18 @@ SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) * .. Intrinsic Functions .. INTRINSIC ICHAR, MIN, MOD * .. +* +* Initialize framework context structure if not initialized +* +* + CALL AOCL_SCALAPACK_INIT( ) +* * .. Executable Statements .. 
* +* Initialize framework context structure if not initialized +* + CALL AOCL_SCALAPACK_INIT( ) +* * Get grid parameters * ICTXT = DESCA( CTXT_ ) @@ -224,10 +244,12 @@ SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) $ RETURN * #ifdef AOCL_PROGRESS - LSTAGE = 7 - API_NAME = 'PDPOTRF' - CURRENT_RANK = MYCOL+MYROW*NPCOL - TOTAL_MPI_PROCESSES = NPROW*NPCOL +* Set the AOCL progress variables related to rank, processes +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN + CURRENT_RANK = MYCOL+MYROW*NPCOL + TOTAL_MPI_PROCESSES = NPROW*NPCOL + END IF #endif * CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP ) @@ -275,8 +297,17 @@ SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. +* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block @@ -345,8 +376,17 @@ SUBROUTINE PZPOTRF( UPLO, N, A, IA, JA, DESCA, INFO ) I = IA + J - JA * #ifdef AOCL_PROGRESS - CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LSTAGE, - $ J, CURRENT_RANK, TOTAL_MPI_PROCESSES ) +* Update the progress and callback if progress is enabled +* + IF( SCALAPACK_CONTEXT%IS_PROGRESS_ENABLED.EQ.1 ) THEN +* +* Capture the Loop count 'J' to a separate 'PROGRESS' +* variable to avoid the corruption at application side. 
+* + PROGRESS = J + CALL AOCL_SCALAPACK_PROGRESS ( API_NAME, LEN_API_NAME, + $ PROGRESS, CURRENT_RANK, TOTAL_MPI_PROCESSES ) + END IF #endif * * Perform unblocked Cholesky factorization on JB block diff --git a/TESTING/AOCL_PROGRESS_TESTS/README.txt b/TESTING/AOCL_PROGRESS_TESTS/README.txt index 3a3d83a4..70f26930 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/README.txt +++ b/TESTING/AOCL_PROGRESS_TESTS/README.txt @@ -1,60 +1,66 @@ -Checking AOCL-ScaLAPACK Operation Progress +Checking the progress of AOCL-ScaLAPACK Operations +================================================== -AOCL libraries may be used to perform lengthy computations (for example, matrix multiplications, solver involving large matrices). These operations/computations may go on for hours. +AOCL libraries may be used to perform lengthy computations (Eg: matrix multiplications, +solver involving large matrices, etc). These operations/computations may go on for hours. -AOCL progress feature provides mechanism for the application to check how far the computations have progressed. Selected set of APIs of AOCL libraries periodically updates the application with progress made so far via a callback function. +AOCL progress feature provides mechanism for the application to check how far +the computations have progressed. Selected set of APIs of AOCL libraries +periodically updates the application with progress made so far via a callback function. -Usage: +AOCL progress is supported for the below APIs: + 1) Cholesky (pcpotrf, pdpotrf, pspotrf, pzpotrf) + 2) LU factorization (pcgetrf, pdgetrf, psgetrf, pzgetrf) + 3) QR factorization (pcgeqrf, pdgeqrf, psgeqrf, pzgeqrf) -The Application needs to define a callback function in specific format and register this callback function with the AOCL-ScaLAPACK library. +Usage +===== +The Application needs to define a callback function in specific +format and register this callback function with the AOCL-ScaLAPACK library. 
The callback function prototype must be as defined below. -int aocl_scalapack_progress( -char* api, -integer lenapi, -integer *progress, -integer *mpi_rank, -integer *total_mpi_processes -) - - -The table below explains various parameters +int aocl_scalapack_progress(const char *const api, + const integer *lenapi, + const integer *progress, + const integer *mpi_rank, + const integer *total_mpi_processes) + +The table below explains various parameters: +----------------------------------------------------------------------------- Parameters | Purpose ---------------------------------------------------------------------- +----------------------------------------------------------------------------- api | Name of the API which is currently running lenapi | Length of API name character buffer progress | Linear progress made in current thread so far mpi_rank | Current process rank total_mpi_processes | Total number of processes used to perform the operation -Callback Registration: - -The callback function must be registered with library for it to report the progress. +Callback Registration: +---------------------- +The callback function must be registered with library for it to report the progress. 
-aocl_scalapack_set_progress(aocl_scalapack_progress); +aocl_scalapack_set_progress(aocl_scalapack_progress); Example: -int aocl_scalapack_progress(char* api, int *lenapi, int *progress, int *mpi_rank, int *total_mpi_processes) +-------- +int aocl_scalapack_progress(const char* const api, const int *lenapi, + const int *progress, const int *mpi_rank, + const int *total_mpi_processes) { - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api, *progress,*total_mpi_processes ); + printf("In AOCL Progress MPI Rank:%i, API:%s, progress:%i, MPI processes:%i\n", + *mpi_rank, api, *progress,*total_mpi_processes); return 0; } -Limitations -- AOCL-ScALAPACK Progress feature is currently supported only on Linux - - -Procedure to build and run the sample application with aocl progress feature: -============================================================================= +Procedure to build and run the sample application with aocl progress feature +============================================================================ -1) copy below AOCL libraries to the "/EXAMPLE/aocl_progress_example/" folder: - 1) libscalapack.a - 2) libflame.a - 3) blis-mt.a +1) The ScaLAPACK build system generates aocl-progress related test binaries + along with the test suite application as part of the build process. + Refer to the AOCL User Guide for the ScaLAPACK build process. -2) Run the below command to build the 'pdgetrf' test application with 'aocl-progress' feature. - mpicc -O0 -g pdgerf_example_app.c libscalapack.a libflame.a -fopenmp libblis-mt.a -lm -lgfortran -o test +2) The aocl-progress related tests are generated in the 'TESTING/AOCL_PROGRESS_TESTS' folder in the build folder.
-3) Run the below commands to run the application: - Ex: mpirun -np 4 ./test 32 8 2 2 - mpirun -np 8 ./test 1024 32 4 2 +3) The aocl-progress related tests can be run with the below command: + Eg: mpirun -np 4 ./xap_pdgetrf 32 8 2 2 + mpirun -np 8 ./xap_pdgetrf 1024 32 4 2 diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgeqrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgeqrf.c index 1de4f3dd..cd3e8052 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgeqrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgeqrf.c @@ -1,156 +1,156 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -#define SL_complex_float float _Complex - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); - -/* Target API Prototype */ -void pcgerqf_(Int*, Int*, SL_complex_float*, Int*, Int*, Int*, SL_complex_float*, SL_complex_float*, Int*, Int*); - -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int jone=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int m = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // 
Number of column procs - Int nb = 256; // (Global) Block size - Int mb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - m = atoi(argv[1]); - } - if(argc > 2) { - n = atoi(argv[2]); - } - if(argc > 3) { - mb = atoi(argv[3]); - } - if(argc > 4) { - nb = atoi(argv[4]); - } - if(argc > 5) { - nprow = atoi(argv[5]); - } - if(argc > 6) { - npcol = atoi(argv[6]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - SL_complex_float *A; - A = (SL_complex_float *)calloc(mpA*nqA,sizeof(SL_complex_float)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - SL_complex_float work_buffer_size; - SL_complex_float *work, *tau; - Int lwork = -1; - tau = (SL_complex_float *)calloc((mpA+nqA),sizeof(SL_complex_float)) ; - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - pcgeqrf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); - - work = (SL_complex_float *)calloc(work_buffer_size, sizeof(SL_complex_float)) ; - lwork = work_buffer_size; - - // Run pdpotrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pcgeqrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pcgeqrf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); - if (info != 0) { - printf("Error in pcgeqrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +#define SL_complex_float float _Complex + 
+void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); + +/* Target API Prototype */ +void pcgerqf_(Int*, Int*, SL_complex_float*, Int*, Int*, Int*, SL_complex_float*, SL_complex_float*, Int*, Int*); + +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int jone=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int m = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + Int mb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + m = atoi(argv[1]); + } + if(argc > 2) { + n = atoi(argv[2]); + } + if(argc > 3) { + mb = atoi(argv[3]); + } + if(argc > 4) { + nb = atoi(argv[4]); + } + if(argc > 5) { + nprow = atoi(argv[5]); + } + if(argc > 6) { + npcol = atoi(argv[6]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and 
world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + SL_complex_float *A; + A = (SL_complex_float *)calloc(mpA*nqA,sizeof(SL_complex_float)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + SL_complex_float work_buffer_size; + SL_complex_float *work, *tau; + Int lwork = -1; + tau = (SL_complex_float *)calloc((mpA+nqA),sizeof(SL_complex_float)) ; + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? 
mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + pcgeqrf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); + + work = (SL_complex_float *)calloc(work_buffer_size, sizeof(SL_complex_float)) ; + lwork = work_buffer_size; + + // Run pdpotrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pcgeqrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pcgeqrf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); + if (info != 0) { + printf("Error in pcgeqrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgetrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgetrf.c index b17c83bd..127a1c5e 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgetrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcgetrf.c @@ -1,141 +1,141 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -#define SL_complex_float float _Complex - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pcgetrf_(Int*, Int*, SL_complex_float*, Int*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL 
Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - SL_complex_float *A; - Int *IPPIV; - A = (SL_complex_float *)calloc(mpA*nqA,sizeof(SL_complex_float)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; - if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pcgetrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pcgetrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pcgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); - - if (info != 0) { - printf("Error in pcgeqrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +#define SL_complex_float float _Complex + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, 
Int*, Int*, Int*, Int*); +void pcgetrf_(Int*, Int*, SL_complex_float*, Int*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int 
nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + SL_complex_float *A; + Int *IPPIV; + A = (SL_complex_float *)calloc(mpA*nqA,sizeof(SL_complex_float)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; + if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? 
mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pcgetrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pcgetrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pcgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); + + if (info != 0) { + printf("Error in pcgeqrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcpotrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcpotrf.c index 084f6387..b444c2b3 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcpotrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pcpotrf.c @@ -1,136 +1,136 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -#define SL_complex_float float _Complex - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pcpotrf_(char*, Int*, SL_complex_float*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int 
myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - //assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - SL_complex_float *A; - A = (SL_complex_float *)calloc(mpA*nqA,sizeof(SL_complex_float)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pcpotrf and time - double MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pcpotrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pcpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); - if (info != 0) { - printf("Error in pcpotrf, info = %d\n", info); - } - - double MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +#define SL_complex_float float _Complex + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pcpotrf_(char*, Int*, SL_complex_float*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, 
Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + //assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position 
(%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + SL_complex_float *A; + A = (SL_complex_float *)calloc(mpA*nqA,sizeof(SL_complex_float)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? 
mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pcpotrf and time + double MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pcpotrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pcpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); + if (info != 0) { + printf("Error in pcpotrf, info = %d\n", info); + } + + double MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgeqrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgeqrf.c index 06b3d2f0..230c2294 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgeqrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgeqrf.c @@ -1,152 +1,152 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pdgerqf_(Int*, Int*, double*, Int*, Int*, Int*, double*, double*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int jone=1; - Int myrank_mpi, nprocs_mpi; - 
MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int m = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - Int mb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - m = atoi(argv[1]); - } - if(argc > 2) { - n = atoi(argv[2]); - } - if(argc > 3) { - mb = atoi(argv[3]); - } - if(argc > 4) { - nb = atoi(argv[4]); - } - if(argc > 5) { - nprow = atoi(argv[5]); - } - if(argc > 6) { - npcol = atoi(argv[6]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - double *A; - A = (double *)calloc(mpA*nqA,sizeof(double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - double work_buffer_size; - double *work, *tau; - Int lwork = -1; - tau = (double *)calloc((mpA+nqA),sizeof(double)) ; - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - pdgeqrf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); - - work = (double *)calloc(work_buffer_size, sizeof(double)) ; - lwork = work_buffer_size; - - // Run pdpotrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pdgeqrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pdgeqrf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); - if (info != 0) { - printf("Error in pdgeqrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void 
blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pdgerqf_(Int*, Int*, double*, Int*, Int*, Int*, double*, double*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int jone=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int m = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + Int mb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + m = atoi(argv[1]); + } + if(argc > 2) { + n = atoi(argv[2]); + } + if(argc > 3) { + mb = atoi(argv[3]); + } + if(argc > 4) { + nb = atoi(argv[4]); + } + if(argc > 5) { + nprow = atoi(argv[5]); + } + if(argc > 6) { + npcol = atoi(argv[6]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> 
Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + double *A; + A = (double *)calloc(mpA*nqA,sizeof(double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + double work_buffer_size; + double *work, *tau; + Int lwork = -1; + tau = (double *)calloc((mpA+nqA),sizeof(double)) ; + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? 
mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + pdgeqrf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); + + work = (double *)calloc(work_buffer_size, sizeof(double)) ; + lwork = work_buffer_size; + + // Run pdpotrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pdgeqrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pdgeqrf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); + if (info != 0) { + printf("Error in pdgeqrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgetrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgetrf.c index 3d29301d..71f2c2a1 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgetrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdgetrf.c @@ -1,139 +1,139 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pdgetrf_(Int*, Int*, double*, Int*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, 
api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - double *A; - Int *IPPIV; - A = (double *)calloc(mpA*nqA,sizeof(double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; - if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pdgetrf and time - double MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pdgetrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pdgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); - - if (info != 0) { - printf("Error in pdgetrf, info = %i\n", info); - } - - double MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pdgetrf_(Int*, Int*, double*, Int*, 
Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(const char* const api, const Int *lenapi, const Int *progress, const Int *mpi_rank, const Int *total_mpi_processes); + +Int AOCL_progress(const char* const api, const Int *lenapi, const Int *progress, const Int *mpi_rank, const Int *total_mpi_processes) +{ + char api_name [30]; + memcpy(api_name, api, *lenapi); + api_name[*lenapi - 1] = '\0'; + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i \n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( 
&n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + double *A; + Int *IPPIV; + A = (double *)calloc(mpA*nqA,sizeof(double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; + if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? 
mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pdgetrf and time + double MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pdgetrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pdgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); + + if (info != 0) { + printf("Error in pdgetrf, info = %i\n", info); + } + + double MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdpotrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdpotrf.c index 5da96566..307f916a 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdpotrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pdpotrf.c @@ -1,134 +1,134 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pdpotrf_(char*, Int*, double*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - 
MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 
5*n : I+J) - double *A; - A = (double *)calloc(mpA*nqA,sizeof(double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pdpotrf and time - double MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pdpotrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pdpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); - if (info != 0) { - printf("Error in pdpotrf, info = %i\n", info); - } - - double MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pdpotrf_(char*, Int*, double*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, 
Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size 
%d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 5*n : I+J) + double *A; + A = (double *)calloc(mpA*nqA,sizeof(double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pdpotrf and time + double MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pdpotrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pdpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); + if (info != 0) { + printf("Error in pdpotrf, info = %i\n", info); + } + + double MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgeqrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgeqrf.c index 98053ce5..2d4493fc 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgeqrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgeqrf.c @@ -1,153 +1,153 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif 
-#include "mpi.h" - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -//void pdpotrf_(char*, Int*, float*, Int*, Int*, Int*, Int*); -void pdgerqf_(Int*, Int*, float*, Int*, Int*, Int*, float*, float*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int jone=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int m = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - Int mb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - m = atoi(argv[1]); - } - if(argc > 2) { - n = atoi(argv[2]); - } - if(argc > 3) { - mb = atoi(argv[3]); - } - if(argc > 4) { - nb = atoi(argv[4]); - } - if(argc > 5) { - nprow = atoi(argv[5]); - } - if(argc > 6) { - npcol = atoi(argv[6]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, 
myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - float *A; - A = (float *)calloc(mpA*nqA,sizeof(float)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - float work_buffer_size; - float *work, *tau; - Int lwork = -1; - tau = (float *)calloc((mpA+nqA),sizeof(float)) ; - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - psgeqrf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); - - work = (float *)calloc(work_buffer_size, sizeof(float)) ; - lwork = work_buffer_size; - - // Run pdpotrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting psgeqrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - psgeqrf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); - if (info != 0) { - printf("Error in psgeqrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +//void pdpotrf_(char*, Int*, float*, Int*, Int*, Int*, Int*); +void pdgerqf_(Int*, Int*, float*, Int*, Int*, Int*, float*, float*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int jone=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, 
&myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int m = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + Int mb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + m = atoi(argv[1]); + } + if(argc > 2) { + n = atoi(argv[2]); + } + if(argc > 3) { + mb = atoi(argv[3]); + } + if(argc > 4) { + nb = atoi(argv[4]); + } + if(argc > 5) { + nprow = atoi(argv[5]); + } + if(argc > 6) { + npcol = atoi(argv[6]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + float *A; + A = (float *)calloc(mpA*nqA,sizeof(float)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + float work_buffer_size; + float *work, *tau; + Int lwork = -1; + tau = (float *)calloc((mpA+nqA),sizeof(float)) ; + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + psgeqrf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); + + work = (float *)calloc(work_buffer_size, sizeof(float)) ; + lwork = work_buffer_size; + + // Run pdpotrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting psgeqrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + psgeqrf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); + if (info != 0) { + printf("Error in psgeqrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgetrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgetrf.c index 6cf8e1dc..e574565c 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgetrf.c +++ 
b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_psgetrf.c @@ -1,140 +1,140 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void psgetrf_(Int*, Int*, float*, Int*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - 
blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - float *A; - Int *IPPIV; - A = (float *)calloc(mpA*nqA,sizeof(float)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; - if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run psgetrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting psgetrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - psgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); - - if (info != 0) { - printf("Error in psgetrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void psgetrf_(Int*, Int*, float*, Int*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + 
char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + float *A; + Int *IPPIV; + A = (float *)calloc(mpA*nqA,sizeof(float)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; + if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run psgetrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting psgetrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + psgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); + + if (info != 0) { + printf("Error in psgetrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pspotrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pspotrf.c index 2eaf1b13..d91ccaa3 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pspotrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pspotrf.c @@ -1,133 +1,133 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include 
"mpi.h" - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pspotrf_(char*, Int*, float*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - //assert(nprow * npcol == nprocs_mpi); - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); 
// Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - float *A; - A = (float *)calloc(mpA*nqA,sizeof(float)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pspotrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pspotrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pspotrf_(&uplo, &n, A, &ione, &ione, descA, &info); - if (info != 0) { - printf("Error in pspotrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pspotrf_(char*, Int*, float*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; 
// Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + //assert(nprow * npcol == nprocs_mpi); + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + float *A; + A = (float *)calloc(mpA*nqA,sizeof(float)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pspotrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pspotrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pspotrf_(&uplo, &n, A, &ione, &ione, descA, &info); + if (info != 0) { + printf("Error in pspotrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgeqrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgeqrf.c index d1ae33c1..90e6880c 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgeqrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgeqrf.c @@ -1,157 +1,157 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -#define SL_complex_double double _Complex - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, 
char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); - -/* Target API Prototype */ -void pzgerqf_(Int*, Int*, SL_complex_double*, Int*, Int*, Int*, SL_complex_double*, SL_complex_double*, Int*, Int*); - -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int jone=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int m = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - Int mb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size_rows matrix_size_columns block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - m = atoi(argv[1]); - } - if(argc > 2) { - n = (Int)atoi(argv[2]); - } - if(argc > 3) { - mb = (Int)atoi(argv[3]); - } - if(argc > 4) { - nb = (Int)atoi(argv[4]); - } - if(argc > 5) { - nprow = (Int)atoi(argv[5]); - } - if(argc > 6) { - npcol = (Int)atoi(argv[6]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and 
world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - SL_complex_double *A; - A = (SL_complex_double *)calloc(mpA*nqA,sizeof(SL_complex_double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - SL_complex_double work_buffer_size; - SL_complex_double *work, *tau; - Int lwork = -1; - tau = (SL_complex_double *)calloc((mpA+nqA),sizeof(SL_complex_double)) ; - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - pzgerqf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); - - work = (SL_complex_double *)calloc(work_buffer_size, sizeof(SL_complex_double)) ; - lwork = work_buffer_size; - - // Run pdpotrf and time - float MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pzgeqrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pzgerqf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); - if (info != 0) { - printf("Error in pzgeqrf, info = %i\n", info); - } - - float MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +#define SL_complex_double double _Complex + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); + +/* Target API Prototype */ +void pzgerqf_(Int*, Int*, SL_complex_double*, Int*, Int*, Int*, SL_complex_double*, SL_complex_double*, Int*, Int*); + +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int jone=1; + Int 
myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int m = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + Int mb = 256; // (Global) Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size_rows matrix_size_columns block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + m = atoi(argv[1]); + } + if(argc > 2) { + n = (Int)atoi(argv[2]); + } + if(argc > 3) { + mb = (Int)atoi(argv[3]); + } + if(argc > 4) { + nb = (Int)atoi(argv[4]); + } + if(argc > 5) { + nprow = (Int)atoi(argv[5]); + } + if(argc > 6) { + npcol = (Int)atoi(argv[6]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &m, &mb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + SL_complex_double *A; + A = (SL_complex_double *)calloc(mpA*nqA,sizeof(SL_complex_double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + SL_complex_double work_buffer_size; + SL_complex_double *work, *tau; + Int lwork = -1; + tau = (SL_complex_double *)calloc((mpA+nqA),sizeof(SL_complex_double)) ; + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + pzgerqf_(&m, &n, A, &ione, &jone, descA, tau, &work_buffer_size, &lwork, &info); + + work = (SL_complex_double *)calloc(work_buffer_size, sizeof(SL_complex_double)) ; + lwork = work_buffer_size; + + // Run pdpotrf and time + float MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pzgeqrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pzgerqf_(&m, &n, A, &ione, &jone, descA, tau, work, &lwork, &info); + if (info != 0) { + printf("Error in pzgeqrf, info = %i\n", info); + } + + float MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgetrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgetrf.c index 8dd6171f..d91bd1df 
100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgetrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzgetrf.c @@ -1,141 +1,141 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#endif -#include "mpi.h" - -#define SL_complex_double double _Complex - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pzgetrf_(Int*, Int*, SL_complex_double*, Int*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS 
- Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - SL_complex_double *A; - Int *IPPIV; - A = (SL_complex_double *)calloc(mpA*nqA,sizeof(SL_complex_double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; - if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pzgetrf and time - double MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pzgetrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pzgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); - - if (info != 0) { - printf("Error in pzgetrf, info = %i\n", info); - } - - double MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +#define SL_complex_double double _Complex + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pzgetrf_(Int*, Int*, SL_complex_double*, Int*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) 
Block size + char uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + SL_complex_double *A; + Int *IPPIV; + A = (SL_complex_double *)calloc(mpA*nqA,sizeof(SL_complex_double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + IPPIV = (Int *)calloc(2*n,sizeof(Int)) ; + if (IPPIV==NULL){ printf("Error of memory allocation IPPIV %d\n",2*n); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pzgetrf and time + double MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pzgetrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pzgetrf_( &n, &n, A, &ione, &ione, descA, IPPIV, &info ); + + if (info != 0) { + printf("Error in pzgetrf, info = %i\n", info); + } + + double MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzpotrf.c b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzpotrf.c index 09803fb1..2d1aacda 100644 --- a/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzpotrf.c +++ b/TESTING/AOCL_PROGRESS_TESTS/test_aocl_progress_pzpotrf.c @@ -1,136 +1,136 @@ -#include -#include -#include -#include -#include -#ifdef _WIN32 
-#include -#else -#include -#endif -#include "mpi.h" - -#define SL_complex_double double _Complex - -void blacs_get_(Int*, Int*, Int*); -void blacs_pinfo_(Int*, Int*); -void blacs_gridinit_(Int*, char*, Int*, Int*); -void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); -void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); -void pzpotrf_(char*, Int*, SL_complex_double*, Int*, Int*, Int*, Int*); -void blacs_gridexit_(Int*); -Int numroc_(Int*, Int*, Int*, Int*, Int*); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); - -Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) -{ - char api_name[20]; - memcpy(api_name, api, *lenapi); - printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); - return 0; -} - - -int main(int argc, char **argv) { - Int izero=0; - Int ione=1; - Int myrank_mpi, nprocs_mpi; - MPI_Init( &argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); - - Int n = 1000; // (Global) Matrix size - Int nprow = 2; // Number of row procs - Int npcol = 2; // Number of column procs - Int nb = 256; // (Global) Block size - char uplo='L'; // Matrix is lower triangular - char layout='R'; // Block cyclic, Row major processor mapping - - printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); - - if(argc > 1) { - n = atoi(argv[1]); - } - if(argc > 2) { - nb = atoi(argv[2]); - } - if(argc > 3) { - nprow = atoi(argv[3]); - } - if(argc > 4) { - npcol = atoi(argv[4]); - } - - assert((int)nprow * (int)npcol == (int)nprocs_mpi); - // assert(nprow * npcol == nprocs_mpi); - - // Initialize BLACS - Int iam, nprocs; - Int zero = 0; - Int ictxt, myrow, mycol; - blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size - blacs_get_(&zero, &zero, &ictxt ); // -> Create context - blacs_gridinit_(&ictxt, &layout, &nprow, 
&npcol ); // Context -> Initialize the grid - blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) - - // Compute the size of the local matrices - Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A - Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A - - printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); - - // Allocate and fill the matrices A and B - // A[I,J] = (I == J ? 5*n : I+J) - SL_complex_double *A; - A = (SL_complex_double *)calloc(mpA*nqA,sizeof(SL_complex_double)) ; - if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } - - Int k = 0; - for (Int j = 0; j < nqA; j++) { // local col - Int l_j = j / nb; // which block - Int x_j = j % nb; // where within that block - Int J = (l_j * npcol + mycol) * nb + x_j; // global col - for (Int i = 0; i < mpA; i++) { // local row - Int l_i = i / nb; // which block - Int x_i = i % nb; // where within that block - Int I = (l_i * nprow + myrow) * nb + x_i; // global row - assert(I < n); - assert(J < n); - if(I == J) { - A[k] = n*n; - } else { - A[k] = I+J; - } - //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); - k++; - } - } - - // Create descriptor - Int descA[9]; - Int info; - Int lddA = mpA > 1 ? 
mpA : 1; - descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); - if(info != 0) { - printf("Error in descinit, info = %i\n", info); - } - - // Run pzpotrf and time - double MPIt1 = MPI_Wtime(); - printf("[%dx%d] Starting pzpotrf\n", myrow, mycol); - aocl_scalapack_set_progress(&AOCL_progress); - pzpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); - if (info != 0) { - printf("Error in pzpotrf, info = %i\n", info); - } - - double MPIt2 = MPI_Wtime(); - printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); - free(A); - - // Exit and finalize - blacs_gridexit_(&ictxt); - MPI_Finalize(); - return 0; -} +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include "mpi.h" + +#define SL_complex_double double _Complex + +void blacs_get_(Int*, Int*, Int*); +void blacs_pinfo_(Int*, Int*); +void blacs_gridinit_(Int*, char*, Int*, Int*); +void blacs_gridinfo_(Int*, Int*, Int*, Int*, Int*); +void descinit_(Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*, Int*); +void pzpotrf_(char*, Int*, SL_complex_double*, Int*, Int*, Int*, Int*); +void blacs_gridexit_(Int*); +Int numroc_(Int*, Int*, Int*, Int*, Int*); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes); + +Int AOCL_progress(char* api, Int *lenapi, Int *progress, Int *mpi_rank, Int *total_mpi_processes) +{ + char api_name[20]; + memcpy(api_name, api, *lenapi); + printf( "In AOCL Progress MPI Rank: %i API: %s progress: %i MPI processes: %i\n", *mpi_rank, api_name, *progress,*total_mpi_processes ); + return 0; +} + + +int main(int argc, char **argv) { + Int izero=0; + Int ione=1; + Int myrank_mpi, nprocs_mpi; + MPI_Init( &argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi); + + Int n = 1000; // (Global) Matrix size + Int nprow = 2; // Number of row procs + Int npcol = 2; // Number of column procs + Int nb = 256; // (Global) Block size + char 
uplo='L'; // Matrix is lower triangular + char layout='R'; // Block cyclic, Row major processor mapping + + printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n"); + + if(argc > 1) { + n = atoi(argv[1]); + } + if(argc > 2) { + nb = atoi(argv[2]); + } + if(argc > 3) { + nprow = atoi(argv[3]); + } + if(argc > 4) { + npcol = atoi(argv[4]); + } + + assert((int)nprow * (int)npcol == (int)nprocs_mpi); + // assert(nprow * npcol == nprocs_mpi); + + // Initialize BLACS + Int iam, nprocs; + Int zero = 0; + Int ictxt, myrow, mycol; + blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size + blacs_get_(&zero, &zero, &ictxt ); // -> Create context + blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid + blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col) + + // Compute the size of the local matrices + Int mpA = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A + Int nqA = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A + + printf(" Proc %d/%d for MPI, proc %d/%d for BLACS in position (%d,%d)/(%d,%d) with local matrix %dx%d, global matrix %d, block size %d\n",myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol,nprow,npcol,mpA,nqA,n,nb); + + // Allocate and fill the matrices A and B + // A[I,J] = (I == J ? 
5*n : I+J) + SL_complex_double *A; + A = (SL_complex_double *)calloc(mpA*nqA,sizeof(SL_complex_double)) ; + if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); } + + Int k = 0; + for (Int j = 0; j < nqA; j++) { // local col + Int l_j = j / nb; // which block + Int x_j = j % nb; // where within that block + Int J = (l_j * npcol + mycol) * nb + x_j; // global col + for (Int i = 0; i < mpA; i++) { // local row + Int l_i = i / nb; // which block + Int x_i = i % nb; // where within that block + Int I = (l_i * nprow + myrow) * nb + x_i; // global row + assert(I < n); + assert(J < n); + if(I == J) { + A[k] = n*n; + } else { + A[k] = I+J; + } + //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]); + k++; + } + } + + // Create descriptor + Int descA[9]; + Int info; + Int lddA = mpA > 1 ? mpA : 1; + descinit_( descA, &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info); + if(info != 0) { + printf("Error in descinit, info = %i\n", info); + } + + // Run pzpotrf and time + double MPIt1 = MPI_Wtime(); + printf("[%dx%d] Starting pzpotrf\n", myrow, mycol); + aocl_scalapack_set_progress(&AOCL_progress); + pzpotrf_(&uplo, &n, A, &ione, &ione, descA, &info); + if (info != 0) { + printf("Error in pzpotrf, info = %i\n", info); + } + + double MPIt2 = MPI_Wtime(); + printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1); + free(A); + + // Exit and finalize + blacs_gridexit_(&ictxt); + MPI_Finalize(); + return 0; +} diff --git a/TESTING/EIG/CMakeLists.txt b/TESTING/EIG/CMakeLists.txt index 7addff7c..43bb00e0 100644 --- a/TESTING/EIG/CMakeLists.txt +++ b/TESTING/EIG/CMakeLists.txt @@ -7,6 +7,7 @@ set (dmatgen pdmatgen.f pmatgeninc.f) set (cmatgen pcmatgen.f pmatgeninc.f) set (zmatgen pzmatgen.f pmatgeninc.f) set (TTRD_SRC ${CMAKE_SOURCE_DIR}/SRC) +set (FRAMEWORK_SRC ${CMAKE_SOURCE_DIR}/FRAMEWORK) if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES Clang) add_definitions(-D__STDC__) @@ -23,10 +24,10 @@ add_executable(xchrd pchrddriver.f pchrdinfo.f 
pcgehdrv.f pclafchk.f ${cmatgen}) add_executable(xzhrd pzhrddriver.f pzhrdinfo.f pzgehdrv.f pzlafchk.f ${zmatgen}) if(MSVC) -add_executable(xstrd pstrddriver.f psttrdtester.f pslatran.f pstrdinfo.f pssytdrv.f pslafchk.f ${TTRD_SRC}/pssyttrd.f xpjlaenv.f ${smatgen}) -add_executable(xdtrd pdtrddriver.f pdttrdtester.f pdlatran.f pdtrdinfo.f pdsytdrv.f pdlafchk.f ${TTRD_SRC}/pdsyttrd.f xpjlaenv.f ${dmatgen}) -add_executable(xctrd pctrddriver.f pcttrdtester.f pclatran.f pctrdinfo.f pchetdrv.f pclafchk.f ${TTRD_SRC}/pchettrd.f xpjlaenv.f ${cmatgen}) -add_executable(xztrd pztrddriver.f pzttrdtester.f pzlatran.f pztrdinfo.f pzhetdrv.f pzlafchk.f ${TTRD_SRC}/pzhettrd.f xpjlaenv.f ${zmatgen}) +add_executable(xstrd pstrddriver.f psttrdtester.f pslatran.f pstrdinfo.f pssytdrv.f pslafchk.f ${TTRD_SRC}/pssyttrd.f xpjlaenv.f ${smatgen} ${FRAMEWORK_SRC}/SL_Context_module.f) +add_executable(xdtrd pdtrddriver.f pdttrdtester.f pdlatran.f pdtrdinfo.f pdsytdrv.f pdlafchk.f ${TTRD_SRC}/pdsyttrd.f xpjlaenv.f ${dmatgen} ${FRAMEWORK_SRC}/SL_Context_module.f) +add_executable(xctrd pctrddriver.f pcttrdtester.f pclatran.f pctrdinfo.f pchetdrv.f pclafchk.f ${TTRD_SRC}/pchettrd.f xpjlaenv.f ${cmatgen} ${FRAMEWORK_SRC}/SL_Context_module.f) +add_executable(xztrd pztrddriver.f pzttrdtester.f pzlatran.f pztrdinfo.f pzhetdrv.f pzlafchk.f ${TTRD_SRC}/pzhettrd.f xpjlaenv.f ${zmatgen} ${FRAMEWORK_SRC}/SL_Context_module.f) else() add_executable(xstrd pstrddriver.f psttrdtester.f pslatran.f pstrdinfo.f pssytdrv.f pslafchk.f xpjlaenv.f ${smatgen}) add_executable(xdtrd pdtrddriver.f pdttrdtester.f pdlatran.f pdtrdinfo.f pdsytdrv.f pdlafchk.f xpjlaenv.f ${dmatgen}) diff --git a/TESTING/LIN/pdinvdriver.f b/TESTING/LIN/pdinvdriver.f index 0a8a5410..4503ce07 100644 --- a/TESTING/LIN/pdinvdriver.f +++ b/TESTING/LIN/pdinvdriver.f @@ -64,7 +64,7 @@ PROGRAM PDINVDRIVER PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1, $ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6, $ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 ) - INTEGER 
DBLESZ, INTGSZ, MEMSIZ, NTESTS, TOTMEM + INTEGER DBLESZ, INTGSZ, NTESTS, TOTMEM #ifdef ENABLE_ILP64 PARAMETER ( INTGSZ = 8 ) #else @@ -72,12 +72,13 @@ PROGRAM PDINVDRIVER #endif * #ifndef DYNAMIC_WORK_MEM_ALLOC + INTEGER MEMSIZ DOUBLE PRECISION PADVAL, ZERO PARAMETER ( DBLESZ = 8, TOTMEM = 2000000, $ MEMSIZ = TOTMEM / DBLESZ, NTESTS = 20, $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) #else - INTEGER, PARAMETER :: MEMSIZ = 2100000000 + INTEGER, PARAMETER :: MEMSIZ = WORK_BUFFER_SIZE DOUBLE PRECISION PADVAL, ZERO PARAMETER ( DBLESZ = 8, TOTMEM = 2000000, $ NTESTS = 20, diff --git a/TESTING/LIN/pdludriver.f b/TESTING/LIN/pdludriver.f index 09e04f62..f21b46fd 100644 --- a/TESTING/LIN/pdludriver.f +++ b/TESTING/LIN/pdludriver.f @@ -87,8 +87,7 @@ PROGRAM PDLUDRIVER $ PADVAL = -9923.0D+0, ZERO = 0.0D+0 ) #else INTEGER DBLESZ, NTESTS - INTEGER, PARAMETER :: MEMSIZ = 2100000000 - + INTEGER, PARAMETER :: MEMSIZ = WORK_BUFFER_SIZE DOUBLE PRECISION PADVAL, ZERO PARAMETER ( DBLESZ = 8, $ NTESTS = 20, @@ -117,13 +116,14 @@ PROGRAM PDLUDRIVER $ NBVAL( NTESTS ), NRVAL( NTESTS ), $ NVAL( NTESTS ), PVAL( NTESTS ), $ QVAL( NTESTS ) -#ifndef DYNAMIC_WORK_MEM_ALLOC +#ifndef DYNAMIC_WORK_MEM_ALLOC DOUBLE PRECISION MEM( MEMSIZ ), CTIME( 2 ), WTIME( 2 ) #else DOUBLE PRECISION CTIME( 2 ), WTIME( 2 ) DOUBLE PRECISION, allocatable :: MEM (:) #endif CHARACTER SVERSION( 100 ) + INTEGER VER_STR_LEN * .. * .. External Subroutines .. 
EXTERNAL BLACS_BARRIER, BLACS_EXIT, BLACS_GET, @@ -165,9 +165,9 @@ PROGRAM PDLUDRIVER * Print version * IF( IAM.EQ.0 ) THEN - CALL GET_AOCL_SCALAPACK_VERSION( SVERSION ) - WRITE(*, *) - WRITE(*, *) 'AOCL Version: ', SVERSION + CALL GET_AOCL_SCALAPACK_VERSION( SVERSION, VER_STR_LEN ) + WRITE(*, *) + WRITE(*, *) 'AOCL Version: ', SVERSION(1:VER_STR_LEN) END IF * * Print headings diff --git a/TESTING/README.txt b/TESTING/README.txt new file mode 100644 index 00000000..7e92a360 --- /dev/null +++ b/TESTING/README.txt @@ -0,0 +1,39 @@ +Executing the AOCL-ScaLAPACK test suite +======================================= + +To execute AOCL-ScaLAPACK test suite against different +MPI configurations (ranks, binding, etc) you can use the script called +'/scalapack_test.sh' + +Upon running scalapack_test.sh the results will be saved in the +directory $HOME/aocl_scalapack_testing_results. The script provides +several command line options, and if no arguments are given, the +following default options will be used: + + a) MPI ranks => Maximum number of available cpu cores in the system + b) Test programs => All the programs in AOCL-ScaLAPACK test suite + will be executed + c) MPI flavour => The script will search for the mpirun executable in the + PATH variable and corresponding MPI installation will be used. 
+ d) MPI binding, mapping => The test will be performed only with + 'map-by core' and 'bind-to core' + + +Below are some helpful examples demonstrating different options: + +Eg: To test only single precision Cholesky transformation for all + the MPI mapping for ranks between 4 to 16 use + $ scalapack_test.sh -t xsllt -s 4 -i 1 -e 16 -c all + +Eg: To test all the programs with maximum available ranks + with MPI mapping "map-by l3cache" + $ scalapack_test.sh -t all -c map_l3cache + +To view all the supported options execute the script with argument -h + +Address Sanitizer(ASAN) testing: +================================ + +Address Sanitizer (ASAN) tests are supported through the AOCL-ScaLAPACK +test suite. To enable the same, include the build configure option +'-DENABLE_ASAN_TESTS=ON'. diff --git a/TOOLS/SL_gridreshape.c b/TOOLS/SL_gridreshape.c index aa0c9290..b069a8c8 100644 --- a/TOOLS/SL_gridreshape.c +++ b/TOOLS/SL_gridreshape.c @@ -1,5 +1,6 @@ #include #include +#include "../BLACS/SRC/Bdef.h" #ifndef Int #define Int int diff --git a/scalapack_test.sh b/scalapack_test.sh index 8b29076e..da035471 100755 --- a/scalapack_test.sh +++ b/scalapack_test.sh @@ -1,454 +1,493 @@ -#!/usr/bin/env bash - -echo " " -echo " Scalapack Testing Started " - -cd TESTING -echo "SCALAPACK Test suite:" >>../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt - -echo " xcbrd " >>../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcbrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcdblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcdblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcdtlu " >> ../ScalaPack_TestResults.txt - -mpirun
--bind-to core --nooversubscribe xcdtlu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcevc " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcevc >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcgblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcgblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcgsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcgsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcheevr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcheevr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xchrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xchrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcinv " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcinv >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcllt >> 
../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcls " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcls >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xclu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xclu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcnep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcnep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcpbllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcpbllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcptllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcptllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcqr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcqr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xcsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xcsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xctrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xctrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdbrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdbrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xddblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xddblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xddtlu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xddtlu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdgblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdgblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdgsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdgsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdhrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdhrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdhseqr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdhseqr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdinv " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdinv >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdls " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdls >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdlu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdlu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdnep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdnep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdpbllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdpbllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdptllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdptllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdqr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdqr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdsvd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdsvd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdsyevr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdsyevr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xdtrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xdtrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsbrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsbrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsdblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsdblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsdtlu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsdtlu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsgblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsgblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsgsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsgsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xshrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xshrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xshseqr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xshseqr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsinv " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsinv >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsls " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsls >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xslu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xslu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsnep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsnep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xspbllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xspbllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsptllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsptllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xsqr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xsqr >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xssep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xssep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xssvd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xssvd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xssyevr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xssyevr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xstrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xstrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzbrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzbrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzdblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzdblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzdtlu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzdtlu >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzevc " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzevc >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzgblu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzgblu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzgsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzgsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzheevr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzheevr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzhrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzhrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzinv " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzinv >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzls " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzls >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzlu " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzlu >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xznep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xznep >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzpbllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzpbllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzptllt " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzptllt >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzqr " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzqr >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xzsep " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xzsep >> ../ScalaPack_TestResults.txt -sleep 3 -echo 
"---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt -echo " xztrd " >> ../ScalaPack_TestResults.txt - -mpirun --bind-to core --nooversubscribe xztrd >> ../ScalaPack_TestResults.txt -sleep 3 -echo "---------------------------------------------------------" >> ../ScalaPack_TestResults.txt -echo " " >> ../ScalaPack_TestResults.txt - +#!/bin/bash + +# This script runs the scalapack testing programs against various mpi +# configurations. Test results will be saved in the folder +# $HOME/aocl_scalapack_testing_results +# +# The below default options will be used if it is run without any +# commandline arguments +# a) MPI ranks => Maximum number of available cpu cores in the system +# b) Test programs run => all, All the scalapack testing programs +# present in the TESTING folder will be run +# c) MPI flavour => It will look for the mpirun executable in the +# PATH variable and corresponding MPI installation will be used. 
+# d) MPI binding, mapping => Only the 'map-by core' / 'bind-to core'
+#    combination is tested
+#
+# All of the above can be overridden from the command line.
+#
+# Eg: Run only the single precision cholesky test with every supported
+# MPI mapping, sweeping the rank count from 4 to 16 in steps of 1
+# $ scalapack_test.sh -t xsllt -s 4 -i 1 -e 16 -c all
+#
+# Eg: Run all the programs with the maximum available ranks
+# using the MPI mapping "map-by l3cache"
+# $ scalapack_test.sh -t all -c map_l3cache
+#
+# Run the script with -h to list every supported option
+
+# Defaults, each one can be replaced by a command line option
+mpi_mapping_binding="map_core_bind_core"
+test_description="scalapack_default_test"
+user_input_test="all"
+test_execution_dir="./TESTING"
+num_sample_to_collect=1
+
+# Test programs selected by '-t fast' (and by '-t all')
+scalapack_test_list_fast=(
+    "xcdtlu" "xcgblu" "xcinv" "xclu" "xcnep" "xcptllt" "xcqr" "xcsep"
+    "xdbrd" "xddblu" "xddtlu" "xdhrd" "xdlu" "xdpbllt" "xdptllt" "xdsvd"
+    "xsbrd" "xsgblu" "xsgsep" "xshrd" "xspbllt" "xsptllt"
+    "xzevc" "xzinv" "xznep" "xzptllt"
+)
+
+# Test programs selected by '-t normal' (and by '-t all')
+scalapack_test_list_normal=(
+    "xcbrd" "xcdblu" "xcevc" "xcgsep" "xchrd" "xcllt" "xcls" "xcpbllt" "xctrd"
+    "xdgblu" "xdgsep" "xdhseqr" "xdinv" "xdllt" "xdls" "xdnep" "xdqr" "xdsep" "xdtrd"
+    "xsdblu" "xsdtlu" "xshseqr" "xsinv" "xsllt" "xsls" "xslu" "xsnep" "xsqr" "xssep" "xssvd" "xstrd"
+    "xzbrd" "xzdblu" "xzdtlu" "xzgblu" "xzgsep" "xzhrd" "xzllt" "xzls" "xzlu" "xzpbllt" "xzqr" "xzsep" "xztrd"
+)
+
+# Test programs selected by '-t slow' (and by '-t all')
+scalapack_test_list_slow=( "xdsyevr" "xssyevr" "xcheevr" "xzheevr" )
+
+# Filled in while the script runs: tests to execute and tests whose mpirun failed
+scalapack_test_list_execute=()
+scalapack_test_list_mpifail=()
+
+# Supported values for -c. The first word of an entry is its name, the
+# remainder is the option string handed to mpirun.
+mpi_map_bind_supported_list=(
+    "map_core_bind_core --map-by core --bind-to core"
+    "map_l3cache_bind_core --map-by l3cache --bind-to core"
+    "map_numa_bind_core --map-by numa --bind-to core"
+    "map_socket_bind_core --map-by socket --bind-to core"
+    "map_l3cache --map-by l3cache"
+    "map_numa --map-by numa"
+    "map_socket --map-by socket"
+)
+
+# Entries of the list above that were selected with -c
+mpi_map_bind_testing_list=()
+
+# Print the command line synopsis.
+usage()
+{
+    echo -e "scalapack_test.sh -t TEST      test program to run, or one of fast, normal, slow, all (default: all)
+                  -m MPI_DIR   MPI install prefix holding bin/mpirun and bin/mpicc (default: derived from the mpirun found in PATH)
+                  -d DESC      test description, used as prefix of the result folder name (default: scalapack_default_test)
+                  -f DIR       folder containing the scalapack test executables (default: ./TESTING)
+                  -s START     first MPI rank count of the sweep (default: number of CPUs)
+                  -i STEP      increment between MPI rank counts (default: number of CPUs)
+                  -e END       last MPI rank count of the sweep (default: number of CPUs)
+                  -x LIST      comma separated tests or groups (fast, normal, slow) to exclude
+                  -n SAMPLES   number of runs per test and rank count (default: 1)
+                  -c MAPPING   MPI mapping/binding name, or all (default: map_core_bind_core)
+                  -h           print this help and exit"
+}
+
+# Print the message given as $1 followed by the usage text, then abort.
+die_with_usage()
+{
+    echo "$1"
+    echo -e "\n\nUsage is given below"
+    usage
+    exit 1
+}
+
+# Succeed when $1 is equal to one of the remaining arguments.
+contains()
+{
+    local needle="$1" item
+    shift
+    for item in "$@"
+    do
+        if [ "$item" == "$needle" ] ; then
+            return 0
+        fi
+    done
+    return 1
+}
+
+# Default rank sweep: a single run that uses every CPU reported by nproc.
+num_cpus=$(nproc --all)
+num_mpi_ranks_step=$num_cpus
+num_mpi_ranks_start=$num_cpus
+num_mpi_ranks_end=$num_cpus
+
+# Tests named with -x. Initialised here so that a stray value inherited
+# from the environment can never end up in the list.
+scalapack_test_list_exclude=()
+
+while getopts "t:m:d:f:s:i:e:x:n:c:h" OPTION
+do
+    case $OPTION in
+        h)
+            usage
+            exit 0
+            ;;
+        t) user_input_test="$OPTARG" ;;
+        m) mpi_install_path="$OPTARG" ;;
+        d) test_description="$OPTARG" ;;
+        f) test_execution_dir="$OPTARG" ;;
+        s) num_mpi_ranks_start="$OPTARG" ;;
+        i) num_mpi_ranks_step="$OPTARG" ;;
+        e) num_mpi_ranks_end="$OPTARG" ;;
+        n) num_sample_to_collect="$OPTARG" ;;
+        c) mpi_mapping_binding="$OPTARG" ;;
+        x)
+            # Comma separated list; a group name stands for every test of
+            # that group. The expansion is left unquoted on purpose so
+            # that it is split into words.
+            for tst in ${OPTARG//,/ }
+            do
+                case "$tst" in
+                    fast)   scalapack_test_list_exclude+=( "${scalapack_test_list_fast[@]}" ) ;;
+                    normal) scalapack_test_list_exclude+=( "${scalapack_test_list_normal[@]}" ) ;;
+                    slow)   scalapack_test_list_exclude+=( "${scalapack_test_list_slow[@]}" ) ;;
+                    *)      scalapack_test_list_exclude+=( "$tst" ) ;;
+                esac
+            done
+            ;;
+        *)
+            usage
+            exit 1
+            ;;
+    esac
+done
+
+# The rank sweep is generated with seq and the sample count drives an
+# arithmetic loop, so all four values have to be integers. A zero step
+# would make seq fail and the script would silently run nothing.
+for opt_value in "$num_mpi_ranks_start" "$num_mpi_ranks_step" "$num_mpi_ranks_end" "$num_sample_to_collect"
+do
+    if ! [[ "$opt_value" =~ ^-?[0-9]+$ ]] ; then
+        die_with_usage "Invalid numeric value '$opt_value' given to one of -s, -i, -e, -n. Exiting !!!"
+    fi
+done
+if [ "$num_mpi_ranks_step" -eq 0 ] ; then
+    die_with_usage "The MPI rank step (-i) must not be 0. Exiting !!!"
+fi
+
+# Verify the user supplied mapping and binding option (-c). The name of a
+# supported entry is its first word.
+if [ "$mpi_mapping_binding" == "all" ] ; then
+    mpi_map_bind_testing_list+=( "${mpi_map_bind_supported_list[@]}" )
+else
+    for mpi_map_bind in "${mpi_map_bind_supported_list[@]}"
+    do
+        if [ "${mpi_map_bind%% *}" == "$mpi_mapping_binding" ] ; then
+            mpi_map_bind_testing_list+=( "$mpi_map_bind" )
+        fi
+    done
+fi
+
+if [ ${#mpi_map_bind_testing_list[@]} -eq 0 ] ; then
+    die_with_usage "The supplied $mpi_mapping_binding is wrong. Exiting !!!"
+fi
+
+# Verify the user supplied test (-t): either a group name or the name of
+# a single test program that is part of one of the groups.
+if [ -z "$user_input_test" ] ; then
+    die_with_usage "No test input. Specify a test name or 'fast, normal, slow, all'. Exiting !!!"
+fi
+
+case "$user_input_test" in
+    all)
+        scalapack_test_list_execute+=( "${scalapack_test_list_fast[@]}" )
+        scalapack_test_list_execute+=( "${scalapack_test_list_normal[@]}" )
+        scalapack_test_list_execute+=( "${scalapack_test_list_slow[@]}" )
+        ;;
+    fast)
+        scalapack_test_list_execute+=( "${scalapack_test_list_fast[@]}" )
+        ;;
+    normal)
+        scalapack_test_list_execute+=( "${scalapack_test_list_normal[@]}" )
+        ;;
+    slow)
+        scalapack_test_list_execute+=( "${scalapack_test_list_slow[@]}" )
+        ;;
+    *)
+        if contains "$user_input_test" "${scalapack_test_list_fast[@]}" \
+                    "${scalapack_test_list_normal[@]}" "${scalapack_test_list_slow[@]}" ; then
+            scalapack_test_list_execute+=( "$user_input_test" )
+        fi
+        ;;
+esac
+
+if [ ${#scalapack_test_list_execute[@]} -eq 0 ] ; then
+    die_with_usage "Wrong test input. Specify a valid test name or 'fast, normal, slow, all'. Exiting !!!"
+fi
+
+# Drop the excluded tests (-x) before anything else looks at the list, so
+# that an excluded program neither has to exist nor gets executed.
+scalapack_test_list_selected=()
+for scalapack_test in "${scalapack_test_list_execute[@]}"
+do
+    if ! contains "$scalapack_test" "${scalapack_test_list_exclude[@]}" ; then
+        scalapack_test_list_selected+=( "$scalapack_test" )
+    fi
+done
+scalapack_test_list_execute=( "${scalapack_test_list_selected[@]}" )
+
+# Look for mpirun in PATH if the user did not supply an MPI install path (-m)
+if [ -z "$mpi_install_path" ] ; then
+    mpirun_path=$(command -v mpirun)
+
+    if [ -z "$mpirun_path" ] ; then
+        echo "Unable to find mpirun. Exiting !!!"
+        exit 1
+    fi
+
+    # PREFIX/bin/mpirun -> PREFIX
+    mpi_install_path=$(dirname "$(dirname "$mpirun_path")")
+fi
+
+if [ ! -f "$mpi_install_path/bin/mpicc" ] ; then
+    echo "Unable to find mpicc @ $mpi_install_path/bin/mpicc"
+    die_with_usage "Please specify a valid mpi install path with -m, can't proceed without it. Exiting !!!"
+fi
+
+# Check the scalapack test executables in the user supplied/default folder
+if [ -z "$test_execution_dir" ] ; then
+    die_with_usage "Please specify the path to scalapack TESTING folder. can't proceed without it. Exiting !!!"
+fi
+
+for scalapack_test in "${scalapack_test_list_execute[@]}"
+do
+    if [ ! -f "$test_execution_dir/$scalapack_test" ] ; then
+        die_with_usage "Unable to find $scalapack_test exe in $test_execution_dir. can't proceed without it. Exiting !!!"
+    fi
+done
+
+if [ -z "$test_description" ] ; then
+    die_with_usage "Please specify a test description. can't proceed without it. Exiting !!!"
+fi
+
+# Results go to a folder named after the description and the start time
+output_dir="$HOME/aocl_scalapack_testing_results/"
+dir_str=$(date +%b_%d_%Y_%H_%M_%S | tr '[:upper:]' '[:lower:]')
+test_description=$(echo $test_description | tr -s ' ' | tr ' ' '_')
+result_folder="${test_description}_${dir_str}"
+result_root="$output_dir/$result_folder"
+
+# Set mpi binary and library path. The existing LD_LIBRARY_PATH is only
+# appended when it is not empty: a trailing ':' would add the current
+# directory to the library search path.
+export PATH="$mpi_install_path/bin:$PATH"
+export LD_LIBRARY_PATH="$mpi_install_path/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+
+test_execution_dir=$(readlink -f "$test_execution_dir")
+
+if ! cd "$test_execution_dir" ; then
+    die_with_usage "Unable to change directory to path $test_execution_dir. Please specify a valid scalapack TESTING folder. Exiting !!!"
+fi
+
+if ! mkdir -p "$result_root" ; then
+    echo "Unable to create the result folder $result_root. Exiting !!!"
+    exit 1
+fi
+
+test_log_file="$result_root/test_log.txt"
+test_env_file="$result_root/test_env.txt"
+
+# Record the environment the tests are run in
+{
+    echo -e "test_description:$test_description"
+    echo -e "scalapack TESTING dir:$test_execution_dir"
+    echo -e "mpi path used for testing:$mpi_install_path"
+    echo -e "\nenv PATH:$PATH"
+    echo -e "\nenv LD_LIBRARY_PATH:$LD_LIBRARY_PATH"
+    echo -e "\nenv OMP_NUM_THREADS:$OMP_NUM_THREADS"
+    echo -e "\nenv LD_PRELOAD:$LD_PRELOAD"
+    echo -e "\nMPI configuration:"
+
+    if [ -f "$mpi_install_path/bin/ompi_info" ] ; then
+        echo -e "$(ompi_info)"
+    elif [ -f "$mpi_install_path/bin/impi_info" ] ; then
+        echo -e "$(mpirun --version)\n"
+    else
+        echo -e "Unknown MPI installation"
+    fi
+} > "$test_env_file"
+
+{
+    echo "--------------------------------------------------------"
+    echo "Main test loop started @ $(date)"
+    echo "--------------------------------------------------------"
+} >> "$test_log_file"
+
+## Loop over mappings, tests, rank counts and samples. Every sample gets
+## its own folder: MAPPING/TEST/result_nproc_N/sample_I/result.txt
+for mpi_map_bind_var in "${mpi_map_bind_testing_list[@]}"
+do
+    test_name=${mpi_map_bind_var%% *}
+    mpi_opt=${mpi_map_bind_var#* }
+    folder1="$result_root/$test_name"
+
+    echo "$test_name started @ $(date)" >> "$test_log_file"
+
+    mkdir "$folder1"
+    for scalapack_test in "${scalapack_test_list_execute[@]}"
+    do
+        folder2="$folder1/$scalapack_test"
+        mkdir "$folder2"
+
+        for num_core in $(seq "$num_mpi_ranks_start" "$num_mpi_ranks_step" "$num_mpi_ranks_end")
+        do
+            folder3="$folder2/result_nproc_$num_core"
+            mkdir "$folder3"
+
+            echo "Executing $test_name:$scalapack_test with $num_core MPI ranks. @ $(date)"
+
+            # Accumulated wall clock time of all samples, in nanoseconds
+            declare -i total_time=0
+            declare -i start_time=0
+            declare -i end_time=0
+
+            for (( i = 0; i < num_sample_to_collect; i++ ))
+            do
+                folder4="$folder3/sample_$i"
+                mkdir "$folder4"
+
+                # Execute the test. $mpi_opt is left unquoted on purpose:
+                # it holds several mpirun options separated by spaces.
+                start_time=$(date +%s%N)
+                result_str=$(mpirun -np "$num_core" $mpi_opt "./$scalapack_test" 2>&1)
+                mpirun_exit_code=$?
+                end_time=$(date +%s%N)
+                total_time=$(( total_time + (end_time - start_time) ))
+
+                echo "$result_str" > "$folder4/result.txt"
+
+                if [ $mpirun_exit_code -ne 0 ] ; then
+                    echo "mpirun failed for the test:$scalapack_test" | tee -a "$test_log_file"
+                    scalapack_test_list_mpifail+=( "$scalapack_test" )
+                fi
+            done
+            echo "#__test_runtime__:$test_name:$scalapack_test:$(( total_time / 1000000 )) ms:$num_core" >> "$test_log_file"
+        done
+    done
+    echo "$test_name ended @ $(date)" >> "$test_log_file"
+done
+
+{
+    echo "--------------------------------------------------------"
+    echo "Main test loop ended @ $(date)"
+    echo "--------------------------------------------------------"
+} >> "$test_log_file"
+
+# Result files that contain the word FAILED
+failed_file_list=$(find "$result_root" -name result.txt -exec grep -l FAILED {} +)
+if [ -z "$failed_file_list" ] ; then
+    echo "Number of scalapack routines failed: 0" | tee -a "$test_log_file"
+else
+    echo "Some of the scalapack routines failed. Please find the result_files with failed routines" | tee -a "$test_log_file"
+    echo "$failed_file_list" | tee -a "$test_log_file"
+fi
+
+# Tests for which mpirun itself returned a non zero exit code
+if [ ${#scalapack_test_list_mpifail[@]} -eq 0 ]; then
+    echo "Number of mpirun failures: 0" | tee -a "$test_log_file"
+else
+    echo "Some of the scalapack test applications failed in mpirun. Please find it below" | tee -a "$test_log_file"
+    printf '%s\n' "${scalapack_test_list_mpifail[@]}" | tee -a "$test_log_file"
+fi
+
+# Result files that report a non zero number of skipped tests. A file may
+# hold more than one "tests skipped" line, so every such line is examined
+# and the file is listed as soon as one of them starts with a non zero count.
+scalapack_test_list_skipped=()
+while IFS= read -r skip_check
+do
+    if grep -a "tests skipped" "$skip_check" | awk '$1 + 0 != 0 { skipped = 1 } END { exit !skipped }' ; then
+        scalapack_test_list_skipped+=( "$skip_check" )
+    fi
+done < <(find "$result_root" -name result.txt)
+
+if [ ${#scalapack_test_list_skipped[@]} -eq 0 ]; then
+    echo "Number of scalapack routines skipped: 0" | tee -a "$test_log_file"
+else
+    echo "Some of the scalapack routines skipped. Please find the result_files with skipped routines" | tee -a "$test_log_file"
+    printf '%s\n' "${scalapack_test_list_skipped[@]}" | tee -a "$test_log_file"
+fi