diff --git a/config/sifive_x280/bli_cntx_init_sifive_x280.c b/config/sifive_x280/bli_cntx_init_sifive_x280.c index d04b734f90..e6ce602b89 100644 --- a/config/sifive_x280/bli_cntx_init_sifive_x280.c +++ b/config/sifive_x280/bli_cntx_init_sifive_x280.c @@ -36,181 +36,181 @@ void bli_cntx_init_sifive_x280( cntx_t* cntx ) { - blksz_t blkszs[ BLIS_NUM_BLKSZS ]; - - // Set default kernel blocksizes and functions. - bli_cntx_init_sifive_x280_ref( cntx ); - - // ------------------------------------------------------------------------- - - // Update the context with optimized native kernels. - bli_cntx_set_ukrs - ( - cntx, - - // Level 1 - BLIS_ADDV_KER, BLIS_FLOAT, bli_saddv_sifive_x280_intr, - BLIS_ADDV_KER, BLIS_DOUBLE, bli_daddv_sifive_x280_intr, - BLIS_ADDV_KER, BLIS_SCOMPLEX, bli_caddv_sifive_x280_intr, - BLIS_ADDV_KER, BLIS_DCOMPLEX, bli_zaddv_sifive_x280_intr, - - BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_sifive_x280_asm, - BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_sifive_x280_asm, - BLIS_AMAXV_KER, BLIS_SCOMPLEX, bli_camaxv_sifive_x280_asm, - BLIS_AMAXV_KER, BLIS_DCOMPLEX, bli_zamaxv_sifive_x280_asm, - - BLIS_AXPBYV_KER, BLIS_FLOAT, bli_saxpbyv_sifive_x280_intr, - BLIS_AXPBYV_KER, BLIS_DOUBLE, bli_daxpbyv_sifive_x280_intr, - BLIS_AXPBYV_KER, BLIS_SCOMPLEX, bli_caxpbyv_sifive_x280_intr, - BLIS_AXPBYV_KER, BLIS_DCOMPLEX, bli_zaxpbyv_sifive_x280_intr, - - BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_sifive_x280_intr, - BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_sifive_x280_intr, - BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_sifive_x280_intr, - BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_sifive_x280_intr, - - BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_sifive_x280_asm, - BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_sifive_x280_asm, - BLIS_COPYV_KER, BLIS_SCOMPLEX, bli_ccopyv_sifive_x280_asm, - BLIS_COPYV_KER, BLIS_DCOMPLEX, bli_zcopyv_sifive_x280_asm, - - BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_sifive_x280_intr, - BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_sifive_x280_intr, - BLIS_DOTV_KER, BLIS_SCOMPLEX, bli_cdotv_sifive_x280_intr, - BLIS_DOTV_KER, BLIS_DCOMPLEX, bli_zdotv_sifive_x280_intr, - - BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_sifive_x280_intr, - BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_sifive_x280_intr, - BLIS_DOTXV_KER, BLIS_SCOMPLEX, bli_cdotxv_sifive_x280_intr, - BLIS_DOTXV_KER, BLIS_DCOMPLEX, bli_zdotxv_sifive_x280_intr, - - BLIS_INVERTV_KER, BLIS_FLOAT, bli_sinvertv_sifive_x280_asm, - BLIS_INVERTV_KER, BLIS_DOUBLE, bli_dinvertv_sifive_x280_asm, - BLIS_INVERTV_KER, BLIS_SCOMPLEX, bli_cinvertv_sifive_x280_asm, - BLIS_INVERTV_KER, BLIS_DCOMPLEX, bli_zinvertv_sifive_x280_asm, - - BLIS_INVSCALV_KER, BLIS_FLOAT, bli_sinvscalv_sifive_x280_asm, - BLIS_INVSCALV_KER, BLIS_DOUBLE, bli_dinvscalv_sifive_x280_asm, - BLIS_INVSCALV_KER, BLIS_SCOMPLEX, bli_cinvscalv_sifive_x280_asm, - BLIS_INVSCALV_KER, BLIS_DCOMPLEX, bli_zinvscalv_sifive_x280_asm, - - BLIS_SCAL2V_KER, BLIS_FLOAT, bli_sscal2v_sifive_x280_intr, - BLIS_SCAL2V_KER, BLIS_DOUBLE, bli_dscal2v_sifive_x280_intr, - BLIS_SCAL2V_KER, BLIS_SCOMPLEX, bli_cscal2v_sifive_x280_intr, - BLIS_SCAL2V_KER, BLIS_DCOMPLEX, bli_zscal2v_sifive_x280_intr, - - BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_sifive_x280_intr, - BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_sifive_x280_intr, - BLIS_SCALV_KER, BLIS_SCOMPLEX, bli_cscalv_sifive_x280_intr, - BLIS_SCALV_KER, BLIS_DCOMPLEX, bli_zscalv_sifive_x280_intr, - - BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_sifive_x280_asm, - BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_sifive_x280_asm, - BLIS_SETV_KER, BLIS_SCOMPLEX, bli_csetv_sifive_x280_asm, - BLIS_SETV_KER, BLIS_DCOMPLEX, bli_zsetv_sifive_x280_asm, - - BLIS_SUBV_KER, BLIS_FLOAT, bli_ssubv_sifive_x280_intr, - BLIS_SUBV_KER, BLIS_DOUBLE, bli_dsubv_sifive_x280_intr, - BLIS_SUBV_KER, BLIS_SCOMPLEX, bli_csubv_sifive_x280_intr, - BLIS_SUBV_KER, BLIS_DCOMPLEX, bli_zsubv_sifive_x280_intr, - - BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_sifive_x280_asm, - BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_sifive_x280_asm, - BLIS_SWAPV_KER, BLIS_SCOMPLEX, bli_cswapv_sifive_x280_asm, - BLIS_SWAPV_KER, BLIS_DCOMPLEX, bli_zswapv_sifive_x280_asm, - - BLIS_XPBYV_KER, BLIS_FLOAT, bli_sxpbyv_sifive_x280_intr, - BLIS_XPBYV_KER, BLIS_DOUBLE, bli_dxpbyv_sifive_x280_intr, - BLIS_XPBYV_KER, BLIS_SCOMPLEX, bli_cxpbyv_sifive_x280_intr, - BLIS_XPBYV_KER, BLIS_DCOMPLEX, bli_zxpbyv_sifive_x280_intr, - - // Level 1f - BLIS_AXPY2V_KER, BLIS_FLOAT, bli_saxpy2v_sifive_x280_intr, - BLIS_AXPY2V_KER, BLIS_DOUBLE, bli_daxpy2v_sifive_x280_intr, - BLIS_AXPY2V_KER, BLIS_SCOMPLEX, bli_caxpy2v_sifive_x280_intr, - BLIS_AXPY2V_KER, BLIS_DCOMPLEX, bli_zaxpy2v_sifive_x280_intr, - - BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_sifive_x280_asm, - BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_sifive_x280_asm, - BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_sifive_x280_asm, - BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_sifive_x280_asm, - - BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_sifive_x280_asm, - BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_sifive_x280_asm, - BLIS_DOTXF_KER, BLIS_SCOMPLEX, bli_cdotxf_sifive_x280_asm, - BLIS_DOTXF_KER, BLIS_DCOMPLEX, bli_zdotxf_sifive_x280_asm, - - BLIS_DOTAXPYV_KER, BLIS_FLOAT, bli_sdotaxpyv_sifive_x280_intr, - BLIS_DOTAXPYV_KER, BLIS_DOUBLE, bli_ddotaxpyv_sifive_x280_intr, - BLIS_DOTAXPYV_KER, BLIS_SCOMPLEX, bli_cdotaxpyv_sifive_x280_intr, - BLIS_DOTAXPYV_KER, BLIS_DCOMPLEX, bli_zdotaxpyv_sifive_x280_intr, - - BLIS_DOTXAXPYF_KER, BLIS_FLOAT, bli_sdotxaxpyf_sifive_x280_asm, - BLIS_DOTXAXPYF_KER, BLIS_DOUBLE, bli_ddotxaxpyf_sifive_x280_asm, - BLIS_DOTXAXPYF_KER, BLIS_SCOMPLEX, bli_cdotxaxpyf_sifive_x280_asm, - BLIS_DOTXAXPYF_KER, BLIS_DCOMPLEX, bli_zdotxaxpyf_sifive_x280_asm, - - // Level 3 - BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_sifive_x280_asm_7m4, - BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_sifive_x280_asm_7m4, - BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_sifive_x280_asm_3m4, - BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_sifive_x280_asm_3m4, - - BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_sifive_x280_asm, - BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_sifive_x280_asm, - BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_l_sifive_x280_asm, - BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_l_sifive_x280_asm, - BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_sifive_x280_asm, - BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_sifive_x280_asm, - BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_u_sifive_x280_asm, - BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_u_sifive_x280_asm, - - BLIS_VA_END - ); - - // Update the context with storage preferences. - bli_cntx_set_ukr_prefs - ( - cntx, - - BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, TRUE, - BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, - BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, - BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, - - BLIS_VA_END - ); - - // Initialize level-3 blocksize objects with architecture-specific values. - // s d c z - bli_blksz_init ( &blkszs[ BLIS_MR ], 7, 7, 3, 3, - 8, 8, 4, 4 ); - bli_blksz_init_easy( &blkszs[ BLIS_NR ], 64, 32, 64, 32 ); - bli_blksz_init_easy( &blkszs[ BLIS_MC ], 56, 56, 24, 24 ); - bli_blksz_init_easy( &blkszs[ BLIS_NC ], 512, 256, 512, 256 ); - bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); - // Default BLIS_BBM_s = 1, but set here to ensure it's correct - bli_blksz_init_easy( &blkszs[ BLIS_BBM ], 1, 1, 1, 1 ); - bli_blksz_init_easy( &blkszs[ BLIS_BBN ], 1, 1, 1, 1 ); - - // Update the context with the current architecture's register and cache - // blocksizes (and multiples) for native execution. - bli_cntx_set_blkszs - ( - cntx, - - // level-3 - BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, - BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, - BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, - BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, - BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, - - // level-1m - BLIS_BBM, &blkszs[ BLIS_BBM ], BLIS_BBM, - BLIS_BBN, &blkszs[ BLIS_BBN ], BLIS_BBN, - - BLIS_VA_END - ); + blksz_t blkszs[ BLIS_NUM_BLKSZS ]; + + // Set default kernel blocksizes and functions. + bli_cntx_init_sifive_x280_ref( cntx ); + + // ------------------------------------------------------------------------- + + // Update the context with optimized native kernels. + bli_cntx_set_ukrs + ( + cntx, + + // Level 1 + BLIS_ADDV_KER, BLIS_FLOAT, bli_saddv_sifive_x280_intr, + BLIS_ADDV_KER, BLIS_DOUBLE, bli_daddv_sifive_x280_intr, + BLIS_ADDV_KER, BLIS_SCOMPLEX, bli_caddv_sifive_x280_intr, + BLIS_ADDV_KER, BLIS_DCOMPLEX, bli_zaddv_sifive_x280_intr, + + BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_sifive_x280_asm, + BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_sifive_x280_asm, + BLIS_AMAXV_KER, BLIS_SCOMPLEX, bli_camaxv_sifive_x280_asm, + BLIS_AMAXV_KER, BLIS_DCOMPLEX, bli_zamaxv_sifive_x280_asm, + + BLIS_AXPBYV_KER, BLIS_FLOAT, bli_saxpbyv_sifive_x280_intr, + BLIS_AXPBYV_KER, BLIS_DOUBLE, bli_daxpbyv_sifive_x280_intr, + BLIS_AXPBYV_KER, BLIS_SCOMPLEX, bli_caxpbyv_sifive_x280_intr, + BLIS_AXPBYV_KER, BLIS_DCOMPLEX, bli_zaxpbyv_sifive_x280_intr, + + BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_sifive_x280_intr, + BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_sifive_x280_intr, + BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_sifive_x280_intr, + BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_sifive_x280_intr, + + BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_sifive_x280_asm, + BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_sifive_x280_asm, + BLIS_COPYV_KER, BLIS_SCOMPLEX, bli_ccopyv_sifive_x280_asm, + BLIS_COPYV_KER, BLIS_DCOMPLEX, bli_zcopyv_sifive_x280_asm, + + BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_sifive_x280_intr, + BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_sifive_x280_intr, + BLIS_DOTV_KER, BLIS_SCOMPLEX, bli_cdotv_sifive_x280_intr, + BLIS_DOTV_KER, BLIS_DCOMPLEX, bli_zdotv_sifive_x280_intr, + + BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_sifive_x280_intr, + BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_sifive_x280_intr, + BLIS_DOTXV_KER, BLIS_SCOMPLEX, bli_cdotxv_sifive_x280_intr, + BLIS_DOTXV_KER, BLIS_DCOMPLEX, bli_zdotxv_sifive_x280_intr, + + BLIS_INVERTV_KER, BLIS_FLOAT, bli_sinvertv_sifive_x280_asm, + BLIS_INVERTV_KER, BLIS_DOUBLE, bli_dinvertv_sifive_x280_asm, + BLIS_INVERTV_KER, BLIS_SCOMPLEX, bli_cinvertv_sifive_x280_asm, + BLIS_INVERTV_KER, BLIS_DCOMPLEX, bli_zinvertv_sifive_x280_asm, + + BLIS_INVSCALV_KER, BLIS_FLOAT, bli_sinvscalv_sifive_x280_asm, + BLIS_INVSCALV_KER, BLIS_DOUBLE, bli_dinvscalv_sifive_x280_asm, + BLIS_INVSCALV_KER, BLIS_SCOMPLEX, bli_cinvscalv_sifive_x280_asm, + BLIS_INVSCALV_KER, BLIS_DCOMPLEX, bli_zinvscalv_sifive_x280_asm, + + BLIS_SCAL2V_KER, BLIS_FLOAT, bli_sscal2v_sifive_x280_intr, + BLIS_SCAL2V_KER, BLIS_DOUBLE, bli_dscal2v_sifive_x280_intr, + BLIS_SCAL2V_KER, BLIS_SCOMPLEX, bli_cscal2v_sifive_x280_intr, + BLIS_SCAL2V_KER, BLIS_DCOMPLEX, bli_zscal2v_sifive_x280_intr, + + BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_sifive_x280_intr, + BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_sifive_x280_intr, + BLIS_SCALV_KER, BLIS_SCOMPLEX, bli_cscalv_sifive_x280_intr, + BLIS_SCALV_KER, BLIS_DCOMPLEX, bli_zscalv_sifive_x280_intr, + + BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_sifive_x280_asm, + BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_sifive_x280_asm, + BLIS_SETV_KER, BLIS_SCOMPLEX, bli_csetv_sifive_x280_asm, + BLIS_SETV_KER, BLIS_DCOMPLEX, bli_zsetv_sifive_x280_asm, + + BLIS_SUBV_KER, BLIS_FLOAT, bli_ssubv_sifive_x280_intr, + BLIS_SUBV_KER, BLIS_DOUBLE, bli_dsubv_sifive_x280_intr, + BLIS_SUBV_KER, BLIS_SCOMPLEX, bli_csubv_sifive_x280_intr, + BLIS_SUBV_KER, BLIS_DCOMPLEX, bli_zsubv_sifive_x280_intr, + + BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_sifive_x280_asm, + BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_sifive_x280_asm, + BLIS_SWAPV_KER, BLIS_SCOMPLEX, bli_cswapv_sifive_x280_asm, + BLIS_SWAPV_KER, BLIS_DCOMPLEX, bli_zswapv_sifive_x280_asm, + + BLIS_XPBYV_KER, BLIS_FLOAT, bli_sxpbyv_sifive_x280_intr, + BLIS_XPBYV_KER, BLIS_DOUBLE, bli_dxpbyv_sifive_x280_intr, + BLIS_XPBYV_KER, BLIS_SCOMPLEX, bli_cxpbyv_sifive_x280_intr, + BLIS_XPBYV_KER, BLIS_DCOMPLEX, bli_zxpbyv_sifive_x280_intr, + + // Level 1f + BLIS_AXPY2V_KER, BLIS_FLOAT, bli_saxpy2v_sifive_x280_intr, + BLIS_AXPY2V_KER, BLIS_DOUBLE, bli_daxpy2v_sifive_x280_intr, + BLIS_AXPY2V_KER, BLIS_SCOMPLEX, bli_caxpy2v_sifive_x280_intr, + BLIS_AXPY2V_KER, BLIS_DCOMPLEX, bli_zaxpy2v_sifive_x280_intr, + + BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_sifive_x280_asm, + BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_sifive_x280_asm, + BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_sifive_x280_asm, + BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_sifive_x280_asm, + + BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_sifive_x280_asm, + BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_sifive_x280_asm, + BLIS_DOTXF_KER, BLIS_SCOMPLEX, bli_cdotxf_sifive_x280_asm, + BLIS_DOTXF_KER, BLIS_DCOMPLEX, bli_zdotxf_sifive_x280_asm, + + BLIS_DOTAXPYV_KER, BLIS_FLOAT, bli_sdotaxpyv_sifive_x280_intr, + BLIS_DOTAXPYV_KER, BLIS_DOUBLE, bli_ddotaxpyv_sifive_x280_intr, + BLIS_DOTAXPYV_KER, BLIS_SCOMPLEX, bli_cdotaxpyv_sifive_x280_intr, + BLIS_DOTAXPYV_KER, BLIS_DCOMPLEX, bli_zdotaxpyv_sifive_x280_intr, + + BLIS_DOTXAXPYF_KER, BLIS_FLOAT, bli_sdotxaxpyf_sifive_x280_asm, + BLIS_DOTXAXPYF_KER, BLIS_DOUBLE, bli_ddotxaxpyf_sifive_x280_asm, + BLIS_DOTXAXPYF_KER, BLIS_SCOMPLEX, bli_cdotxaxpyf_sifive_x280_asm, + BLIS_DOTXAXPYF_KER, BLIS_DCOMPLEX, bli_zdotxaxpyf_sifive_x280_asm, + + // Level 3 + BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_sifive_x280_asm_7m4, + BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_sifive_x280_asm_7m4, + BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_sifive_x280_asm_3m4, + BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_sifive_x280_asm_3m4, + + BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_sifive_x280_asm, + BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_sifive_x280_asm, + BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_l_sifive_x280_asm, + BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_l_sifive_x280_asm, + BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_sifive_x280_asm, + BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_sifive_x280_asm, + BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_u_sifive_x280_asm, + BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_u_sifive_x280_asm, + + BLIS_VA_END + ); + + // Update the context with storage preferences. + bli_cntx_set_ukr_prefs + ( + cntx, + + BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, TRUE, + BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, + BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, + BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, + + BLIS_VA_END + ); + + // Initialize level-3 blocksize objects with architecture-specific values. + // s d c z + bli_blksz_init ( &blkszs[ BLIS_MR ], 7, 7, 3, 3, + 8, 8, 4, 4 ); + bli_blksz_init_easy( &blkszs[ BLIS_NR ], 64, 32, 64, 32 ); + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 56, 56, 24, 24 ); + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 512, 256, 512, 256 ); + bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); + // Default BLIS_BBM_s = 1, but set here to ensure it's correct + bli_blksz_init_easy( &blkszs[ BLIS_BBM ], 1, 1, 1, 1 ); + bli_blksz_init_easy( &blkszs[ BLIS_BBN ], 1, 1, 1, 1 ); + + // Update the context with the current architecture's register and cache + // blocksizes (and multiples) for native execution. + bli_cntx_set_blkszs + ( + cntx, + + // level-3 + BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, + BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, + BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, + BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, + BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, + + // level-1m + BLIS_BBM, &blkszs[ BLIS_BBM ], BLIS_BBM, + BLIS_BBN, &blkszs[ BLIS_BBN ], BLIS_BBN, + + BLIS_VA_END + ); } diff --git a/config/sifive_x280/make_defs.mk b/config/sifive_x280/make_defs.mk index 782d8a3823..c509c3d664 100644 --- a/config/sifive_x280/make_defs.mk +++ b/config/sifive_x280/make_defs.mk @@ -1,6 +1,6 @@ # # -# BLIS +# BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # @@ -49,10 +49,11 @@ THIS_CONFIG := sifive_x280 CMISCFLAGS_SIFIVE := -mcmodel=medany -march=rv64gcv_zba_zbb_zvl512b -mabi=lp64d CPPROCFLAGS := CMISCFLAGS := $(CMISCFLAGS_SIFIVE) -fdata-sections -ffunction-sections \ - -fdiagnostics-color=always -fno-rtti -fno-exceptions -std=gnu++17 + -fdiagnostics-color=always -fno-rtti -fno-exceptions \ + -std=gnu++17 CPICFLAGS := -fPIC -CWARNFLAGS := -Wall -Wextra -Wno-unused-function \ - -Wno-unused-parameter -Wno-sign-compare -Wno-unused-variable +CWARNFLAGS := -Wall -Wextra -Wno-unused-function -Wno-unused-parameter \ + -Wno-sign-compare -Wno-unused-variable ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g @@ -66,7 +67,7 @@ endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -CKVECFLAGS := +CKVECFLAGS := # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) diff --git a/config_registry b/config_registry index 72be8b07a4..c637d0d771 100644 --- a/config_registry +++ b/config_registry @@ -14,10 +14,7 @@ amd64_legacy: excavator steamroller piledriver bulldozer generic amd64: zen3 zen2 zen generic arm64: armsve firestorm thunderx2 cortexa57 cortexa53 generic arm32: cortexa15 cortexa9 generic -power: power10 power9 generic - -# SiFive architectures -sifive_x280: sifive_x280 +power: power10 power9 generic # Intel architectures. skx: skx/skx/haswell/zen @@ -50,5 +47,8 @@ power10: power10 power9: power9 bgq: bgq +# SiFive architectures. +sifive_x280: sifive_x280 + # Generic architectures. generic: generic diff --git a/frame/base/bli_arch.c b/frame/base/bli_arch.c index f37e42659e..b2d69e3356 100644 --- a/frame/base/bli_arch.c +++ b/frame/base/bli_arch.c @@ -180,11 +180,6 @@ arch_t bli_arch_query_id_impl( void ) id = bli_cpuid_query_id(); #endif - // SiFive microarchitectures. - #ifdef BLIS_FAMILY_SIFIVE_X280 - id = BLIS_ARCH_SIFIVE_X280; - #endif - // Intel microarchitectures. #ifdef BLIS_FAMILY_SKX id = BLIS_ARCH_SKX; @@ -268,6 +263,11 @@ arch_t bli_arch_query_id_impl( void ) id = BLIS_ARCH_BGQ; #endif + // SiFive microarchitectures. + #ifdef BLIS_FAMILY_SIFIVE_X280 + id = BLIS_ARCH_SIFIVE_X280; + #endif + // Generic microarchitecture. #ifdef BLIS_FAMILY_GENERIC id = BLIS_ARCH_GENERIC; @@ -294,8 +294,6 @@ arch_t bli_arch_query_id_impl( void ) // enum value given to the corresponding BLIS_ARCH_ value. static const char* config_name[ BLIS_NUM_ARCHS ] = { - "sifive_x280", - "skx", "knl", "knc", @@ -325,6 +323,8 @@ static const char* config_name[ BLIS_NUM_ARCHS ] = "power7", "bgq", + "sifive_x280", + "generic" }; diff --git a/frame/base/bli_gks.c b/frame/base/bli_gks.c index bc36ce6a98..c779cbb1fd 100644 --- a/frame/base/bli_gks.c +++ b/frame/base/bli_gks.c @@ -71,14 +71,6 @@ void bli_gks_init( void ) // Register a context for each architecture that was #define'd in // bli_config.h. - // -- SiFive architectures ---------------------------------------------- - -#ifdef BLIS_CONFIG_SIFIVE_X280 - bli_gks_register_cntx( BLIS_ARCH_SIFIVE_X280, bli_cntx_init_sifive_x280, - bli_cntx_init_sifive_x280_ref, - bli_cntx_init_sifive_x280_ind ); -#endif - // -- Intel architectures ---------------------------------------------- #ifdef BLIS_CONFIG_SKX @@ -223,6 +215,14 @@ void bli_gks_init( void ) bli_cntx_init_bgq_ind ); #endif + // -- SiFive architectures ---------------------------------------------- + +#ifdef BLIS_CONFIG_SIFIVE_X280 + bli_gks_register_cntx( BLIS_ARCH_SIFIVE_X280, bli_cntx_init_sifive_x280, + bli_cntx_init_sifive_x280_ref, + bli_cntx_init_sifive_x280_ind ); +#endif + // -- Generic architectures -------------------------------------------- #ifdef BLIS_CONFIG_GENERIC diff --git a/frame/include/bli_arch_config.h b/frame/include/bli_arch_config.h index c348d4be4f..e31e6e47e7 100644 --- a/frame/include/bli_arch_config.h +++ b/frame/include/bli_arch_config.h @@ -41,12 +41,6 @@ // -- Context initialization prototypes ---------------------------------------- // -// -- SiFive architectures -- - -#ifdef BLIS_CONFIG_SIFIVE_X280 -CNTX_INIT_PROTS( sifive_x280 ) -#endif - // -- Intel64 architectures -- #ifdef BLIS_CONFIG_SKX @@ -137,6 +131,12 @@ CNTX_INIT_PROTS( power7 ) CNTX_INIT_PROTS( bgq ) #endif +// -- SiFive architectures -- + +#ifdef BLIS_CONFIG_SIFIVE_X280 +CNTX_INIT_PROTS( sifive_x280 ) +#endif + // -- Generic -- #ifdef BLIS_CONFIG_GENERIC @@ -148,12 +148,6 @@ CNTX_INIT_PROTS( generic ) // -- Architecture family-specific headers ------------------------------------- // -// -- SiFive families -- - -#ifdef BLIS_FAMILY_SIFIVE_X280 -#include "bli_family_sifive_x280.h" -#endif - // -- x86_64 families -- #ifdef BLIS_FAMILY_INTEL64 @@ -272,6 +266,12 @@ CNTX_INIT_PROTS( generic ) #include "bli_family_bgq.h" #endif +// -- SiFive families -- + +#ifdef BLIS_FAMILY_SIFIVE_X280 +#include "bli_family_sifive_x280.h" +#endif + // -- Generic -- #ifdef BLIS_FAMILY_GENERIC @@ -283,11 +283,6 @@ CNTX_INIT_PROTS( generic ) // -- kernel set prototypes ---------------------------------------------------- // -// -- SiFive RISC-V architectures -- -#ifdef BLIS_KERNELS_SIFIVE_X280 -#include "bli_kernels_sifive_x280.h" -#endif - // -- Intel64 architectures -- #ifdef BLIS_KERNELS_SKX #include "bli_kernels_skx.h" @@ -359,6 +354,11 @@ CNTX_INIT_PROTS( generic ) #include "bli_kernels_bgq.h" #endif +// -- SiFive RISC-V architectures -- +#ifdef BLIS_KERNELS_SIFIVE_X280 +#include "bli_kernels_sifive_x280.h" +#endif + #endif diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 5c79b8e640..c373797961 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -928,9 +928,6 @@ typedef enum // NOTE: The C language standard guarantees that the first enum value // starts at 0. - // SiFive - BLIS_ARCH_SIFIVE_X280, - // Intel BLIS_ARCH_SKX, BLIS_ARCH_KNL, @@ -968,6 +965,9 @@ typedef enum BLIS_ARCH_POWER7, BLIS_ARCH_BGQ, + // SiFive + BLIS_ARCH_SIFIVE_X280, + // Generic architecture/configuration BLIS_ARCH_GENERIC,