Skip to content

Commit

Permalink
ArmSVE Apply *beta == 0.0 Fix for NaN Dealing
Browse files Browse the repository at this point in the history
Proposed in flame/blis#552.
  • Loading branch information
xrq-phys committed Oct 2, 2021
1 parent 3462341 commit fc9be95
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 7 deletions.
7 changes: 7 additions & 0 deletions src/configs/a64fx/armsve_asm_2vx10.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,13 @@
SCALE_COL4(Z12,Z13,Z14,Z15,ZFACTOR) \
SCALE_COL4(Z16,Z17,Z18,Z19,ZFACTOR)

/*
 * GEMM_C_FMLA_UKER: expand to five GEMM_FMLA2 invocations, one per column
 * index 0..4.
 *
 * Positional parameters:
 *   ACC0FH..ACC4FH  - first operand of each GEMM_FMLA2 call, columns 0..4.
 *   ACC0LH..ACC4LH  - second operand of each GEMM_FMLA2 call, columns 0..4.
 *   PRED            - forwarded unchanged to all five calls.
 *   SRC0FH..SRC4FH  - fourth operand of each GEMM_FMLA2 call, columns 0..4.
 *   SRC0LH..SRC4LH  - fifth operand of each GEMM_FMLA2 call, columns 0..4.
 *   ZFACTOR         - forwarded unchanged to all five calls.
 *
 * NOTE(review): GEMM_FMLA2 is defined outside this header excerpt. Presumably
 * it emits a predicated multiply-accumulate ACC += SRC * ZFACTOR on a pair of
 * vector registers, with FH/LH meaning first/last half of a column - confirm
 * against its definition.
 */
#define GEMM_C_FMLA_UKER(ACC0FH,ACC1FH,ACC2FH,ACC3FH,ACC4FH,ACC0LH,ACC1LH,ACC2LH,ACC3LH,ACC4LH,PRED,SRC0FH,SRC1FH,SRC2FH,SRC3FH,SRC4FH,SRC0LH,SRC1LH,SRC2LH,SRC3LH,SRC4LH,ZFACTOR) \
  GEMM_FMLA2(ACC0FH,ACC0LH,PRED,SRC0FH,SRC0LH,ZFACTOR) /* column 0 */ \
  GEMM_FMLA2(ACC1FH,ACC1LH,PRED,SRC1FH,SRC1LH,ZFACTOR) /* column 1 */ \
  GEMM_FMLA2(ACC2FH,ACC2LH,PRED,SRC2FH,SRC2LH,ZFACTOR) /* column 2 */ \
  GEMM_FMLA2(ACC3FH,ACC3LH,PRED,SRC3FH,SRC3LH,ZFACTOR) /* column 3 */ \
  GEMM_FMLA2(ACC4FH,ACC4LH,PRED,SRC4FH,SRC4LH,ZFACTOR) /* column 4 */

#define GEMM_C_FMAD_UKER(Z0FH,Z1FH,Z2FH,Z3FH,Z4FH,Z0LH,Z1LH,Z2LH,Z3LH,Z4LH,PFH,PLH,C0FH,C1FH,C2FH,C3FH,C4FH,C0LH,C1LH,C2LH,C3LH,C4LH,ZSCALE) \
GEMM_CCOL_FMAD(Z0FH,Z0LH,PFH,PLH,C0FH,C0LH,ZSCALE) \
GEMM_CCOL_FMAD(Z1FH,Z1LH,PFH,PLH,C1FH,C1LH,ZSCALE) \
Expand Down
23 changes: 16 additions & 7 deletions src/configs/a64fx/bli_gemm_armsve_asm_2vx10_unindexed.c
Original file line number Diff line number Diff line change
Expand Up @@ -264,11 +264,16 @@ SCALE_COL20(z0,z1,z2,z3,z4,z5,z6,z7,z8,z9,z10,z11,z12,z13,z14,z15,z16,z17,z18,z1
DT "WRITE_MEM_C: \n\t" /* Available scratch: Z[20-30]. */ \
" \n\t" /* Here used scratch: Z[20-29]. */ \
/* First half of C is already loaded in this case. */ \
GEMM_C_FMAD_LOAD_UKER_C(z20,z22,z24,z26,z28,z21,z23,z25,z27,z29,p0,p0,z0,z2,z4,z6,z8,z1,z3,z5,z7,z9,z31,x9,x7) \
/* GEMM_C_FMAD_LOAD_UKER_C(z20,z22,z24,z26,z28,z21,z23,z25,z27,z29,p0,p0,z0,z2,z4,z6,z8,z1,z3,z5,z7,z9,z31,x9,x7) */ \
" fcmp " DT "31, #0.0 \n\t" /* Skip loading for *beta = 0.0. */ \
" b.eq " DT "BETA_ZERO_C \n\t" \
GEMM_C_FMLA_UKER(z0,z2,z4,z6,z8,z1,z3,z5,z7,z9,p0,z20,z22,z24,z26,z28,z21,z23,z25,z27,z29,z31) \
GEMM_C_LOAD_UKER_C(z20,z22,z24,z26,z28,z21,z23,z25,z27,z29,p0,p0,x9,x7) \
GEMM_C_FMLA_UKER(z10,z12,z14,z16,z18,z11,z13,z15,z17,z19,p0,z20,z22,z24,z26,z28,z21,z23,z25,z27,z29,z31) \
" \n\t" \
GEMM_C_STORE_UKER_C(z20,z22,z24,z26,z28,z21,z23,z25,z27,z29,p0,p0,x5,x7) \
GEMM_C_FMAD_UKER(z0,z2,z4,z6,z8,z1,z3,z5,z7,z9,p0,p0,z10,z12,z14,z16,z18,z11,z13,z15,z17,z19,z31) \
DT "BETA_ZERO_C: \n\t" \
GEMM_C_STORE_UKER_C(z0,z2,z4,z6,z8,z1,z3,z5,z7,z9,p0,p0,x5,x7) \
GEMM_C_STORE_UKER_C(z10,z12,z14,z16,z18,z11,z13,z15,z17,z19,p0,p0,x5,x7) \
" b " DT "END_WRITE_MEM \n\t" \
" \n\t" \
DT "WRITE_MEM_G: \n\t" /* Available scratch: Z[20-30]. */ \
Expand All @@ -277,13 +282,17 @@ GEMM_C_STORE_UKER_C(z0,z2,z4,z6,z8,z1,z3,z5,z7,z9,p0,p0,x5,x7) \
" incb x8 \n\t" \
" madd x8, x8, x6, xzr \n\t" /* C-column's logical 1-vector skip. */ \
" index z30." DT ", " HF "zr, " HF "6 \n\t" /* Skips passed to index is not multiplied by 8. */ \
" \n\t" \
" fcmp " DT "31, #0.0 \n\t" /* Skip loading for *beta = 0.0. */ \
" b.eq " DT "BETA_ZERO_G \n\t" \
GEMM_C_LOAD_UKER_G(z20,z22,z24,z26,z28,z21,z23,z25,z27,z29,z30,p0,p0,x9,x7,x8,x16) \
GEMM_C_FMLA_UKER(z0,z2,z4,z6,z8,z1,z3,z5,z7,z9,p0,z20,z22,z24,z26,z28,z21,z23,z25,z27,z29,z31) \
GEMM_C_LOAD_UKER_G(z20,z22,z24,z26,z28,z21,z23,z25,z27,z29,z30,p0,p0,x9,x7,x8,x16) \
GEMM_C_FMAD_UKER(z20,z22,z24,z26,z28,z21,z23,z25,z27,z29,p0,p0,z0,z2,z4,z6,z8,z1,z3,z5,z7,z9,z31) \
GEMM_C_LOAD_UKER_G(z0,z2,z4,z6,z8,z1,z3,z5,z7,z9,z30,p0,p0,x9,x7,x8,x16) \
GEMM_C_FMLA_UKER(z10,z12,z14,z16,z18,z11,z13,z15,z17,z19,p0,z20,z22,z24,z26,z28,z21,z23,z25,z27,z29,z31) \
" \n\t" \
GEMM_C_STORE_UKER_G(z20,z22,z24,z26,z28,z21,z23,z25,z27,z29,z30,p0,p0,x5,x7,x8,x16) \
GEMM_C_FMAD_UKER(z0,z2,z4,z6,z8,z1,z3,z5,z7,z9,p0,p0,z10,z12,z14,z16,z18,z11,z13,z15,z17,z19,z31) \
DT "BETA_ZERO_G: \n\t" \
GEMM_C_STORE_UKER_G(z0,z2,z4,z6,z8,z1,z3,z5,z7,z9,z30,p0,p0,x5,x7,x8,x16) \
GEMM_C_STORE_UKER_G(z10,z12,z14,z16,z18,z11,z13,z15,z17,z19,z30,p0,p0,x5,x7,x8,x16) \
" \n\t" \
DT "END_WRITE_MEM: \n\t" \
" b " DT "END_EXEC \n\t" \
Expand Down

0 comments on commit fc9be95

Please sign in to comment.