diff --git a/Source/MatrixFunctions/arm_mat_cholesky_f32.c b/Source/MatrixFunctions/arm_mat_cholesky_f32.c index e1ad00d1..6b6de661 100755 --- a/Source/MatrixFunctions/arm_mat_cholesky_f32.c +++ b/Source/MatrixFunctions/arm_mat_cholesky_f32.c @@ -258,12 +258,17 @@ ARM_DSP_ATTRIBUTE arm_status arm_mat_cholesky_f32( vecGj1=vld1q_f32(&pG[(j + 1) * n + k]); vecGj2=vld1q_f32(&pG[(j + 2) * n + k]); vecGj3=vld1q_f32(&pG[(j + 3) * n + k]); - +#if defined(__ARM_FEATURE_FMA) acc0 = vfmaq_f32(acc0, vecGi, vecGj0); acc1 = vfmaq_f32(acc1, vecGi, vecGj1); acc2 = vfmaq_f32(acc2, vecGi, vecGj2); acc3 = vfmaq_f32(acc3, vecGi, vecGj3); - +#else + acc0 = vmlaq_f32(acc0, vecGi, vecGj0); + acc1 = vmlaq_f32(acc1, vecGi, vecGj1); + acc2 = vmlaq_f32(acc2, vecGi, vecGj2); + acc3 = vmlaq_f32(acc3, vecGi, vecGj3); +#endif kCnt--; k+=4; } @@ -319,9 +324,11 @@ ARM_DSP_ATTRIBUTE arm_status arm_mat_cholesky_f32( vecGi=vld1q_f32(&pG[i * n + k]); vecGj=vld1q_f32(&pG[j * n + k]); - +#if defined(__ARM_FEATURE_FMA) acc = vfmaq_f32(acc, vecGi, vecGj); - +#else + acc = vmlaq_f32(acc, vecGi, vecGj); +#endif kCnt--; k+=4; } diff --git a/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f32.c b/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f32.c index c7cdecc3..6b83d0a5 100755 --- a/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f32.c +++ b/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f32.c @@ -209,7 +209,11 @@ for(k=0; k < i; k++) { vecX = vld1q_f32(&pX[cols*k+j]); +#if defined(__ARM_FEATURE_FMA) vecA = vfmsq_f32(vecA,vdupq_n_f32(pLT[n*i + k]),vecX); +#else + vecA = vmlsq_f32(vecA,vdupq_n_f32(pLT[n*i + k]),vecX); +#endif } if (pLT[n*i + i]==0.0f) diff --git a/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c b/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c index f58dfbd9..6fe69a30 100755 --- a/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c +++ b/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c @@ -197,7 +197,11 @@ arm_status status; /* status of matrix inverse */ for(k=n-1; k > i; k--) { vecX = vld1q_f32(&pX[cols*k+j]); +#if defined(__ARM_FEATURE_FMA) vecA = vfmsq_f32(vecA,vdupq_n_f32(pUT[n*i + k]),vecX); +#else + vecA = vmlsq_f32(vecA,vdupq_n_f32(pUT[n*i + k]),vecX); +#endif } if (pUT[n*i + i]==0.0f) diff --git a/Source/StatisticsFunctions/arm_mse_f32.c b/Source/StatisticsFunctions/arm_mse_f32.c index b4c67615..d95a90fd 100755 --- a/Source/StatisticsFunctions/arm_mse_f32.c +++ b/Source/StatisticsFunctions/arm_mse_f32.c @@ -132,8 +132,11 @@ ARM_DSP_ATTRIBUTE void arm_mse_f32( pSrcB += 4; vecA = vsubq_f32(vecA, vecB); - +#if defined(__ARM_FEATURE_FMA) vecSum = vfmaq_f32(vecSum, vecA, vecA); +#else + vecSum = vmlaq_f32(vecSum, vecA, vecA); +#endif /* * Decrement the blockSize loop counter */