diff --git a/src/ATen/native/xpu/sycl/BatchNormKernels.cpp b/src/ATen/native/xpu/sycl/BatchNormKernels.cpp index 61a960e30..f50524db5 100644 --- a/src/ATen/native/xpu/sycl/BatchNormKernels.cpp +++ b/src/ATen/native/xpu/sycl/BatchNormKernels.cpp @@ -1302,25 +1302,16 @@ struct BatchNormTransformInputVectorizedKernelFunctor { for (index_t feature_vec_begin = item.get_local_id(1) * VEC_SIZE; feature_vec_begin < fs; feature_vec_begin += VEC_SIZE * item.get_local_range(1)) { - auto remaining = fs - feature_vec_begin; - if (remaining < VEC_SIZE) { - for (index_t idx = 0; idx < remaining; ++idx) { - index_t feature = feature_vec_begin + idx; - o[feature] = static_cast( - gamma * (i[feature] - mean) * invstd + beta); - } - } else { - using vec_t = memory::aligned_vector; - vec_t vec; + using vec_t = memory::aligned_vector; + vec_t vec; #pragma unroll - for (int vt = 0; vt < VEC_SIZE; ++vt) { - index_t feature = feature_vec_begin + vt; - vec[vt] = static_cast( - gamma * (i[feature] - mean) * invstd + beta); - } - input_scalar_t* write_ptr = &o[feature_vec_begin]; - *(reinterpret_cast(write_ptr)) = vec; + for (int vt = 0; vt < VEC_SIZE; ++vt) { + index_t feature = feature_vec_begin + vt; + vec[vt] = static_cast( + gamma * (i[feature] - mean) * invstd + beta); } + input_scalar_t* write_ptr = &o[feature_vec_begin]; + *(reinterpret_cast(write_ptr)) = vec; } } }