diff --git a/src/ATen/native/xpu/sycl/SortingKernels.h b/src/ATen/native/xpu/sycl/SortingKernels.h
index fb29a9f53..b39595143 100644
--- a/src/ATen/native/xpu/sycl/SortingKernels.h
+++ b/src/ATen/native/xpu/sycl/SortingKernels.h
@@ -539,9 +539,7 @@ void segmented_sort_pairs_(
     int num_elements) {
   constexpr int scaling_coef = sizeof(key_t) * sizeof(value_t) >= 64
       ? 2
-      : 1; // Attempt to reduce register pressure. The result will be incorrect
-           // when using too many local variables (registers).
-           // https://github.com/intel/torch-xpu-ops/issues/626
+      : 1; // Attempt to reduce register pressure for performance.
   if (num_elements > 4096 / scaling_coef) {
     // Considering register pressure, we use a problem size of 4096 to delineate
     // the boundary between single tile sort and group sort.