add aten::upsample_linear1d aten::upsample_linear1d_backward #642

Merged: 10 commits, Jul 31, 2024
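For context, here is a minimal sketch (not part of this diff) of how the newly enabled op can be exercised from ATen C++ once this PR is in place. It assumes an XPU-enabled build with a reachable XPU device and uses the standard functional signature of at::upsample_linear1d.

#include <ATen/ATen.h>

int main() {
  // N x C x W input placed on the XPU device (assumes an XPU runtime is available).
  at::Tensor input = at::rand({2, 3, 8}, at::TensorOptions().device(at::kXPU));
  // Linearly upsample the last dimension from width 8 to width 16.
  at::Tensor out = at::upsample_linear1d(
      input, {16}, /*align_corners=*/false, /*scales=*/c10::nullopt);
  // out.sizes() == {2, 3, 16}
  return 0;
}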
1 change: 0 additions & 1 deletion src/ATen/native/xpu/Loss.cpp
@@ -80,7 +80,6 @@ Tensor& XPUNativeFunctions::mse_loss_backward_out(
return grad_input;
}


Tensor& XPUNativeFunctions::smooth_l1_loss_out(
const Tensor& input,
const Tensor& target,
73 changes: 49 additions & 24 deletions src/ATen/native/xpu/ReplicationPadding.cpp
@@ -34,9 +34,13 @@ void replication_pad1d_meta(
int64_t iwidth = input.size(dimw);
int64_t owidth = iwidth + pad_l + pad_r;

TORCH_CHECK(owidth >= 1,
"input (W: ", iwidth, ") is too small."
" Calculated output W: ", owidth);
TORCH_CHECK(
owidth >= 1,
"input (W: ",
iwidth,
") is too small."
" Calculated output W: ",
owidth);

if (output.defined()) {
if (input.ndimension() == 2) {
@@ -69,11 +73,14 @@ void replication_pad1d_backward_meta(

/* sizes */
int64_t iwidth = input.size(dimw);
int64_t owidth = iwidth + pad_l + pad_r;

TORCH_CHECK(owidth == grad_output.size(dimw),
"grad_output width unexpected. Expected: ", owidth,
" Got: ", grad_output.size(dimw));
TORCH_CHECK(
owidth == grad_output.size(dimw),
"grad_output width unexpected. Expected: ",
owidth,
" Got: ",
grad_output.size(dimw));

if (grad_input.defined()) {
xpu::resize_out(grad_input, input.sizes(), {}, input.options());
@@ -110,25 +117,30 @@ void replication_pad2d_meta(
int64_t iheight = input.size(dimh);
int64_t iwidth = input.size(dimw);
int64_t oheight = iheight + pad_t + pad_b;
int64_t owidth = iwidth + pad_l + pad_r;

TORCH_CHECK(owidth >= 1 || oheight >= 1,
"input (H: ", iheight, ", W: ", iwidth, " ) is too small."
" Calculated output H: ", oheight, " W: ", owidth);
TORCH_CHECK(
owidth >= 1 || oheight >= 1,
"input (H: ",
iheight,
", W: ",
iwidth,
" ) is too small."
" Calculated output H: ",
oheight,
" W: ",
owidth);

if (output.defined()) {
if (input.dim() == 3) {
xpu::resize_out(
output,
{nslices, oheight, owidth}, {}, input.options());
xpu::resize_out(output, {nslices, oheight, owidth}, {}, input.options());
} else {
xpu::resize_out(
output, {nbatch, nslices, oheight, owidth}, {}, input.options());
}
} else {
if (input.dim() == 3) {
output = xpu::create_out(
{nslices, oheight, owidth}, {}, input.options());
output = xpu::create_out({nslices, oheight, owidth}, {}, input.options());
} else {
output = xpu::create_out(
{nbatch, nslices, oheight, owidth}, {}, input.options());
@@ -170,21 +182,34 @@ void replication_pad3d_meta(
int64_t iwidth = input.size(dimw);
int64_t odepth = idepth + pfront + pback;
int64_t oheight = iheight + ptop + pbottom;
int64_t owidth = iwidth + pleft + pright;

TORCH_CHECK(owidth >= 1 || oheight >= 1 || odepth >= 1,
"input (D: ", idepth, " H: ", iheight, ", W: ", iwidth,
int64_t owidth = iwidth + pleft + pright;

TORCH_CHECK(
owidth >= 1 || oheight >= 1 || odepth >= 1,
"input (D: ",
idepth,
" H: ",
iheight,
", W: ",
iwidth,
") is too small."
" Calculated output D: ", odepth, " H: ", oheight, " W: ", owidth);
" Calculated output D: ",
odepth,
" H: ",
oheight,
" W: ",
owidth);

if (output.defined()) {
if (input.dim() == 4) {
xpu::resize_out(
output,
{nslices, odepth, oheight, owidth}, {}, input.options());
output, {nslices, odepth, oheight, owidth}, {}, input.options());
} else {
xpu::resize_out(
output, {nbatch, nslices, odepth, oheight, owidth}, {}, input.options());
output,
{nbatch, nslices, odepth, oheight, owidth},
{},
input.options());
}
} else {
if (input.dim() == 4) {
29 changes: 29 additions & 0 deletions src/ATen/native/xpu/UpSample.h
@@ -228,4 +228,33 @@ static scalar_t upsample_get_value_bounded(
return data[batch][channel][access_y][access_x];
}

static C10_UNUSED std::array<int64_t, 3> upsample_1d_common_check(
IntArrayRef input_size,
IntArrayRef output_size) {
TORCH_CHECK(
output_size.size() == 1,
"It is expected output_size equals to 1, but got size ",
output_size.size());

TORCH_CHECK(
input_size.size() == 3,
"It is expected input_size equals to 3, but got size ",
input_size.size());

int64_t output_width = output_size[0];
int64_t nbatch = input_size[0];
int64_t channels = input_size[1];
int64_t input_width = input_size[2];

TORCH_CHECK(
input_width > 0 && output_width > 0,
"Input and output sizes should be greater than 0, but got input (W: ",
input_width,
") and output (W: ",
output_width,
")");

return {nbatch, channels, output_width};
}

} // namespace at::native::xpu
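To make the new helper's contract concrete, here is a small illustration with hypothetical shapes (not part of the diff; it assumes the header above is reachable on the include path):

#include <ATen/native/xpu/UpSample.h>

// Illustration only: what upsample_1d_common_check returns for a {2, 3, 8}
// input upsampled to width 16.
static std::array<int64_t, 3> example_full_output_size() {
  // Returns {nbatch, channels, output_width} == {2, 3, 16}; the meta function
  // in the next file hands this array to resize_out/create_out.
  return at::native::xpu::upsample_1d_common_check(
      /*input_size=*/{2, 3, 8}, /*output_size=*/{16});
}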
111 changes: 111 additions & 0 deletions src/ATen/native/xpu/UpSampleLinear1d.cpp
@@ -0,0 +1,111 @@
#include <ATen/ATen.h>
#include <ATen/native/xpu/UpSample.h>
#include <ATen/native/xpu/sycl/UpSampleLinear1dKernels.h>
#include <ATen/xpu/XPUNativeFunctions.h>
#include <comm/RegisterUtils.h>
#include "ATen/core/ATen_fwd.h"

namespace at {

void upsample_linear1d_meta(
const Tensor& input,
IntArrayRef output_size,
bool align_corners,
std::optional<double> scales,
Tensor& output) {
auto full_output_size =
at::native::xpu::upsample_1d_common_check(input.sizes(), output_size);

// Allow for empty batch size but not other dimensions
TORCH_CHECK(
(input.size(1) != 0 && input.size(2) != 0) && input.dim() == 3,
"Non-empty 3D data tensor expected but got a tensor with sizes ",
input.sizes());

if (output.defined()) {
at::xpu::resize_out(output, full_output_size, {}, input.options());
} else {
output = at::xpu::create_out(full_output_size, {}, input.options());
}
}
void upsample_linear1d_backward_meta(
const Tensor& grad_output,
IntArrayRef output_size,
IntArrayRef input_size,
bool align_corners,
std::optional<double> scales,
Tensor& grad_input) {
auto full_output_size =
at::native::xpu::upsample_1d_common_check(input_size, output_size);

TORCH_CHECK(
input_size.size() == 3,
"It is expected input_size equals to 3, but got size ",
input_size.size());

check_dim_size(grad_output, 3, 0, full_output_size[0]);
check_dim_size(grad_output, 3, 1, full_output_size[1]);
check_dim_size(grad_output, 3, 2, full_output_size[2]);

if (grad_input.defined()) {
at::xpu::resize_out(grad_input, input_size, {}, grad_output.options());
} else {
grad_input = at::xpu::create_out(input_size, {}, grad_output.options());
}
}

Tensor XPUNativeFunctions::upsample_linear1d(
const Tensor& input,
IntArrayRef output_size,
bool align_corners,
std::optional<double> scales) {
Tensor output;
return upsample_linear1d_out(
input, output_size, align_corners, scales, output);
}

Tensor& XPUNativeFunctions::upsample_linear1d_out(
const Tensor& input,
IntArrayRef output_size,
bool align_corners,
std::optional<double> scales,
Tensor& output) {
upsample_linear1d_meta(input, output_size, align_corners, scales, output);

TensorArg input_arg{input, "input", 1}, output_arg{output, "output", 2};
checkAllSameGPU(__func__, {input_arg, output_arg});

native::xpu::upsample_linear1d_kernel(
input, output_size, align_corners, scales, output);
return output;
}
Tensor XPUNativeFunctions::upsample_linear1d_backward(
const Tensor& grad_output,
IntArrayRef output_size,
IntArrayRef input_size,
bool align_corners,
std::optional<double> scales) {
Tensor grad_input;
return upsample_linear1d_backward_out(
grad_output, output_size, input_size, align_corners, scales, grad_input);
}

Tensor& XPUNativeFunctions::upsample_linear1d_backward_out(
const Tensor& grad_output,
IntArrayRef output_size,
IntArrayRef input_size,
bool align_corners,
std::optional<double> scales,
Tensor& grad_input) {
upsample_linear1d_backward_meta(
grad_output, output_size, input_size, align_corners, scales, grad_input);

TensorArg grad_output_arg{grad_output, "grad_output", 1},
grad_input_arg{grad_input, "grad_input", 2};
checkAllSameGPU(__func__, {grad_output_arg, grad_input_arg});
native::xpu::upsample_linear1d_backward_kernel(
grad_output, output_size, input_size, align_corners, scales, grad_input);
return grad_input;
}

} // namespace at
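The two kernel entry points invoked above are implemented in the SYCL sources added by this PR (not shown in this excerpt). A sketch of the declarations expected in UpSampleLinear1dKernels.h, inferred from the call sites above rather than copied from the header:

namespace at::native::xpu {

// Forward: writes the linearly upsampled result into `output`.
void upsample_linear1d_kernel(
    const Tensor& input,
    IntArrayRef output_size,
    bool align_corners,
    std::optional<double> scales,
    Tensor& output);

// Backward: accumulates gradients w.r.t. the input into `grad_input`.
void upsample_linear1d_backward_kernel(
    const Tensor& grad_output,
    IntArrayRef output_size,
    IntArrayRef input_size,
    bool align_corners,
    std::optional<double> scales,
    Tensor& grad_input);

} // namespace at::native::xpu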
2 changes: 0 additions & 2 deletions src/ATen/native/xpu/XPUFallback.template
@@ -298,8 +298,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
"trunc.out",
"upsample_bicubic2d_backward.grad_input",
"_upsample_bilinear2d_aa.out",
"upsample_linear1d_backward.grad_input",
"upsample_linear1d.out",
"upsample_nearest3d.out",
"upsample_nearest3d_backward.grad_input",
"_upsample_nearest_exact3d.out",