diff --git a/cmake/Codegen.cmake b/cmake/Codegen.cmake
index e6c369a49..57b65e0e6 100644
--- a/cmake/Codegen.cmake
+++ b/cmake/Codegen.cmake
@@ -9,6 +9,7 @@ file(MAKE_DIRECTORY ${BUILD_TORCH_XPU_ATEN_GENERATED})
 set(RegisterXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp)
 set(RegisterSparseXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp)
 set(RegisterNestedTensorXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp)
+set(RegisterSparseCsrXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp)
 set(XPUFallback_PATH ${TORCH_XPU_OPS_ROOT}/src/ATen/native/xpu/XPUFallback.template)

 if(WIN32)
@@ -49,6 +50,7 @@ endfunction(GEN_BACKEND)
 set(RegisterXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp)
 set(RegisterSparseXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp)
 set(RegisterNestedTensorXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp)
+set(RegisterSparseCsrXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp)
 set(XPUFallback_PATH ${TORCH_XPU_OPS_ROOT}/src/ATen/native/xpu/XPUFallback.template)
 set(XPU_AOTI_INSTALL_DIR ${TORCH_ROOT}/torch/csrc/inductor/aoti_torch/generated/extend)
 function(GEN_XPU file_yaml)
@@ -79,7 +81,7 @@ function(GEN_XPU file_yaml)
     --install-dir ${BUILD_TORCH_XPU_ATEN_GENERATED}
     --per-operator-headers
     --static-dispatch-backend
-    --backend-whitelist XPU SparseXPU NestedTensorXPU
+    --backend-whitelist XPU SparseXPU NestedTensorXPU SparseCsrXPU
     # --xpu: generate in-tree RegisterXPU_0.cpp for in-tree OPs
     --xpu
     # --update-aoti-c-shim: generate extend/c_shim_xpu.h
@@ -96,6 +98,7 @@ function(GEN_XPU file_yaml)
     COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterXPU_PATH}
     COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseXPU_PATH}
     COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterNestedTensorXPU_PATH}
+    COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseCsrXPU_PATH}
     ${SIMPLE_TRACE}
     WORKING_DIRECTORY ${TORCH_ROOT}
     DEPENDS
@@ -126,6 +129,7 @@ GEN_XPU(
   ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp
   ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp
   ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp
+  ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp
   ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.h
   ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp
 )
@@ -137,7 +141,7 @@ GEN_XPU(
 # $TORCH_XPU_OPS_INCLUDE_DIRS, so that "#include " works.
 list(APPEND TORCH_XPU_OPS_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/xpu)
-list(APPEND xpu_generated_src ${RegisterXPU_PATH} ${RegisterSparseXPU_PATH} ${RegisterNestedTensorXPU_PATH})
+list(APPEND xpu_generated_src ${RegisterXPU_PATH} ${RegisterSparseXPU_PATH} ${RegisterNestedTensorXPU_PATH} ${RegisterSparseCsrXPU_PATH})
 list(APPEND xpu_generated_src ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp)
 add_custom_target(TORCH_XPU_GEN_TARGET DEPENDS ${xpu_generated_src})
 set(ATen_XPU_GEN_SRCS ${xpu_generated_src})

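The CMake changes above only thread a fourth generated registration file (`RegisterSparseCsrXPU_0.cpp`) through the existing codegen flow; the kernels themselves follow below. As a quick post-build sanity check — a sketch only, using private dispatcher introspection helpers that are not part of this patch and may change between releases — one can confirm the new registrations made it into the dispatch table:

```python
# Hypothetical post-build check; torch._C._dispatch_* are private APIs.
import torch

op = "aten::_convert_indices_from_coo_to_csr"
# Expect True after this patch: the structured .out kernel is registered for XPU.
print(torch._C._dispatch_has_kernel_for_dispatch_key(op, "XPU"))
```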
diff --git a/src/ATen/native/sparse/xpu/SparseCsrTensorMath.cpp b/src/ATen/native/sparse/xpu/SparseCsrTensorMath.cpp
new file mode 100644
index 000000000..2ee5ff6ee
--- /dev/null
+++ b/src/ATen/native/sparse/xpu/SparseCsrTensorMath.cpp
@@ -0,0 +1,33 @@
+#include
+#include
+#include
+
+namespace at::native {
+
+TORCH_IMPL_FUNC(_convert_indices_from_coo_to_csr_structured_xpu)(
+    const Tensor& input,
+    const int64_t size,
+    const bool out_int32,
+    const Tensor& result) {
+  xpu::convert_indices_from_coo_to_csr_structured_kernel(
+      input,
+      size,
+      out_int32,
+      result);
+}
+
+TORCH_IMPL_FUNC(_convert_indices_from_csr_to_coo_structured_xpu)(
+    const Tensor& crow_indices,
+    const Tensor& col_indices,
+    const bool out_int32,
+    const bool transpose,
+    const Tensor& result) {
+  xpu::convert_indices_from_csr_to_coo_structured_kernel(
+      crow_indices,
+      col_indices,
+      out_int32,
+      transpose,
+      result);
+}
+
+} // namespace at::native
\ No newline at end of file
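The `TORCH_IMPL_FUNC` wrappers above are thin glue: the structured dispatcher allocates and validates `result`, and the SYCL kernels in the next file fill it in. For reference, the COO→CSR conversion they implement is equivalent to the following pure-Python sketch (illustrative only; the helper name is mine, not part of the patch), assuming the row indices are sorted as coalesced COO guarantees:

```python
def coo_rows_to_crow(row_indices, size):
    """Reference for convertIndicesFromCooToCsrXPUFunctor: turn sorted COO row
    indices into CSR compressed row pointers (crow_indices)."""
    crow = [0] * (size + 1)
    for r in row_indices:          # histogram: nnz per row, shifted by one slot
        crow[r + 1] += 1
    for r in range(size):          # running sum turns counts into row pointers
        crow[r + 1] += crow[r]
    return crow

assert coo_rows_to_crow([0, 0, 2], 3) == [0, 2, 2, 3]  # matches the new test case
```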
diff --git a/src/ATen/native/sparse/xpu/sycl/SparseCsrTensorMathKernels.cpp b/src/ATen/native/sparse/xpu/sycl/SparseCsrTensorMathKernels.cpp
new file mode 100644
index 000000000..34f9ff06f
--- /dev/null
+++ b/src/ATen/native/sparse/xpu/sycl/SparseCsrTensorMathKernels.cpp
@@ -0,0 +1,199 @@
+#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include
+#else
+#include
+#include
+#include
+#include
+#include
+#include
+#endif
+
+#include
+#include
+#include
+#include
+
+namespace at::native::xpu {
+
+template <typename input_t, typename output_t>
+struct convertIndicesFromCooToCsrXPUFunctor {
+  void operator()(sycl::nd_item<1> itemId) const {
+    auto linear_id = itemId.get_global_linear_id();
+    if (linear_id == 0) {
+      for (int64_t i = 0; i <= data_in[0]; i++)
+        data_out[i] = static_cast<output_t>(0);
+    } else if (linear_id < numel) {
+      for (int64_t i = data_in[linear_id - 1]; i < data_in[linear_id]; i++)
+        data_out[i + 1] = static_cast<output_t>(linear_id);
+    } else if (linear_id == numel) {
+      for (int64_t i = data_in[numel - 1] + 1; i < size + 1; i++)
+        data_out[i] = static_cast<output_t>(numel);
+    }
+  }
+  convertIndicesFromCooToCsrXPUFunctor(
+      int64_t numel_,
+      const input_t* data_in_,
+      output_t* data_out_,
+      const int64_t size_)
+      : numel(numel_), data_in(data_in_), data_out(data_out_), size(size_) {}
+
+ private:
+  int64_t numel;
+  const input_t* data_in;
+  output_t* data_out;
+  const int64_t size;
+};
+
+template <typename input_t, typename output_t>
+struct convertIndicesFromCsrToCooXPUFunctor {
+  void operator()(sycl::nd_item<1> itemId) const {
+    int64_t linear_id = itemId.get_global_linear_id();
+    if (linear_id < nrows) {
+      for (int64_t i = crow_indices_data_in[linear_id];
+           i < crow_indices_data_in[linear_id + 1];
+           i++)
+        data_out[i] = static_cast<output_t>(linear_id);
+    }
+  }
+  convertIndicesFromCsrToCooXPUFunctor(
+      int64_t nrows_,
+      const input_t* crow_indices_data_in_,
+      output_t* data_out_)
+      : nrows(nrows_),
+        crow_indices_data_in(crow_indices_data_in_),
+        data_out(data_out_) {}
+
+ private:
+  int64_t nrows;
+  const input_t* crow_indices_data_in;
+  output_t* data_out;
+};
+
+template <typename input_t, typename output_t>
+void launch_convert_indices_from_coo_to_csr_xpu_kernel(
+    const Tensor& result,
+    const Tensor& input,
+    const int64_t size) {
+  int64_t numel = input.numel();
+  if (numel == 0) {
+    result.zero_();
+    return;
+  }
+
+  const input_t* data_in = input.const_data_ptr<input_t>();
+  output_t* data_out = result.data_ptr<output_t>();
+
+  int64_t wgroup_size = 64;
+  // The functor needs numel + 1 work-items: the extra item fills the trailing
+  // crow entries past the last stored row.
+  int64_t ngroups = (numel + 1 + wgroup_size - 1) / wgroup_size;
+  sycl::range<1> global_range(ngroups * wgroup_size);
+  sycl::range<1> local_range(wgroup_size);
+
+  auto functor = convertIndicesFromCooToCsrXPUFunctor<input_t, output_t>(
+      numel, data_in, data_out, size);
+
+  sycl_kernel_submit(global_range, local_range, getCurrentSYCLQueue(), functor);
+}
+
+template <typename input_t, typename output_t>
+void launch_convert_indices_from_csr_to_coo_xpu_kernel(
+    const Tensor& indices,
+    const Tensor& crow_indices,
+    const Tensor& col_indices,
+    const bool transpose = false) {
+  int64_t nrows = crow_indices.numel() - 1;
+
+  if (nrows == 0) {
+    indices.zero_();
+    return;
+  }
+
+  auto crow_indices_ = crow_indices.expect_contiguous();
+  const input_t* crow_indices_data_in = crow_indices_->data_ptr<input_t>();
+  TORCH_INTERNAL_ASSERT(indices.is_contiguous());
+  auto row0 = indices.select(0, transpose ? 1 : 0);
+  auto row1 = indices.select(0, transpose ? 0 : 1);
+  output_t* data_out = row0.data_ptr<output_t>();
+  row1.copy_(*col_indices.expect_contiguous());
+
+  int64_t wgroup_size = 64;
+  int64_t ngroups = (nrows + wgroup_size - 1) / wgroup_size;
+  sycl::range<1> global_range(ngroups * wgroup_size);
+  sycl::range<1> local_range(wgroup_size);
+
+  auto functor = convertIndicesFromCsrToCooXPUFunctor<input_t, output_t>(
+      nrows, crow_indices_data_in, data_out);
+
+  sycl_kernel_submit(global_range, local_range, getCurrentSYCLQueue(), functor);
+}
+
+void convert_indices_from_coo_to_csr_structured_kernel(
+    const Tensor& input,
+    const int64_t size,
+    const bool out_int32,
+    const Tensor& result) {
+  if (out_int32) {
+    AT_DISPATCH_INTEGRAL_TYPES(
+        input.scalar_type(), "convert_indices_from_coo_to_csr_xpu", [&] {
+          launch_convert_indices_from_coo_to_csr_xpu_kernel<scalar_t, int>(
+              result, input, size);
+        });
+  } else {
+    AT_DISPATCH_INTEGRAL_TYPES(
+        input.scalar_type(), "convert_indices_from_coo_to_csr_xpu", [&] {
+          launch_convert_indices_from_coo_to_csr_xpu_kernel<scalar_t, int64_t>(
+              result, input, size);
+        });
+  }
+}
+
+void convert_indices_from_csr_to_coo_structured_kernel(
+    const Tensor& crow_indices,
+    const Tensor& col_indices,
+    const bool out_int32,
+    const bool transpose,
+    const Tensor& result) {
+  if (out_int32) {
+    AT_DISPATCH_INTEGRAL_TYPES(
+        crow_indices.scalar_type(),
+        "convert_indices_from_csr_to_coo_xpu",
+        [&] {
+          launch_convert_indices_from_csr_to_coo_xpu_kernel<scalar_t, int>(
+              result, crow_indices, col_indices, transpose);
+        });
+  } else {
+    AT_DISPATCH_INTEGRAL_TYPES(
+        crow_indices.scalar_type(),
+        "convert_indices_from_csr_to_coo_xpu",
+        [&] {
+          launch_convert_indices_from_csr_to_coo_xpu_kernel<scalar_t, int64_t>(
+              result, crow_indices, col_indices, transpose);
+        });
+  }
+}
+
+} // namespace at::native::xpu
diff --git a/src/ATen/native/sparse/xpu/sycl/SparseCsrTensorMathKernels.h b/src/ATen/native/sparse/xpu/sycl/SparseCsrTensorMathKernels.h
new file mode 100644
index 000000000..0f1e49986
--- /dev/null
+++ b/src/ATen/native/sparse/xpu/sycl/SparseCsrTensorMathKernels.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include
+#include
+
+namespace at::native::xpu {
+
+TORCH_XPU_API void convert_indices_from_coo_to_csr_structured_kernel(
+    const Tensor& input,
+    const int64_t size,
+    const bool out_int32,
+    const Tensor& result);
+
+TORCH_XPU_API void convert_indices_from_csr_to_coo_structured_kernel(
+    const Tensor& crow_indices,
+    const Tensor& col_indices,
+    const bool out_int32,
+    const bool transpose,
+    const Tensor& result);
+} // namespace at::native::xpu
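The reverse direction is simpler: each row `r` owns the slots `crow_indices[r] .. crow_indices[r+1]-1`, and the kernel writes `r` into every slot it owns, one work-item per row. A pure-Python sketch of the same logic, again with a hypothetical helper name that is not part of the patch:

```python
def crow_to_coo_rows(crow_indices):
    """Reference for convertIndicesFromCsrToCooXPUFunctor: expand CSR row
    pointers back into one COO row index per stored element."""
    rows = []
    for r in range(len(crow_indices) - 1):
        rows.extend([r] * (crow_indices[r + 1] - crow_indices[r]))
    return rows

assert crow_to_coo_rows([0, 2, 2, 3]) == [0, 0, 2]
```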
diff --git a/src/ATen/native/xpu/XPUFallback.template b/src/ATen/native/xpu/XPUFallback.template
index 77b44dae1..99f2bf04b 100644
--- a/src/ATen/native/xpu/XPUFallback.template
+++ b/src/ATen/native/xpu/XPUFallback.template
@@ -208,7 +208,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
       "lu_unpack.out",
       "ormqr",
       "_scaled_mm",
-      "_to_sparse_csr",
       "triangular_solve.X",
       "_validate_compressed_sparse_indices",
       "vdot",
diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py
index 3da62ae66..e6aa1e59d 100644
--- a/test/xpu/skip_list_common.py
+++ b/test/xpu/skip_list_common.py
@@ -53,6 +53,19 @@
         "test_errors_sparse_mul_layout3_xpu",
         "test_out_requires_grad_error_sparse_sampled_addmm_xpu_complex64",
         "test_out_requires_grad_error_sparse_sampled_addmm_xpu_float32",
+        # Similar error as above for the to_sparse_csr() operator
+        "test_sparse_csr_from_dense_xpu_bfloat16",
+        "test_sparse_csr_from_dense_xpu_bool",
+        "test_sparse_csr_from_dense_xpu_complex128",
+        "test_sparse_csr_from_dense_xpu_complex64",
+        "test_sparse_csr_from_dense_xpu_float16",
+        "test_sparse_csr_from_dense_xpu_float32",
+        "test_sparse_csr_from_dense_xpu_float64",
+        "test_sparse_csr_from_dense_xpu_int16",
+        "test_sparse_csr_from_dense_xpu_int32",
+        "test_sparse_csr_from_dense_xpu_int64",
+        "test_sparse_csr_from_dense_xpu_int8",
+        "test_sparse_csr_from_dense_xpu_uint8",
         # OneDNN issues, https://github.com/intel/torch-xpu-ops/issues/253
         # RuntimeError: Long is not supported in oneDNN!
         # RuntimeError: could not create a primitive descriptor for a deconvolution forward propagation primitive
diff --git a/test/xpu/test_sparse_xpu.py b/test/xpu/test_sparse_xpu.py
index 26c6cc93f..ce4a1d9a9 100644
--- a/test/xpu/test_sparse_xpu.py
+++ b/test/xpu/test_sparse_xpu.py
@@ -1,7 +1,10 @@
 # Owner(s): ["module: intel"]
-from torch.testing._internal.common_device_type import instantiate_device_type_tests
+from torch.testing._internal.common_device_type import instantiate_device_type_tests, dtypes
+from torch.testing._internal.common_dtype import all_types_and_complex_and
 from torch.testing._internal.common_utils import run_tests
+import torch
+
 try:
     from xpu_test_utils import XPUPatchForImport
 except Exception as e:
@@ -10,6 +13,29 @@
 with XPUPatchForImport(False):
     from test_sparse import TestSparse

+    #@skipIfTorchDynamo()
+    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
+    def sparse_csr_from_dense(self, device, dtype):
+        dense = torch.tensor([[4, 5, 0], [0, 0, 0], [1, 0, 0]], dtype=dtype, device=device)
+        sparse = dense.to_sparse_csr()
+        self.assertEqual(torch.tensor([0, 2, 2, 3], dtype=torch.int64), sparse.crow_indices())
+        self.assertEqual(torch.tensor([0, 1, 0], dtype=torch.int64), sparse.col_indices())
+        self.assertEqual(torch.tensor([4, 5, 1], dtype=dtype), sparse.values())
+
+        dense = torch.tensor([[0, 0, 0], [0, 0, 1], [1, 0, 0]], dtype=dtype, device=device)
+        sparse = dense.to_sparse_csr()
+        self.assertEqual(torch.tensor([0, 0, 1, 2], dtype=torch.int64), sparse.crow_indices())
+        self.assertEqual(torch.tensor([2, 0], dtype=torch.int64), sparse.col_indices())
+        self.assertEqual(torch.tensor([1, 1], dtype=dtype), sparse.values())
+
+        dense = torch.tensor([[2, 2, 2], [2, 2, 2], [2, 2, 2]], dtype=dtype, device=device)
+        sparse = dense.to_sparse_csr()
+        self.assertEqual(torch.tensor([0, 3, 6, 9], dtype=torch.int64), sparse.crow_indices())
+        self.assertEqual(torch.tensor([0, 1, 2] * 3, dtype=torch.int64), sparse.col_indices())
+        self.assertEqual(torch.tensor([2] * 9, dtype=dtype), sparse.values())
+
+    TestSparse.test_sparse_csr_from_dense = sparse_csr_from_dense
+
 instantiate_device_type_tests(TestSparse, globals(), only_for="xpu", allow_xpu=True)

 if __name__ == "__main__":
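With the fallback entry removed and the kernels registered, `to_sparse_csr()` runs natively on XPU instead of falling back to CPU. A minimal interactive check mirroring the new test — a sketch that assumes an XPU-enabled build and an available device:

```python
import torch

if torch.xpu.is_available():  # assumes an XPU-enabled PyTorch build
    dense = torch.tensor([[4, 5, 0], [0, 0, 0], [1, 0, 0]], dtype=torch.float32, device="xpu")
    sparse = dense.to_sparse_csr()
    print(sparse.crow_indices())  # tensor([0, 2, 2, 3], device='xpu:0')
    print(sparse.col_indices())   # tensor([0, 1, 0], device='xpu:0')
    print(sparse.values())        # tensor([4., 5., 1.], device='xpu:0')
```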
diff --git a/yaml/native/native_functions.yaml b/yaml/native/native_functions.yaml
index a8367f060..4c2c4fa6c 100644
--- a/yaml/native/native_functions.yaml
+++ b/yaml/native/native_functions.yaml
@@ -5232,6 +5232,22 @@
   dispatch:
     XPU: searchsorted_out_xpu

+- func: _convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor
+  structured_delegate: _convert_indices_from_coo_to_csr.out
+
+- func: _convert_indices_from_coo_to_csr.out(Tensor self, int size, *, bool out_int32=False, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  dispatch:
+    XPU: _convert_indices_from_coo_to_csr_structured_xpu
+
+- func: _convert_indices_from_csr_to_coo(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False) -> Tensor
+  structured_delegate: _convert_indices_from_csr_to_coo.out
+
+- func: _convert_indices_from_csr_to_coo.out(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  dispatch:
+    XPU: _convert_indices_from_csr_to_coo_structured_xpu
+
 - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
   dispatch:
     XPU: bucketize_xpu
@@ -8641,6 +8657,34 @@
   tags: view_copy
   autogen: values_copy.out

+- func: crow_indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: crow_indices_copy
+  tags: view_copy
+  autogen: crow_indices_copy.out
+
+- func: col_indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: col_indices_copy
+  tags: view_copy
+  autogen: col_indices_copy.out
+
+- func: ccol_indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: ccol_indices_copy
+  tags: view_copy
+  autogen: ccol_indices_copy.out
+
+- func: row_indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: row_indices_copy
+  tags: view_copy
+  autogen: row_indices_copy.out
+
 - func: _indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
@@ -8700,9 +8744,42 @@
   dispatch:
     SparseXPU: values_sparse
     NestedTensorXPU: values_nested
+    SparseCsrXPU: values_sparse_csr
   device_check: NoCheck
   device_guard: False

+- func: crow_indices(Tensor(a) self) -> Tensor(a)
+  variants: method
+  dispatch:
+    SparseCsrXPU: crow_indices_sparse_csr
+    CompositeExplicitAutograd: crow_indices_default
+  device_check: NoCheck
+  device_guard: False
+
+- func: col_indices(Tensor(a) self) -> Tensor(a)
+  variants: method
+  dispatch:
+    SparseCsrXPU: col_indices_sparse_csr
+    CompositeExplicitAutograd: col_indices_default
+  device_check: NoCheck
+  device_guard: False
+
+- func: ccol_indices(Tensor(a) self) -> Tensor(a)
+  variants: method
+  dispatch:
+    SparseCsrXPU: ccol_indices_sparse_csr
+    CompositeExplicitAutograd: ccol_indices_default
+  device_check: NoCheck
+  device_guard: False
+
+- func: row_indices(Tensor(a) self) -> Tensor(a)
+  variants: method
+  dispatch:
+    SparseCsrXPU: row_indices_sparse_csr
+    CompositeExplicitAutograd: row_indices_default
+  device_check: NoCheck
+  device_guard: False
+
 # Special case of to_sparse.sparse_dim with custom derivative
 - func: _to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
   variants: method
@@ -8722,6 +8798,35 @@
     SparseXPU: sparse_coo_to_sparse
   autogen: _to_sparse.out

+- func: to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
+  variants: method
+
+# Special case of to_sparse_csr with custom derivative
+- func: _to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
+  variants: method
+  dispatch:
+    XPU: dense_to_sparse_csr
+    SparseXPU: coo_to_sparse_csr
+    SparseCsrXPU: sparse_compressed_to_sparse_csr
+  autogen: _to_sparse_csr.out
+
+- func: to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
+  variants: method
+
+# Special case of to_sparse_csc with custom derivative
+- func: _to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
+  variants: method
+  dispatch:
+    XPU: dense_to_sparse_csc
+    SparseXPU: coo_to_sparse_csc
+    SparseCsrXPU: sparse_compressed_to_sparse_csc
+  autogen: _to_sparse_csc.out
+
+# - func: _to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor)
+#   variants: function
+#   dispatch:
+#     XPU: _to_sparse_semi_structured
+
 - func: copy_sparse_to_sparse_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
   device_check: NoCheck # Allows copy into different device
   variants: function
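The yaml entries above do three things: route the two private index-conversion ops to the new structured XPU kernels, add the `*_copy` functionalization variants, and give the CSR/CSC accessor methods a `SparseCsrXPU` dispatch. A short sketch of how that surfaces in Python (assumes an XPU device; the `torch._convert_indices_*` helpers are private ops):

```python
import torch

# Private conversion op dispatched to _convert_indices_from_coo_to_csr_structured_xpu.
row = torch.tensor([0, 0, 2], device="xpu")
print(torch._convert_indices_from_coo_to_csr(row, 3))  # tensor([0, 2, 2, 3], device='xpu:0')

# Accessors now dispatch through SparseCsrXPU instead of hitting the fallback.
csr = torch.eye(3, device="xpu").to_sparse_csr()
print(csr.crow_indices(), csr.col_indices())
```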
diff --git a/yaml/xpu_functions.yaml b/yaml/xpu_functions.yaml
index 6af8143f0..ef3b6e3c8 100644
--- a/yaml/xpu_functions.yaml
+++ b/yaml/xpu_functions.yaml
@@ -747,3 +747,7 @@ supported:
   - take.out
   - segment_reduce
   - _segment_reduce_backward
+  - _convert_indices_from_coo_to_csr
+  - _convert_indices_from_coo_to_csr.out
+  - _convert_indices_from_csr_to_coo
+  - _convert_indices_from_csr_to_coo.out
\ No newline at end of file
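Finally, `yaml/xpu_functions.yaml` advertises the four new entries as supported. As a closing consistency check — again a sketch assuming an XPU device — the two conversions should round-trip on the indices of a CSR tensor:

```python
import torch

dense = torch.tensor([[0, 0, 0], [0, 0, 1], [1, 0, 0]], dtype=torch.float32, device="xpu")
csr = dense.to_sparse_csr()
# CSR row pointers -> COO row/col indices -> CSR row pointers again.
coo_idx = torch._convert_indices_from_csr_to_coo(csr.crow_indices(), csr.col_indices())
crow_again = torch._convert_indices_from_coo_to_csr(coo_idx[0], dense.size(0))
assert torch.equal(crow_again, csr.crow_indices())
```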