
SYCL. Unify calculations for objectives reg:absoluteerror, reg:quantileerror, binary:hinge #10993

Merged: 12 commits, Nov 21, 2024
16 changes: 8 additions & 8 deletions include/xgboost/linalg.h
@@ -855,22 +855,22 @@ class Tensor {
* @brief Get a @ref TensorView for this tensor.
*/
  auto View(DeviceOrd device) {
-   if (device.IsCUDA()) {
-     data_.SetDevice(device);
-     auto span = data_.DeviceSpan();
+   if (device.IsCPU()) {
+     auto span = data_.HostSpan();
      return TensorView<T, kDim>{span, shape_, device, order_};
    } else {
-     auto span = data_.HostSpan();
+     data_.SetDevice(device);
+     auto span = data_.DeviceSpan();
      return TensorView<T, kDim>{span, shape_, device, order_};
    }
  }
  auto View(DeviceOrd device) const {
-   if (device.IsCUDA()) {
-     data_.SetDevice(device);
-     auto span = data_.ConstDeviceSpan();
+   if (device.IsCPU()) {
+     auto span = data_.ConstHostSpan();
      return TensorView<T const, kDim>{span, shape_, device, order_};
    } else {
-     auto span = data_.ConstHostSpan();
+     data_.SetDevice(device);
+     auto span = data_.ConstDeviceSpan();
      return TensorView<T const, kDim>{span, shape_, device, order_};
    }
  }
4 changes: 4 additions & 0 deletions plugin/CMakeLists.txt
@@ -1,6 +1,10 @@
if(PLUGIN_SYCL)
set(CMAKE_CXX_COMPILER "icpx")
file(GLOB_RECURSE SYCL_SOURCES "sycl/*.cc")
list(APPEND SYCL_SOURCES
${xgboost_SOURCE_DIR}/src/objective/regression_obj.cc
${xgboost_SOURCE_DIR}/src/objective/hinge.cc
${xgboost_SOURCE_DIR}/src/objective/quantile_obj.cc)
add_library(plugin_sycl OBJECT ${SYCL_SOURCES})
target_include_directories(plugin_sycl
PRIVATE
41 changes: 41 additions & 0 deletions plugin/sycl/common/linalg_op.h
@@ -9,13 +9,17 @@
#include <utility>

#include "../data.h"
#include "../device_manager.h"

#include <sycl/sycl.hpp>

namespace xgboost {
namespace sycl {
namespace linalg {

template<typename T, std::int32_t D>
using TensorView = xgboost::linalg::TensorView<T, D>;

struct WorkGroupsParams {
size_t n_workgroups;
size_t workgroup_size;
@@ -40,7 +44,44 @@ ::sycl::event GroupWiseKernel(::sycl::queue* qu, int* flag_ptr,
});
return event;
}

template<typename Fn, typename TupleType, size_t ... I>
auto call(Fn&& fn, TupleType t, std::index_sequence<I ...>) {
return fn(std::get<I>(t) ...);
}

template<typename Fn, typename TupleType>
auto call(Fn&& fn, TupleType t) {
static constexpr auto size = std::tuple_size<TupleType>::value;
return call(fn, t, std::make_index_sequence<size>{});
}
Comment on lines +48 to +57

Member:
Isn't this just std::apply?

Contributor Author:
It is, but std::apply cannot be executed inside a SYCL kernel.
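As a standalone illustration of that point, here is a minimal sketch of the same index_sequence-based unpacking; the names Call, fn, and idx are illustrative only and are not part of the PR:

// Minimal sketch of the tuple-unpacking helper above (names are illustrative,
// not from the PR). It expands a tuple of indices into separate arguments,
// like std::apply, but is written so it can also be instantiated inside a
// device kernel where std::apply may not be usable.
#include <cstddef>
#include <tuple>
#include <utility>

template <typename Fn, typename TupleType, std::size_t... I>
auto Call(Fn&& fn, TupleType t, std::index_sequence<I...>) {
  return fn(std::get<I>(t)...);
}

template <typename Fn, typename TupleType>
auto Call(Fn&& fn, TupleType t) {
  constexpr auto size = std::tuple_size<TupleType>::value;
  return Call(std::forward<Fn>(fn), t, std::make_index_sequence<size>{});
}

int main() {
  // A functor with the (i, j) signature used by the element-wise kernels.
  auto fn = [](std::size_t i, std::size_t j) { return i * 10 + j; };
  // UnravelIndex would produce such an index tuple from a flat index and a shape.
  std::tuple<std::size_t, std::size_t> idx{3, 7};
  return Call(fn, idx) == 37 ? 0 : 1;
}

Inside ElementWiseKernel below, the same mechanism expands the tuple produced by xgboost::linalg::UnravelIndex into the (i, j, ...) arguments of the user-supplied functor.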


template <typename T, int32_t D, typename Fn>
void ElementWiseKernel(TensorView<T, D> t, Fn&& fn) {
sycl::DeviceManager device_manager;
auto* qu = device_manager.GetQueue(t.Device());
qu->submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(t.Size()),
[=](::sycl::id<1> pid) {
const size_t idx = pid[0];
call(const_cast<Fn&&>(fn), xgboost::linalg::UnravelIndex(idx, t.Shape()));
});
}).wait_and_throw();
}

} // namespace linalg
} // namespace sycl

namespace linalg {
template <typename T, int32_t D, typename Fn>
void ElementWiseKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {
if (ctx->IsSycl()) {
sycl::linalg::ElementWiseKernel(t, fn);
} else {
ElementWiseKernelHost(t, ctx->Threads(), fn);
}
}

} // namespace linalg
} // namespace xgboost
#endif // PLUGIN_SYCL_COMMON_LINALG_OP_H_
80 changes: 0 additions & 80 deletions plugin/sycl/objective/regression_obj.cc
@@ -195,86 +195,6 @@ XGBOOST_REGISTER_OBJECTIVE(LogisticRaw,
"before logistic transformation with SYCL backend.")
.set_body([]() { return new RegLossObj<xgboost::obj::LogisticRaw>(); });

class MeanAbsoluteError : public ObjFunction {
public:
void Configure(Args const&) override {}

ObjInfo Task() const override {
return {ObjInfo::kRegression, true, true};
}

bst_target_t Targets(MetaInfo const& info) const override {
return std::max(static_cast<std::size_t>(1), info.labels.Shape(1));
}

void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info,
std::int32_t, xgboost::linalg::Matrix<GradientPair>* out_gpair) override {
if (qu_ == nullptr) {
qu_ = device_manager.GetQueue(ctx_->Device());
}

size_t const ndata = preds.Size();
auto const n_targets = this->Targets(info);

xgboost::obj::CheckInitInputs(info);
CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels.";
const bst_float* label_ptr = info.labels.Data()->ConstDevicePointer();

out_gpair->SetDevice(ctx_->Device());
out_gpair->Reshape(info.num_row_, this->Targets(info));
GradientPair* out_gpair_ptr = out_gpair->Data()->DevicePointer();

preds.SetDevice(ctx_->Device());
const bst_float* preds_ptr = preds.ConstDevicePointer();
auto predt = xgboost::linalg::MakeTensorView(ctx_, &preds, info.num_row_, this->Targets(info));
info.weights_.SetDevice(ctx_->Device());
common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
: info.weights_.ConstDeviceSpan()};

qu_->submit([&](::sycl::handler& cgh) {
cgh.parallel_for<>(::sycl::range<1>(ndata), [=](::sycl::id<1> pid) {
int idx = pid[0];
auto sign = [](auto x) {
return (x > static_cast<decltype(x)>(0)) - (x < static_cast<decltype(x)>(0));
};
const bst_float pred = preds_ptr[idx];
const bst_float label = label_ptr[idx];

bst_float hess = weight[idx/n_targets];
bst_float grad = sign(pred - label) * hess;
out_gpair_ptr[idx] = GradientPair{grad, hess};
});
});
qu_->wait_and_throw();
}

void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
float learning_rate, HostDeviceVector<float> const& prediction,
std::int32_t group_idx, RegTree* p_tree) const override {
::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, learning_rate, prediction, 0.5,
p_tree);
}

const char* DefaultEvalMetric() const override { return "mae"; }

void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
out["name"] = String("reg:absoluteerror");
}

void LoadConfig(Json const& in) override {
CHECK_EQ(StringView{get<String const>(in["name"])}, StringView{"reg:absoluteerror"});
}

protected:
sycl::DeviceManager device_manager;
mutable ::sycl::queue* qu_ = nullptr;
};

XGBOOST_REGISTER_OBJECTIVE(MeanAbsoluteError, "reg:absoluteerror_sycl")
.describe("Mean absoluate error.")
.set_body([]() { return new MeanAbsoluteError(); });

} // namespace obj
} // namespace sycl
} // namespace xgboost
6 changes: 6 additions & 0 deletions src/CMakeLists.txt
@@ -1,6 +1,12 @@
file(GLOB_RECURSE CPU_SOURCES *.cc *.h)
list(REMOVE_ITEM CPU_SOURCES ${xgboost_SOURCE_DIR}/src/cli_main.cc)

if(PLUGIN_SYCL)
list(REMOVE_ITEM CPU_SOURCES ${xgboost_SOURCE_DIR}/src/objective/regression_obj.cc)
list(REMOVE_ITEM CPU_SOURCES ${xgboost_SOURCE_DIR}/src/objective/hinge.cc)
list(REMOVE_ITEM CPU_SOURCES ${xgboost_SOURCE_DIR}/src/objective/quantile_obj.cc)
endif()

#-- Object library
# Object library is necessary for jvm-package, which creates its own shared library.
add_library(objxgboost OBJECT)
2 changes: 2 additions & 0 deletions src/common/linalg_op.h
@@ -49,6 +49,7 @@ void ElementWiseKernelHost(linalg::TensorView<T, D> t, std::int32_t n_threads, F
}

#if !defined(XGBOOST_USE_CUDA)
#if !defined(XGBOOST_USE_SYCL)
Member:
#if !(use_cuda or use_sycl)

Contributor Author:
done
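For reference, a sketch of what the merged guard presumably looks like after that change, reusing the fallback stub shown in the diff below (this assumes the stub body itself is unchanged):

// Host-only fallback, compiled only when neither device backend is enabled.
#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_SYCL)
template <typename T, std::int32_t D, typename Fn>
void ElementWiseKernelDevice(linalg::TensorView<T, D>, Fn&&, void* = nullptr) {
  common::AssertGPUSupport();
}
#endif  // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_SYCL)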

template <typename T, int32_t D, typename Fn>
void ElementWiseKernelDevice(linalg::TensorView<T, D>, Fn&&, void* = nullptr) {
common::AssertGPUSupport();
@@ -66,6 +67,7 @@ void ElementWiseKernel(Context const* ctx, linalg::TensorView<T, D> t, Fn&& fn)
}
ElementWiseKernelHost(t, ctx->Threads(), fn);
}
#endif // !defined(XGBOOST_USE_SYCL)
#endif // !defined(XGBOOST_USE_CUDA)

template <typename T, std::int32_t kDim>
16 changes: 9 additions & 7 deletions src/metric/elementwise_metric.cu
@@ -45,7 +45,7 @@ namespace {
template <typename Fn>
PackedReduceResult Reduce(Context const* ctx, MetaInfo const& info, Fn&& loss) {
PackedReduceResult result;
-  auto labels = info.labels.View(ctx->Device());
+  auto labels = info.labels.View(ctx->Device().IsSycl() ? DeviceOrd::CPU() : ctx->Device());
Member:
I assume you want to use IsCPU instead of IsSycl?

Contributor Author:
No. This function doesn't have a SYCL-specific implementation yet, so for proper execution the data has to be transferred back to the host and processed on the CPU. I plan to add a SYCL implementation in the future; I didn't include it here to keep the PR size readable.
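A minimal sketch of that fallback idiom as it appears throughout this PR's metric and predictor changes; the helper name KernelDeviceOrFallback is hypothetical, the real code simply inlines the ternary expression:

#include <xgboost/context.h>  // for DeviceOrd

// Hypothetical helper illustrating the pattern: kernels that have no SYCL
// implementation yet take their data views on the host, so a SYCL device
// ordinal falls back to the CPU ordinal.
inline xgboost::DeviceOrd KernelDeviceOrFallback(xgboost::DeviceOrd device) {
  return device.IsSycl() ? xgboost::DeviceOrd::CPU() : device;
}

// Usage mirroring the diff: auto device = KernelDeviceOrFallback(ctx->Device());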

if (ctx->IsCUDA()) {
#if defined(XGBOOST_USE_CUDA)
thrust::counting_iterator<size_t> begin(0);
@@ -182,10 +182,11 @@ class PseudoErrorLoss : public MetricNoCache {

double Eval(const HostDeviceVector<bst_float>& preds, const MetaInfo& info) override {
CHECK_EQ(info.labels.Shape(0), info.num_row_);
-    auto labels = info.labels.View(ctx_->Device());
-    preds.SetDevice(ctx_->Device());
+    auto device = ctx_->Device().IsSycl() ? DeviceOrd::CPU() : ctx_->Device();
+    auto labels = info.labels.View(device);
+    preds.SetDevice(device);
     auto predts = ctx_->IsCUDA() ? preds.ConstDeviceSpan() : preds.ConstHostSpan();
-    info.weights_.SetDevice(ctx_->Device());
+    info.weights_.SetDevice(device);
common::OptionalWeights weights(ctx_->IsCUDA() ? info.weights_.ConstDeviceSpan()
: info.weights_.ConstHostSpan());
float slope = this->param_.huber_slope;
@@ -349,11 +350,12 @@ struct EvalEWiseBase : public MetricNoCache {
if (info.labels.Size() != 0) {
CHECK_NE(info.labels.Shape(1), 0);
}
-    auto labels = info.labels.View(ctx_->Device());
-    info.weights_.SetDevice(ctx_->Device());
+    auto device = ctx_->Device().IsSycl() ? DeviceOrd::CPU() : ctx_->Device();
+    auto labels = info.labels.View(device);
+    info.weights_.SetDevice(device);
     common::OptionalWeights weights(ctx_->IsCUDA() ? info.weights_.ConstDeviceSpan()
                                                    : info.weights_.ConstHostSpan());
-    preds.SetDevice(ctx_->Device());
+    preds.SetDevice(device);
auto predts = ctx_->IsCUDA() ? preds.ConstDeviceSpan() : preds.ConstHostSpan();

auto d_policy = policy_;
7 changes: 5 additions & 2 deletions src/objective/hinge.cu
@@ -12,6 +12,9 @@
#if defined(XGBOOST_USE_CUDA)
#include "../common/linalg_op.cuh"
#endif
#if defined(XGBOOST_USE_SYCL)
#include "../../plugin/sycl/common/linalg_op.h"
#endif
#include "../common/linalg_op.h"
#include "../common/optional_weight.h" // for OptionalWeights
#include "../common/transform.h" // for Transform
@@ -58,8 +61,8 @@ class HingeObj : public FitIntercept {
auto labels = info.labels.View(ctx_->Device());

info.weights_.SetDevice(ctx_->Device());
-    common::OptionalWeights weight{ctx_->IsCUDA() ? info.weights_.ConstDeviceSpan()
-                                                   : info.weights_.ConstHostSpan()};
+    common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
+                                                  : info.weights_.ConstDeviceSpan()};

linalg::ElementWiseKernel(this->ctx_, labels,
[=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {
2 changes: 1 addition & 1 deletion src/objective/objective.cc
@@ -48,7 +48,7 @@ std::string ObjFunction::GetSyclImplementationName(const std::string& name) {
return name + sycl_postfix;
} else {
// Function hasn't specific sycl implementation
-    LOG(FATAL) << "`" << name << "` doesn't have sycl implementation yet\n";
+    // LOG(FATAL) << "`" << name << "` doesn't have sycl implementation yet\n";
Member:
Please remove the commented code.

Contributor Author:
done

return name;
}
}
10 changes: 7 additions & 3 deletions src/objective/quantile_obj.cu
@@ -27,6 +27,10 @@

#endif // defined(XGBOOST_USE_CUDA)

#if defined(XGBOOST_USE_SYCL)
#include "../../plugin/sycl/common/linalg_op.h" // ElementWiseKernel
#endif

namespace xgboost::obj {
class QuantileRegression : public ObjFunction {
common::QuantileLossParam param_;
@@ -71,14 +75,14 @@ class QuantileRegression : public ObjFunction {
auto gpair = out_gpair->View(ctx_->Device());

info.weights_.SetDevice(ctx_->Device());
-    common::OptionalWeights weight{ctx_->IsCUDA() ? info.weights_.ConstDeviceSpan()
-                                                   : info.weights_.ConstHostSpan()};
+    common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
+                                                  : info.weights_.ConstDeviceSpan()};

preds.SetDevice(ctx_->Device());
auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, n_targets);

alpha_.SetDevice(ctx_->Device());
-    auto alpha = ctx_->IsCUDA() ? alpha_.ConstDeviceSpan() : alpha_.ConstHostSpan();
+    auto alpha = ctx_->IsCPU() ? alpha_.ConstHostSpan() : alpha_.ConstDeviceSpan();

linalg::ElementWiseKernel(ctx_, gpair,
[=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {
12 changes: 8 additions & 4 deletions src/objective/regression_obj.cu
@@ -43,6 +43,10 @@
#include "../common/linalg_op.cuh"
#endif // defined(XGBOOST_USE_CUDA)

#if defined(XGBOOST_USE_SYCL)
#include "../../plugin/sycl/common/linalg_op.h"
#endif

namespace xgboost::obj {
namespace {
void CheckRegInputs(MetaInfo const& info, HostDeviceVector<bst_float> const& preds) {
@@ -253,8 +257,8 @@ class PseudoHuberRegression : public FitIntercept {
auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, this->Targets(info));

info.weights_.SetDevice(ctx_->Device());
-    common::OptionalWeights weight{ctx_->IsCUDA() ? info.weights_.ConstDeviceSpan()
-                                                   : info.weights_.ConstHostSpan()};
+    common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
+                                                  : info.weights_.ConstDeviceSpan()};

linalg::ElementWiseKernel(
ctx_, labels, [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {
@@ -632,8 +636,8 @@ class MeanAbsoluteError : public ObjFunction {
preds.SetDevice(ctx_->Device());
auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, this->Targets(info));
info.weights_.SetDevice(ctx_->Device());
-    common::OptionalWeights weight{ctx_->IsCUDA() ? info.weights_.ConstDeviceSpan()
-                                                   : info.weights_.ConstHostSpan()};
+    common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
+                                                  : info.weights_.ConstDeviceSpan()};

linalg::ElementWiseKernel(
ctx_, labels, [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {
5 changes: 3 additions & 2 deletions src/predictor/cpu_predictor.cc
@@ -720,8 +720,9 @@ class CPUPredictor : public Predictor {
CHECK_NE(ngroup, 0);
size_t const ncolumns = num_feature + 1;
CHECK_NE(ncolumns, 0);
-    auto base_margin = info.base_margin_.View(ctx_->Device());
-    auto base_score = model.learner_model_param->BaseScore(ctx_->Device())(0);
+    auto device = ctx_->Device().IsSycl() ? DeviceOrd::CPU() : ctx_->Device();
+    auto base_margin = info.base_margin_.View(device);
+    auto base_score = model.learner_model_param->BaseScore(device)(0);

// parallel over local batch
common::ParallelFor(batch.Size(), this->ctx_->Threads(), [&](auto i) {
7 changes: 4 additions & 3 deletions tests/cpp/objective/test_hinge.cc
@@ -6,11 +6,12 @@
#include <limits>

#include "../helpers.h"
#include "test_hinge.h"
#include "../../../src/common/linalg_op.h"
namespace xgboost {
-TEST(Objective, DeclareUnifiedTest(HingeObj)) {
-  Context ctx = MakeCUDACtx(GPUIDX);
-  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("binary:hinge", &ctx)};
+
+void TestHingeObj(const Context* ctx) {
+  std::unique_ptr<ObjFunction> obj{ObjFunction::Create("binary:hinge", ctx)};

float eps = std::numeric_limits<xgboost::bst_float>::min();
std::vector<float> predt{-1.0f, -0.5f, 0.5f, 1.0f, -1.0f, -0.5f, 0.5f, 1.0f};
15 changes: 15 additions & 0 deletions tests/cpp/objective/test_hinge.h
@@ -0,0 +1,15 @@
/**
* Copyright 2020-2024 by XGBoost Contributors
*/
#ifndef XGBOOST_TEST_HINGE_H_
#define XGBOOST_TEST_HINGE_H_

#include <xgboost/context.h> // for Context

namespace xgboost {

void TestHingeObj(const Context* ctx);

} // namespace xgboost

#endif  // XGBOOST_TEST_HINGE_H_