filecoin-project · emberian · Mar 20, 2025 · Mar 21, 2025 · Mar 28, 2025 · Feb 20, 2023
diff --git a/Cargo.toml b/Cargo.toml
@@ -7,7 +7,7 @@ edition = "2021"
 homepage = "https://github.com/filecoin-project/rust-gpu-tools"
 license = "MIT/Apache-2.0"
 repository = "https://github.com/filecoin-project/rust-gpu-tools"
-rust-version = "1.70.0"
+rust-version = "1.81.0"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [features]
@@ -18,11 +18,11 @@ cuda = ["rustacuda"]
 [dependencies]
 home = "0.5"
 sha2 = "0.10"
-thiserror = "1.0.10"
-log = "0.4.11"
+thiserror = "2.0.12"
+log = "0.4.26"
 hex = "0.4.3"
 
-opencl3 = { version = "0.9.3", default-features = false, features = ["CL_VERSION_1_2"], optional = true }
+opencl3 = { version = "0.11.0", default-features = false, features = ["CL_VERSION_1_2"], optional = true }
 rustacuda = { package = "fil-rustacuda", version = "0.1.3", optional = true }
 once_cell = "1.8.0"
 temp-env = "0.3.3"
diff --git a/rust-toolchain b/rust-toolchain
@@ -1 +1 @@
-1.70.0
+1.81.0
diff --git a/src/cuda/mod.rs b/src/cuda/mod.rs
@@ -6,7 +6,7 @@
 //!
 //!   1. RustaCUDA doesn't expose a higher level function to launch a kernel on the default stream
 //!   2. There was a bug, when the default stream was used implicitly via RustaCUDA's synchronuous
-//!   copy methods. To prevent such kind of bugs, be explicit which stream is used.
+//!      copy methods. To prevent this kind of bug, be explicit about which stream is used.
 
 pub(crate) mod utils;
 
@@ -133,13 +133,11 @@ impl Program {
     pub fn from_binary(device: &Device, filename: &CStr) -> GPUResult<Program> {
         debug!("Creating CUDA program from binary file.");
         rustacuda::context::CurrentContext::set_current(&device.context)?;
-        let module = rustacuda::module::Module::load_from_file(filename).map_err(|err| {
+        let module = rustacuda::module::Module::load_from_file(filename).inspect_err(|_err| {
             Self::pop_context();
-            err
         })?;
-        let stream = Stream::new(StreamFlags::NON_BLOCKING, None).map_err(|err| {
+        let stream = Stream::new(StreamFlags::NON_BLOCKING, None).inspect_err(|_err| {
             Self::pop_context();
-            err
         })?;
         let prog = Program {
             module,
@@ -155,13 +153,11 @@ impl Program {
     pub fn from_bytes(device: &Device, bytes: &[u8]) -> GPUResult<Program> {
         debug!("Creating CUDA program from bytes.");
         rustacuda::context::CurrentContext::set_current(&device.context)?;
-        let module = rustacuda::module::Module::load_from_bytes(bytes).map_err(|err| {
+        let module = rustacuda::module::Module::load_from_bytes(bytes).inspect_err(|_err| {
             Self::pop_context();
-            err
         })?;
-        let stream = Stream::new(StreamFlags::NON_BLOCKING, None).map_err(|err| {
+        let stream = Stream::new(StreamFlags::NON_BLOCKING, None).inspect_err(|_err| {
             Self::pop_context();
-            err
         })?;
         let prog = Program {
             module,
@@ -203,9 +199,7 @@ impl Program {
         let bytes_len = mem::size_of_val(slice);
 
         // Transmuting types is safe as long a sizes match.
-        let bytes = unsafe {
-            std::slice::from_raw_parts(slice.as_ptr() as *const T as *const u8, bytes_len)
-        };
+        let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) };
 
         // It is only unsafe as long as the buffer isn't initialized, but that's what we do next.
         let mut buffer = unsafe { DeviceBuffer::<u8>::uninitialized(bytes_len)? };
@@ -245,10 +239,7 @@ impl Program {
 
         // Transmuting types is safe as long a sizes match.
         let bytes = unsafe {
-            std::slice::from_raw_parts(
-                data.as_ptr() as *const T as *const u8,
-                mem::size_of_val(data),
-            )
+            std::slice::from_raw_parts(data.as_ptr() as *const u8, mem::size_of_val(data))
         };
 
         // It is safe as we synchronize the stream after the call.
@@ -264,10 +255,7 @@ impl Program {
 
         // Transmuting types is safe as long a sizes match.
         let bytes = unsafe {
-            std::slice::from_raw_parts_mut(
-                data.as_mut_ptr() as *mut T as *mut u8,
-                mem::size_of_val(data),
-            )
+            std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, mem::size_of_val(data))
         };
 
         // It is safe as we synchronize the stream after the call.

diff --git a/src/cuda/utils.rs b/src/cuda/utils.rs
@@ -11,7 +11,7 @@ use crate::error::{GPUError, GPUResult};
 // are never used directly, they are only accessed through [`cuda::Device`] which contains an
 // `UnownedContext`. A device cannot have an own context itself, as then it couldn't be cloned,
 // but that is needed for creating the kernels.
-pub(crate) struct CudaContexts(Vec<rustacuda::context::Context>);
+pub(crate) struct CudaContexts(#[allow(unused)] Vec<rustacuda::context::Context>);
 unsafe impl Sync for CudaContexts {}
 unsafe impl Send for CudaContexts {}
 

diff --git a/src/device.rs b/src/device.rs
@@ -28,6 +28,8 @@ const AMD_DEVICE_ON_APPLE_VENDOR_STRING: &str = "AMD";
 const AMD_DEVICE_ON_APPLE_VENDOR_ID: u32 = 0x1021d00;
 const NVIDIA_DEVICE_VENDOR_STRING: &str = "NVIDIA Corporation";
 const NVIDIA_DEVICE_VENDOR_ID: u32 = 0x10de;
+const APPLE_DEVICE_VENDOR_ID: u32 = 0x1027F00;
+const APPLE_DEVICE_VENDOR_STRING: &str = "Apple";
 
 // The owned CUDA contexts are stored globally. Each devives contains an unowned reference, so
 // that devices can be cloned.
@@ -180,6 +182,8 @@ pub enum Vendor {
     Intel,
     /// GPU by NVIDIA.
     Nvidia,
+    /// GPU by Apple.
+    Apple,
 }
 
 impl TryFrom<&str> for Vendor {
@@ -191,6 +195,7 @@ impl TryFrom<&str> for Vendor {
             AMD_DEVICE_ON_APPLE_VENDOR_STRING => Ok(Self::Amd),
             INTEL_DEVICE_VENDOR_STRING => Ok(Self::Intel),
             NVIDIA_DEVICE_VENDOR_STRING => Ok(Self::Nvidia),
+            APPLE_DEVICE_VENDOR_STRING => Ok(Self::Apple),
             _ => Err(GPUError::UnsupportedVendor(vendor.to_string())),
         }
     }
@@ -205,6 +210,7 @@ impl TryFrom<u32> for Vendor {
             AMD_DEVICE_ON_APPLE_VENDOR_ID => Ok(Self::Amd),
             INTEL_DEVICE_VENDOR_ID => Ok(Self::Intel),
             NVIDIA_DEVICE_VENDOR_ID => Ok(Self::Nvidia),
+            APPLE_DEVICE_VENDOR_ID => Ok(Self::Apple),
             _ => Err(GPUError::UnsupportedVendor(format!("0x{:x}", vendor))),
         }
     }
@@ -216,6 +222,7 @@ impl fmt::Display for Vendor {
             Self::Amd => AMD_DEVICE_VENDOR_STRING,
             Self::Intel => INTEL_DEVICE_VENDOR_STRING,
             Self::Nvidia => NVIDIA_DEVICE_VENDOR_STRING,
+            Self::Apple => APPLE_DEVICE_VENDOR_STRING,
         };
         write!(f, "{}", vendor)
     }

diff --git a/src/error.rs b/src/error.rs
@@ -9,11 +9,16 @@ use rustacuda::error::CudaError;
 pub enum GPUError {
     /// Error from the underlying `opencl3` library, e.g. a memory allocation failure.
     #[cfg(feature = "opencl")]
-    #[error("Opencl3 Error: {0}{}", match .1 {
+    #[error("Opencl3 Error: {error}{}", match .message {
        Some(message) => format!(" {}", message),
        None => "".to_string(),
     })]
-    Opencl3(ClError, Option<String>),
+    Opencl3 {
+        /// The underlying error reported by `opencl3`; always printed in the message.
+        error: ClError,
+        /// Optional extra detail (e.g. a program build log), appended after the error.
+        message: Option<String>,
+    },
 
     /// Error for OpenCL `clGetProgramInfo()` call failures.
     #[cfg(feature = "opencl")]
@@ -63,6 +68,9 @@ pub type GPUResult<T> = std::result::Result<T, GPUError>;
 #[cfg(feature = "opencl")]
 impl From<ClError> for GPUError {
     fn from(error: ClError) -> Self {
-        GPUError::Opencl3(error, None)
+        GPUError::Opencl3 {
+            error,
+            message: None,
+        }
     }
 }
diff --git a/src/opencl/mod.rs b/src/opencl/mod.rs
@@ -152,7 +152,10 @@ impl Program {
             let mut program = opencl3::program::Program::create_from_source(&context, src)?;
             if let Err(build_error) = program.build(context.devices(), "") {
                 let log = program.get_build_log(context.devices()[0])?;
-                return Err(GPUError::Opencl3(build_error, Some(log)));
+                return Err(GPUError::Opencl3 {
+                    error: build_error,
+                    message: Some(log),
+                });
             }
             debug!(
                 "Building kernel ({}) from source: done.",
@@ -191,7 +194,10 @@ impl Program {
         }?;
         if let Err(build_error) = program.build(context.devices(), "") {
             let log = program.get_build_log(context.devices()[0])?;
-            return Err(GPUError::Opencl3(build_error, Some(log)));
+            return Err(GPUError::Opencl3 {
+                error: build_error,
+                message: Some(log),
+            });
         }
         let queue = CommandQueue::create_default(&context, 0)?;
         let kernels = opencl3::kernel::create_program_kernels(&program)?;
@@ -258,9 +264,7 @@ impl Program {
             )?
         };
         // Transmuting types is safe as long a sizes match.
-        let bytes = unsafe {
-            std::slice::from_raw_parts(slice.as_ptr() as *const T as *const u8, bytes_len)
-        };
+        let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) };
         // Write some data right-away. This makes a significant performance different.
         unsafe {
             self.queue
@@ -314,10 +318,7 @@ impl Program {
 
         // It is safe as long as the sizes match.
         let bytes = unsafe {
-            std::slice::from_raw_parts(
-                data.as_ptr() as *const T as *const u8,
-                mem::size_of_val(data),
-            )
+            std::slice::from_raw_parts(data.as_ptr() as *const u8, mem::size_of_val(data))
         };
         unsafe {
             self.queue
@@ -332,10 +333,7 @@ impl Program {
 
         // It is safe as long as the sizes match.
         let bytes = unsafe {
-            std::slice::from_raw_parts_mut(
-                data.as_mut_ptr() as *mut T as *mut u8,
-                mem::size_of_val(data),
-            )
+            std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, mem::size_of_val(data))
         };
         unsafe {
             self.queue