From 2b9529540c1a7e8468c116d80538769f7b7038e5 Mon Sep 17 00:00:00 2001 From: ember arlynx Date: Thu, 20 Mar 2025 15:19:08 -0400 Subject: [PATCH 1/4] upgrade deps --- Cargo.toml | 8 ++++---- rust-toolchain | 2 +- src/error.rs | 14 +++++++++++--- src/opencl/mod.rs | 4 ++-- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 15ad695..b7c8e76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" homepage = "https://github.com/filecoin-project/rust-gpu-tools" license = "MIT/Apache-2.0" repository = "https://github.com/filecoin-project/rust-gpu-tools" -rust-version = "1.70.0" +rust-version = "1.81.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] @@ -18,11 +18,11 @@ cuda = ["rustacuda"] [dependencies] home = "0.5" sha2 = "0.10" -thiserror = "1.0.10" -log = "0.4.11" +thiserror = "2.0.12" +log = "0.4.26" hex = "0.4.3" -opencl3 = { version = "0.9.3", default-features = false, features = ["CL_VERSION_1_2"], optional = true } +opencl3 = { version = "0.11.0", default-features = false, features = ["CL_VERSION_1_2"], optional = true } rustacuda = { package = "fil-rustacuda", version = "0.1.3", optional = true } once_cell = "1.8.0" temp-env = "0.3.3" diff --git a/rust-toolchain b/rust-toolchain index 832e9af..dbd4126 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -1.70.0 +1.81.0 diff --git a/src/error.rs b/src/error.rs index a260e50..9bf382a 100644 --- a/src/error.rs +++ b/src/error.rs @@ -9,11 +9,16 @@ use rustacuda::error::CudaError; pub enum GPUError { /// Error from the underlying `opencl3` library, e.g. a memory allocation failure. #[cfg(feature = "opencl")] - #[error("Opencl3 Error: {0}{}", match .1 { + #[error("Opencl3 Error: {0}{}", match .message { Some(message) => format!(" {}", message), None => "".to_string(), })] - Opencl3(ClError, Option), + Opencl3 { + /// The error code. + error: ClError, + /// The error message. 
+ message: Option, + }, /// Error for OpenCL `clGetProgramInfo()` call failures. #[cfg(feature = "opencl")] @@ -63,6 +68,9 @@ pub type GPUResult = std::result::Result; #[cfg(feature = "opencl")] impl From for GPUError { fn from(error: ClError) -> Self { - GPUError::Opencl3(error, None) + GPUError::Opencl3 { + error, + message: None, + } } } diff --git a/src/opencl/mod.rs b/src/opencl/mod.rs index c3e6068..62e3d6c 100644 --- a/src/opencl/mod.rs +++ b/src/opencl/mod.rs @@ -152,7 +152,7 @@ impl Program { let mut program = opencl3::program::Program::create_from_source(&context, src)?; if let Err(build_error) = program.build(context.devices(), "") { let log = program.get_build_log(context.devices()[0])?; - return Err(GPUError::Opencl3(build_error, Some(log))); + return Err(GPUError::Opencl3 { error: build_error, message: Some(log) }); } debug!( "Building kernel ({}) from source: done.", @@ -191,7 +191,7 @@ impl Program { }?; if let Err(build_error) = program.build(context.devices(), "") { let log = program.get_build_log(context.devices()[0])?; - return Err(GPUError::Opencl3(build_error, Some(log))); + return Err(GPUError::Opencl3 { error: build_error, message: Some(log) }); } let queue = CommandQueue::create_default(&context, 0)?; let kernels = opencl3::kernel::create_program_kernels(&program)?; From e19eb14169c9f15f306317c5b5dee37cf48b5b7f Mon Sep 17 00:00:00 2001 From: ember arlynx Date: Fri, 21 Mar 2025 03:09:36 -0400 Subject: [PATCH 2/4] fix clippy --- src/cuda/mod.rs | 20 ++++++++------------ src/cuda/utils.rs | 1 + src/opencl/mod.rs | 16 +++++++++++----- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/cuda/mod.rs b/src/cuda/mod.rs index cb4b8ec..b65fd8b 100644 --- a/src/cuda/mod.rs +++ b/src/cuda/mod.rs @@ -6,7 +6,7 @@ //! //! 1. RustaCUDA doesn't expose a higher level function to launch a kernel on the default stream //! 2. There was a bug, when the default stream was used implicitly via RustaCUDA's synchronuous -//! copy methods. 
To prevent such kind of bugs, be explicit which stream is used. +//! copy methods. To prevent such kind of bugs, be explicit which stream is used. pub(crate) mod utils; @@ -133,13 +133,11 @@ impl Program { pub fn from_binary(device: &Device, filename: &CStr) -> GPUResult { debug!("Creating CUDA program from binary file."); rustacuda::context::CurrentContext::set_current(&device.context)?; - let module = rustacuda::module::Module::load_from_file(filename).map_err(|err| { + let module = rustacuda::module::Module::load_from_file(filename).inspect_err(|_err| { Self::pop_context(); - err })?; - let stream = Stream::new(StreamFlags::NON_BLOCKING, None).map_err(|err| { + let stream = Stream::new(StreamFlags::NON_BLOCKING, None).inspect_err(|_err| { Self::pop_context(); - err })?; let prog = Program { module, @@ -155,13 +153,11 @@ impl Program { pub fn from_bytes(device: &Device, bytes: &[u8]) -> GPUResult { debug!("Creating CUDA program from bytes."); rustacuda::context::CurrentContext::set_current(&device.context)?; - let module = rustacuda::module::Module::load_from_bytes(bytes).map_err(|err| { + let module = rustacuda::module::Module::load_from_bytes(bytes).inspect_err(|_err| { Self::pop_context(); - err })?; - let stream = Stream::new(StreamFlags::NON_BLOCKING, None).map_err(|err| { + let stream = Stream::new(StreamFlags::NON_BLOCKING, None).inspect_err(|_err| { Self::pop_context(); - err })?; let prog = Program { module, @@ -204,7 +200,7 @@ impl Program { // Transmuting types is safe as long a sizes match. let bytes = unsafe { - std::slice::from_raw_parts(slice.as_ptr() as *const T as *const u8, bytes_len) + std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) }; // It is only unsafe as long as the buffer isn't initialized, but that's what we do next. @@ -246,7 +242,7 @@ impl Program { // Transmuting types is safe as long a sizes match. 
let bytes = unsafe { std::slice::from_raw_parts( - data.as_ptr() as *const T as *const u8, + data.as_ptr() as *const u8, mem::size_of_val(data), ) }; @@ -265,7 +261,7 @@ impl Program { // Transmuting types is safe as long a sizes match. let bytes = unsafe { std::slice::from_raw_parts_mut( - data.as_mut_ptr() as *mut T as *mut u8, + data.as_mut_ptr() as *mut u8, mem::size_of_val(data), ) }; diff --git a/src/cuda/utils.rs b/src/cuda/utils.rs index 21da312..56f2da8 100644 --- a/src/cuda/utils.rs +++ b/src/cuda/utils.rs @@ -11,6 +11,7 @@ use crate::error::{GPUError, GPUResult}; // are never used directly, they are only accessed through [`cuda::Device`] which contains an // `UnownedContext`. A device cannot have an own context itself, as then it couldn't be cloned, // but that is needed for creating the kernels. +#[allow(dead_code)] pub(crate) struct CudaContexts(Vec); unsafe impl Sync for CudaContexts {} unsafe impl Send for CudaContexts {} diff --git a/src/opencl/mod.rs b/src/opencl/mod.rs index 62e3d6c..75173d3 100644 --- a/src/opencl/mod.rs +++ b/src/opencl/mod.rs @@ -152,7 +152,10 @@ impl Program { let mut program = opencl3::program::Program::create_from_source(&context, src)?; if let Err(build_error) = program.build(context.devices(), "") { let log = program.get_build_log(context.devices()[0])?; - return Err(GPUError::Opencl3 { error: build_error, message: Some(log) }); + return Err(GPUError::Opencl3 { + error: build_error, + message: Some(log), + }); } debug!( "Building kernel ({}) from source: done.", @@ -191,7 +194,10 @@ impl Program { }?; if let Err(build_error) = program.build(context.devices(), "") { let log = program.get_build_log(context.devices()[0])?; - return Err(GPUError::Opencl3 { error: build_error, message: Some(log) }); + return Err(GPUError::Opencl3 { + error: build_error, + message: Some(log), + }); } let queue = CommandQueue::create_default(&context, 0)?; let kernels = opencl3::kernel::create_program_kernels(&program)?; @@ -259,7 +265,7 @@ impl 
Program { }; // Transmuting types is safe as long a sizes match. let bytes = unsafe { - std::slice::from_raw_parts(slice.as_ptr() as *const T as *const u8, bytes_len) + std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) }; // Write some data right-away. This makes a significant performance different. unsafe { @@ -315,7 +321,7 @@ impl Program { // It is safe as long as the sizes match. let bytes = unsafe { std::slice::from_raw_parts( - data.as_ptr() as *const T as *const u8, + data.as_ptr() as *const u8, mem::size_of_val(data), ) }; @@ -333,7 +339,7 @@ impl Program { // It is safe as long as the sizes match. let bytes = unsafe { std::slice::from_raw_parts_mut( - data.as_mut_ptr() as *mut T as *mut u8, + data.as_mut_ptr() as *mut u8, mem::size_of_val(data), ) }; From bbbefbfa353c8979982f3fa88853bf122b219150 Mon Sep 17 00:00:00 2001 From: ember arlynx Date: Thu, 27 Mar 2025 23:43:39 -0400 Subject: [PATCH 3/4] allow(unused) + cargo fmt --- src/cuda/mod.rs | 14 +++----------- src/cuda/utils.rs | 3 +-- src/opencl/mod.rs | 14 +++----------- 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/src/cuda/mod.rs b/src/cuda/mod.rs index b65fd8b..bdcf83c 100644 --- a/src/cuda/mod.rs +++ b/src/cuda/mod.rs @@ -199,9 +199,7 @@ impl Program { let bytes_len = mem::size_of_val(slice); // Transmuting types is safe as long a sizes match. - let bytes = unsafe { - std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) - }; + let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) }; // It is only unsafe as long as the buffer isn't initialized, but that's what we do next. let mut buffer = unsafe { DeviceBuffer::::uninitialized(bytes_len)? }; @@ -241,10 +239,7 @@ impl Program { // Transmuting types is safe as long a sizes match. 
let bytes = unsafe { - std::slice::from_raw_parts( - data.as_ptr() as *const u8, - mem::size_of_val(data), - ) + std::slice::from_raw_parts(data.as_ptr() as *const u8, mem::size_of_val(data)) }; // It is safe as we synchronize the stream after the call. @@ -260,10 +255,7 @@ impl Program { // Transmuting types is safe as long a sizes match. let bytes = unsafe { - std::slice::from_raw_parts_mut( - data.as_mut_ptr() as *mut u8, - mem::size_of_val(data), - ) + std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, mem::size_of_val(data)) }; // It is safe as we synchronize the stream after the call. diff --git a/src/cuda/utils.rs b/src/cuda/utils.rs index 56f2da8..207d658 100644 --- a/src/cuda/utils.rs +++ b/src/cuda/utils.rs @@ -11,8 +11,7 @@ use crate::error::{GPUError, GPUResult}; // are never used directly, they are only accessed through [`cuda::Device`] which contains an // `UnownedContext`. A device cannot have an own context itself, as then it couldn't be cloned, // but that is needed for creating the kernels. -#[allow(dead_code)] -pub(crate) struct CudaContexts(Vec); +pub(crate) struct CudaContexts(#[allow(unused)] Vec); unsafe impl Sync for CudaContexts {} unsafe impl Send for CudaContexts {} diff --git a/src/opencl/mod.rs b/src/opencl/mod.rs index 75173d3..c8e32b4 100644 --- a/src/opencl/mod.rs +++ b/src/opencl/mod.rs @@ -264,9 +264,7 @@ impl Program { )? }; // Transmuting types is safe as long a sizes match. - let bytes = unsafe { - std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) - }; + let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) }; // Write some data right-away. This makes a significant performance different. unsafe { self.queue @@ -320,10 +318,7 @@ impl Program { // It is safe as long as the sizes match. 
let bytes = unsafe { - std::slice::from_raw_parts( - data.as_ptr() as *const u8, - mem::size_of_val(data), - ) + std::slice::from_raw_parts(data.as_ptr() as *const u8, mem::size_of_val(data)) }; unsafe { self.queue @@ -338,10 +333,7 @@ impl Program { // It is safe as long as the sizes match. let bytes = unsafe { - std::slice::from_raw_parts_mut( - data.as_mut_ptr() as *mut u8, - mem::size_of_val(data), - ) + std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, mem::size_of_val(data)) }; unsafe { self.queue From 183877b13f311560817c027dc7d9f45760617702 Mon Sep 17 00:00:00 2001 From: Brandon Ros Date: Mon, 20 Feb 2023 15:40:09 -0500 Subject: [PATCH 4/4] Extend support for Apple Silicon --- src/device.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/device.rs b/src/device.rs index b5c5abe..5412cc2 100644 --- a/src/device.rs +++ b/src/device.rs @@ -28,6 +28,8 @@ const AMD_DEVICE_ON_APPLE_VENDOR_STRING: &str = "AMD"; const AMD_DEVICE_ON_APPLE_VENDOR_ID: u32 = 0x1021d00; const NVIDIA_DEVICE_VENDOR_STRING: &str = "NVIDIA Corporation"; const NVIDIA_DEVICE_VENDOR_ID: u32 = 0x10de; +const APPLE_DEVICE_VENDOR_ID: u32 = 0x1027F00; +const APPLE_DEVICE_VENDOR_STRING: &str = "Apple"; // The owned CUDA contexts are stored globally. Each devives contains an unowned reference, so // that devices can be cloned. @@ -180,6 +182,8 @@ pub enum Vendor { Intel, /// GPU by NVIDIA. Nvidia, + /// GPU by Apple. 
+ Apple, } impl TryFrom<&str> for Vendor { @@ -191,6 +195,7 @@ impl TryFrom<&str> for Vendor { AMD_DEVICE_ON_APPLE_VENDOR_STRING => Ok(Self::Amd), INTEL_DEVICE_VENDOR_STRING => Ok(Self::Intel), NVIDIA_DEVICE_VENDOR_STRING => Ok(Self::Nvidia), + APPLE_DEVICE_VENDOR_STRING => Ok(Self::Apple), _ => Err(GPUError::UnsupportedVendor(vendor.to_string())), } } @@ -205,6 +210,7 @@ impl TryFrom<u32> for Vendor { AMD_DEVICE_ON_APPLE_VENDOR_ID => Ok(Self::Amd), INTEL_DEVICE_VENDOR_ID => Ok(Self::Intel), NVIDIA_DEVICE_VENDOR_ID => Ok(Self::Nvidia), + APPLE_DEVICE_VENDOR_ID => Ok(Self::Apple), _ => Err(GPUError::UnsupportedVendor(format!("0x{:x}", vendor))), } } @@ -216,6 +222,7 @@ impl fmt::Display for Vendor { Self::Amd => AMD_DEVICE_VENDOR_STRING, Self::Intel => INTEL_DEVICE_VENDOR_STRING, Self::Nvidia => NVIDIA_DEVICE_VENDOR_STRING, + Self::Apple => APPLE_DEVICE_VENDOR_STRING, }; write!(f, "{}", vendor) }