Skip to content

Extend support for Apple Silicon #92

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ edition = "2021"
homepage = "https://github.com/filecoin-project/rust-gpu-tools"
license = "MIT/Apache-2.0"
repository = "https://github.com/filecoin-project/rust-gpu-tools"
rust-version = "1.70.0"
rust-version = "1.81.0"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
Expand All @@ -18,11 +18,11 @@ cuda = ["rustacuda"]
[dependencies]
home = "0.5"
sha2 = "0.10"
thiserror = "1.0.10"
log = "0.4.11"
thiserror = "2.0.12"
log = "0.4.26"
hex = "0.4.3"

opencl3 = { version = "0.9.3", default-features = false, features = ["CL_VERSION_1_2"], optional = true }
opencl3 = { version = "0.11.0", default-features = false, features = ["CL_VERSION_1_2"], optional = true }
rustacuda = { package = "fil-rustacuda", version = "0.1.3", optional = true }
once_cell = "1.8.0"
temp-env = "0.3.3"
2 changes: 1 addition & 1 deletion rust-toolchain
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.70.0
1.81.0
28 changes: 8 additions & 20 deletions src/cuda/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//!
//! 1. RustaCUDA doesn't expose a higher level function to launch a kernel on the default stream
//! 2. There was a bug, when the default stream was used implicitly via RustaCUDA's synchronous
//! copy methods. To prevent such kind of bugs, be explicit which stream is used.
//! copy methods. To prevent such kind of bugs, be explicit which stream is used.

pub(crate) mod utils;

Expand Down Expand Up @@ -133,13 +133,11 @@ impl Program {
pub fn from_binary(device: &Device, filename: &CStr) -> GPUResult<Program> {
debug!("Creating CUDA program from binary file.");
rustacuda::context::CurrentContext::set_current(&device.context)?;
let module = rustacuda::module::Module::load_from_file(filename).map_err(|err| {
let module = rustacuda::module::Module::load_from_file(filename).inspect_err(|_err| {
Self::pop_context();
err
})?;
let stream = Stream::new(StreamFlags::NON_BLOCKING, None).map_err(|err| {
let stream = Stream::new(StreamFlags::NON_BLOCKING, None).inspect_err(|_err| {
Self::pop_context();
err
})?;
let prog = Program {
module,
Expand All @@ -155,13 +153,11 @@ impl Program {
pub fn from_bytes(device: &Device, bytes: &[u8]) -> GPUResult<Program> {
debug!("Creating CUDA program from bytes.");
rustacuda::context::CurrentContext::set_current(&device.context)?;
let module = rustacuda::module::Module::load_from_bytes(bytes).map_err(|err| {
let module = rustacuda::module::Module::load_from_bytes(bytes).inspect_err(|_err| {
Self::pop_context();
err
})?;
let stream = Stream::new(StreamFlags::NON_BLOCKING, None).map_err(|err| {
let stream = Stream::new(StreamFlags::NON_BLOCKING, None).inspect_err(|_err| {
Self::pop_context();
err
})?;
let prog = Program {
module,
Expand Down Expand Up @@ -203,9 +199,7 @@ impl Program {
let bytes_len = mem::size_of_val(slice);

// Transmuting types is safe as long as the sizes match.
let bytes = unsafe {
std::slice::from_raw_parts(slice.as_ptr() as *const T as *const u8, bytes_len)
};
let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) };

// It is only unsafe as long as the buffer isn't initialized, but that's what we do next.
let mut buffer = unsafe { DeviceBuffer::<u8>::uninitialized(bytes_len)? };
Expand Down Expand Up @@ -245,10 +239,7 @@ impl Program {

// Transmuting types is safe as long as the sizes match.
let bytes = unsafe {
std::slice::from_raw_parts(
data.as_ptr() as *const T as *const u8,
mem::size_of_val(data),
)
std::slice::from_raw_parts(data.as_ptr() as *const u8, mem::size_of_val(data))
};

// It is safe as we synchronize the stream after the call.
Expand All @@ -264,10 +255,7 @@ impl Program {

// Transmuting types is safe as long as the sizes match.
let bytes = unsafe {
std::slice::from_raw_parts_mut(
data.as_mut_ptr() as *mut T as *mut u8,
mem::size_of_val(data),
)
std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, mem::size_of_val(data))
};

// It is safe as we synchronize the stream after the call.
Expand Down
2 changes: 1 addition & 1 deletion src/cuda/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::error::{GPUError, GPUResult};
// are never used directly, they are only accessed through [`cuda::Device`] which contains an
// `UnownedContext`. A device cannot have an own context itself, as then it couldn't be cloned,
// but that is needed for creating the kernels.
pub(crate) struct CudaContexts(Vec<rustacuda::context::Context>);
pub(crate) struct CudaContexts(#[allow(unused)] Vec<rustacuda::context::Context>);
unsafe impl Sync for CudaContexts {}
unsafe impl Send for CudaContexts {}

Expand Down
7 changes: 7 additions & 0 deletions src/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ const AMD_DEVICE_ON_APPLE_VENDOR_STRING: &str = "AMD";
const AMD_DEVICE_ON_APPLE_VENDOR_ID: u32 = 0x1021d00;
const NVIDIA_DEVICE_VENDOR_STRING: &str = "NVIDIA Corporation";
const NVIDIA_DEVICE_VENDOR_ID: u32 = 0x10de;
const APPLE_DEVICE_VENDOR_ID: u32 = 0x1027F00;
const APPLE_DEVICE_VENDOR_STRING: &str = "Apple";

// The owned CUDA contexts are stored globally. Each device contains an unowned reference, so
// that devices can be cloned.
Expand Down Expand Up @@ -180,6 +182,8 @@ pub enum Vendor {
Intel,
/// GPU by NVIDIA.
Nvidia,
/// GPU by Apple.
Apple,
}

impl TryFrom<&str> for Vendor {
Expand All @@ -191,6 +195,7 @@ impl TryFrom<&str> for Vendor {
AMD_DEVICE_ON_APPLE_VENDOR_STRING => Ok(Self::Amd),
INTEL_DEVICE_VENDOR_STRING => Ok(Self::Intel),
NVIDIA_DEVICE_VENDOR_STRING => Ok(Self::Nvidia),
APPLE_DEVICE_VENDOR_STRING => Ok(Self::Apple),
_ => Err(GPUError::UnsupportedVendor(vendor.to_string())),
}
}
Expand All @@ -205,6 +210,7 @@ impl TryFrom<u32> for Vendor {
AMD_DEVICE_ON_APPLE_VENDOR_ID => Ok(Self::Amd),
INTEL_DEVICE_VENDOR_ID => Ok(Self::Intel),
NVIDIA_DEVICE_VENDOR_ID => Ok(Self::Nvidia),
APPLE_DEVICE_VENDOR_ID => Ok(Self::Apple),
_ => Err(GPUError::UnsupportedVendor(format!("0x{:x}", vendor))),
}
}
Expand All @@ -216,6 +222,7 @@ impl fmt::Display for Vendor {
Self::Amd => AMD_DEVICE_VENDOR_STRING,
Self::Intel => INTEL_DEVICE_VENDOR_STRING,
Self::Nvidia => NVIDIA_DEVICE_VENDOR_STRING,
Self::Apple => APPLE_DEVICE_VENDOR_STRING,
};
write!(f, "{}", vendor)
}
Expand Down
14 changes: 11 additions & 3 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,16 @@ use rustacuda::error::CudaError;
pub enum GPUError {
/// Error from the underlying `opencl3` library, e.g. a memory allocation failure.
#[cfg(feature = "opencl")]
#[error("Opencl3 Error: {0}{}", match .1 {
#[error("Opencl3 Error: {0}{}", match .message {
Some(message) => format!(" {}", message),
None => "".to_string(),
})]
Opencl3(ClError, Option<String>),
Opencl3 {
/// The error code.
error: ClError,
/// The error message.
message: Option<String>,
},

/// Error for OpenCL `clGetProgramInfo()` call failures.
#[cfg(feature = "opencl")]
Expand Down Expand Up @@ -63,6 +68,9 @@ pub type GPUResult<T> = std::result::Result<T, GPUError>;
#[cfg(feature = "opencl")]
impl From<ClError> for GPUError {
fn from(error: ClError) -> Self {
GPUError::Opencl3(error, None)
GPUError::Opencl3 {
error,
message: None,
}
}
}
24 changes: 11 additions & 13 deletions src/opencl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,10 @@ impl Program {
let mut program = opencl3::program::Program::create_from_source(&context, src)?;
if let Err(build_error) = program.build(context.devices(), "") {
let log = program.get_build_log(context.devices()[0])?;
return Err(GPUError::Opencl3(build_error, Some(log)));
return Err(GPUError::Opencl3 {
error: build_error,
message: Some(log),
});
}
debug!(
"Building kernel ({}) from source: done.",
Expand Down Expand Up @@ -191,7 +194,10 @@ impl Program {
}?;
if let Err(build_error) = program.build(context.devices(), "") {
let log = program.get_build_log(context.devices()[0])?;
return Err(GPUError::Opencl3(build_error, Some(log)));
return Err(GPUError::Opencl3 {
error: build_error,
message: Some(log),
});
}
let queue = CommandQueue::create_default(&context, 0)?;
let kernels = opencl3::kernel::create_program_kernels(&program)?;
Expand Down Expand Up @@ -258,9 +264,7 @@ impl Program {
)?
};
// Transmuting types is safe as long as the sizes match.
let bytes = unsafe {
std::slice::from_raw_parts(slice.as_ptr() as *const T as *const u8, bytes_len)
};
let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) };
// Write some data right-away. This makes a significant performance difference.
unsafe {
self.queue
Expand Down Expand Up @@ -314,10 +318,7 @@ impl Program {

// It is safe as long as the sizes match.
let bytes = unsafe {
std::slice::from_raw_parts(
data.as_ptr() as *const T as *const u8,
mem::size_of_val(data),
)
std::slice::from_raw_parts(data.as_ptr() as *const u8, mem::size_of_val(data))
};
unsafe {
self.queue
Expand All @@ -332,10 +333,7 @@ impl Program {

// It is safe as long as the sizes match.
let bytes = unsafe {
std::slice::from_raw_parts_mut(
data.as_mut_ptr() as *mut T as *mut u8,
mem::size_of_val(data),
)
std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, mem::size_of_val(data))
};
unsafe {
self.queue
Expand Down
Loading