From 372b2ac0cecb59ad3c1be64655d369dd805eaf8e Mon Sep 17 00:00:00 2001 From: John-John Tedro Date: Wed, 3 Jan 2024 08:27:10 +0100 Subject: [PATCH] vulkan: Replace fence with semaphore when acquiring surfaces --- wgpu-core/src/device/queue.rs | 37 +++++- wgpu-hal/examples/halmark/main.rs | 23 +++- wgpu-hal/examples/raw-gles.rs | 2 +- wgpu-hal/examples/ray-traced-triangle/main.rs | 23 +++- wgpu-hal/src/dx12/mod.rs | 2 + wgpu-hal/src/empty.rs | 2 + wgpu-hal/src/gles/mod.rs | 1 + wgpu-hal/src/gles/queue.rs | 1 + wgpu-hal/src/lib.rs | 23 ++++ wgpu-hal/src/metal/mod.rs | 2 + wgpu-hal/src/vulkan/device.rs | 15 ++- wgpu-hal/src/vulkan/instance.rs | 24 ++-- wgpu-hal/src/vulkan/mod.rs | 105 ++++++++++++------ 13 files changed, 201 insertions(+), 59 deletions(-) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 4667c157129..a9a1265e526 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -22,7 +22,7 @@ use crate::{ resource_log, track, FastHashMap, SubmissionIndex, }; -use hal::{CommandEncoder as _, Device as _, Queue as _}; +use hal::{CommandEncoder as _, Device as _, Queue as _, RawSet as _}; use parking_lot::Mutex; use std::{ @@ -1133,6 +1133,12 @@ impl Global { .fetch_add(1, Ordering::Relaxed) + 1; let mut active_executions = Vec::new(); + + // SAFETY: We're constructing this during the submission phase, + // where all resources it uses are guaranteed to outlive this + // short-lived set. + let mut submit_surface_textures = A::SubmitSurfaceTextureSet::new(); + let mut used_surface_textures = track::TextureUsageScope::new(); let snatch_guard = device.snatchable_lock.read(); @@ -1237,8 +1243,19 @@ impl Global { return Err(QueueSubmitError::DestroyedTexture(id)); } Some(TextureInner::Native { .. }) => false, - Some(TextureInner::Surface { ref has_work, .. }) => { + Some(TextureInner::Surface { + ref has_work, + ref raw, + .. + }) => { has_work.store(true, Ordering::Relaxed); + + if let Some(raw) = raw { + unsafe { + submit_surface_textures.insert(raw); + } + } + true } }; @@ -1429,8 +1446,19 @@ impl Global { return Err(QueueSubmitError::DestroyedTexture(id)); } Some(TextureInner::Native { .. }) => {} - Some(TextureInner::Surface { ref has_work, .. }) => { + Some(TextureInner::Surface { + ref has_work, + ref raw, + .. + }) => { has_work.store(true, Ordering::Relaxed); + + if let Some(raw) = raw { + unsafe { + submit_surface_textures.insert(raw); + } + } + unsafe { used_surface_textures .merge_single(texture, None, hal::TextureUses::PRESENT) @@ -1469,12 +1497,13 @@ impl Global { .flat_map(|pool_execution| pool_execution.cmd_buffers.iter()), ) .collect::>(); + unsafe { queue .raw .as_ref() .unwrap() - .submit(&refs, Some((fence, submit_index))) + .submit(&refs, &submit_surface_textures, Some((fence, submit_index))) .map_err(DeviceError::from)?; } diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 18f283d8e7f..22df88e3203 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -3,7 +3,8 @@ extern crate wgpu_hal as hal; use hal::{ - Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, Surface as _, + Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, RawSet as _, + Surface as _, }; use raw_window_handle::{HasDisplayHandle, HasWindowHandle}; use winit::{ @@ -489,8 +490,13 @@ impl Example { let fence = unsafe { let mut fence = device.create_fence().unwrap(); let init_cmd = cmd_encoder.end_encoding().unwrap(); + let surface_textures = A::SubmitSurfaceTextureSet::new(); queue - .submit(&[&init_cmd], Some((&mut fence, init_fence_value))) + .submit( + &[&init_cmd], + &surface_textures, + Some((&mut fence, init_fence_value)), + ) .unwrap(); device.wait(&fence, init_fence_value, !0).unwrap(); device.destroy_buffer(staging_buffer); @@ -541,8 +547,13 @@ impl Example { unsafe { { let ctx = &mut self.contexts[self.context_index]; + let surface_textures = A::SubmitSurfaceTextureSet::new(); self.queue - .submit(&[], Some((&mut ctx.fence, ctx.fence_value))) + .submit( + &[], + &surface_textures, + Some((&mut ctx.fence, ctx.fence_value)), + ) .unwrap(); } @@ -729,7 +740,11 @@ impl Example { } else { None }; - self.queue.submit(&[&cmd_buf], fence_param).unwrap(); + let mut surface_textures = A::SubmitSurfaceTextureSet::new(); + surface_textures.insert(&surface_tex); + self.queue + .submit(&[&cmd_buf], &surface_textures, fence_param) + .unwrap(); self.queue.present(&self.surface, surface_tex).unwrap(); ctx.used_cmd_bufs.push(cmd_buf); ctx.used_views.push(surface_tex_view); diff --git a/wgpu-hal/examples/raw-gles.rs b/wgpu-hal/examples/raw-gles.rs index 81ab4171e3d..0612e31a429 100644 --- a/wgpu-hal/examples/raw-gles.rs +++ b/wgpu-hal/examples/raw-gles.rs @@ -183,6 +183,6 @@ fn fill_screen(exposed: &hal::ExposedAdapter, width: u32, height encoder.begin_render_pass(&rp_desc); encoder.end_render_pass(); let cmd_buf = encoder.end_encoding().unwrap(); - od.queue.submit(&[&cmd_buf], None).unwrap(); + od.queue.submit(&[&cmd_buf], &(), None).unwrap(); } } diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 6454cb89988..dcdbba156cf 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -1,7 +1,8 @@ extern crate wgpu_hal as hal; use hal::{ - Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, Surface as _, + Adapter as _, CommandEncoder as _, Device as _, Instance as _, Queue as _, RawSet as _, + Surface as _, }; use raw_window_handle::{HasDisplayHandle, HasWindowHandle}; @@ -754,8 +755,13 @@ impl Example { let fence = unsafe { let mut fence = device.create_fence().unwrap(); let init_cmd = cmd_encoder.end_encoding().unwrap(); + let surface_textures = A::SubmitSurfaceTextureSet::new(); queue - .submit(&[&init_cmd], Some((&mut fence, init_fence_value))) + .submit( + &[&init_cmd], + &surface_textures, + Some((&mut fence, init_fence_value)), + ) .unwrap(); device.wait(&fence, init_fence_value, !0).unwrap(); cmd_encoder.reset_all(iter::once(init_cmd)); @@ -960,7 +966,11 @@ impl Example { } else { None }; - self.queue.submit(&[&cmd_buf], fence_param).unwrap(); + let mut surface_textures = A::SubmitSurfaceTextureSet::new(); + surface_textures.insert(&surface_tex); + self.queue + .submit(&[&cmd_buf], &surface_textures, fence_param) + .unwrap(); self.queue.present(&self.surface, surface_tex).unwrap(); ctx.used_cmd_bufs.push(cmd_buf); ctx.used_views.push(surface_tex_view); @@ -998,8 +1008,13 @@ impl Example { unsafe { { let ctx = &mut self.contexts[self.context_index]; + let surface_textures = A::SubmitSurfaceTextureSet::new(); self.queue - .submit(&[], Some((&mut ctx.fence, ctx.fence_value))) + .submit( + &[], + &surface_textures, + Some((&mut ctx.fence, ctx.fence_value)), + ) .unwrap(); } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index e0cd1c15cf6..d3875649c12 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -84,6 +84,7 @@ impl crate::Api for Api { type ComputePipeline = ComputePipeline; type AccelerationStructure = AccelerationStructure; + type SubmitSurfaceTextureSet = (); } // Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries. @@ -877,6 +878,7 @@ impl crate::Queue for Queue { unsafe fn submit( &self, command_buffers: &[&CommandBuffer], + _surface_textures: &(), signal_fence: Option<(&mut Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { let mut temp_lists = self.temp_lists.lock(); diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 9fd42bd6f5c..0f68d97d78b 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -37,6 +37,7 @@ impl crate::Api for Api { type ShaderModule = Resource; type RenderPipeline = Resource; type ComputePipeline = Resource; + type SubmitSurfaceTextureSet = (); } impl crate::Instance for Context { @@ -104,6 +105,7 @@ impl crate::Queue for Context { unsafe fn submit( &self, command_buffers: &[&Resource], + surface_textures: &(), signal_fence: Option<(&mut Resource, crate::FenceValue)>, ) -> DeviceResult<()> { Ok(()) diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 7021c3e12d0..305af565ca1 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -161,6 +161,7 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + type SubmitSurfaceTextureSet = (); } bitflags::bitflags! { diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index 4ee6fb8e471..6b3e4e0f200 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -1748,6 +1748,7 @@ impl crate::Queue for super::Queue { unsafe fn submit( &self, command_buffers: &[&super::CommandBuffer], + _surface_textures: &(), signal_fence: Option<(&mut super::Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { let shared = Arc::clone(&self.shared); diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 39037e895c0..8a5cafc7553 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -214,6 +214,7 @@ pub trait Api: Clone + fmt::Debug + Sized { type ComputePipeline: fmt::Debug + WasmNotSendSync; type AccelerationStructure: fmt::Debug + WasmNotSendSync + 'static; + type SubmitSurfaceTextureSet: RawSet; } pub trait Instance: Sized + WasmNotSendSync { @@ -413,9 +414,12 @@ pub trait Queue: WasmNotSendSync { /// - all of the command buffers were created from command pools /// that are associated with this queue. /// - all of the command buffers had `CommadBuffer::finish()` called. + /// - all surface textures that the command buffers write to must be + /// passed to the surface_textures argument. unsafe fn submit( &self, command_buffers: &[&A::CommandBuffer], + surface_textures: &A::SubmitSurfaceTextureSet, signal_fence: Option<(&mut A::Fence, FenceValue)>, ) -> Result<(), DeviceError>; unsafe fn present( @@ -718,6 +722,25 @@ bitflags!( } ); +pub trait RawSet { + /// Construct a new set unsafely. + fn new() -> Self; + + /// Insert a value into the raw set. + /// + /// The caller is responsible for ensuring that the set doesn't outlive the + /// values it contains. The exact requirements depends on which set is being + /// constructed. + unsafe fn insert(&mut self, value: &T); +} + +/// Provide a default implementation for () for backends which do not need to +/// track any raw resources so they can easily be stubbed out. +impl RawSet for () { + fn new() -> Self {} + unsafe fn insert(&mut self, _: &T) {} +} + bitflags!( /// Texture format capability flags. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 39589115e73..d11fd6a7b24 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -68,6 +68,7 @@ impl crate::Api for Api { type ComputePipeline = ComputePipeline; type AccelerationStructure = AccelerationStructure; + type SubmitSurfaceTextureSet = (); } pub struct Instance { @@ -368,6 +369,7 @@ impl crate::Queue for Queue { unsafe fn submit( &self, command_buffers: &[&CommandBuffer], + _surface_textures: &(), signal_fence: Option<(&mut Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { objc::rc::autoreleasepool(|| { diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index a37017a9e67..50badb8923b 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -627,8 +627,16 @@ impl super::Device { let images = unsafe { functor.get_swapchain_images(raw) }.map_err(crate::DeviceError::from)?; - let vk_info = vk::FenceCreateInfo::builder().build(); - let fence = unsafe { self.shared.raw.create_fence(&vk_info, None) } + // NOTE: It's important that we define at least images.len() + 1 wait + // semaphores, since we prospectively need to provide the call to + // acquire the next image with an unsignaled semaphore. + let surface_semaphores = (0..images.len() + 1) + .map(|_| unsafe { + self.shared + .raw + .create_semaphore(&vk::SemaphoreCreateInfo::builder(), None) + }) + .collect::, _>>() .map_err(crate::DeviceError::from)?; Ok(super::Swapchain { @@ -636,10 +644,11 @@ impl super::Device { raw_flags, functor, device: Arc::clone(&self.shared), - fence, images, config: config.clone(), view_formats: wgt_view_formats, + surface_semaphores, + next_surface_index: 0, }) } diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index e9ca3221ead..ccfb790afaf 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -169,7 +169,7 @@ impl super::Swapchain { /// # Safety /// /// - The device must have been made idle before calling this function. - unsafe fn release_resources(self, device: &ash::Device) -> Self { + unsafe fn release_resources(mut self, device: &ash::Device) -> Self { profiling::scope!("Swapchain::release_resources"); { profiling::scope!("vkDeviceWaitIdle"); @@ -177,7 +177,13 @@ impl super::Swapchain { // the presentation work is done, we are forced to wait until the device is idle. let _ = unsafe { device.device_wait_idle() }; }; - unsafe { device.destroy_fence(self.fence, None) }; + + for semaphore in self.surface_semaphores.drain(..) { + unsafe { + device.destroy_semaphore(semaphore, None); + } + } + self } } @@ -934,10 +940,12 @@ impl crate::Surface for super::Surface { timeout_ns = u64::MAX; } + let wait_semaphore = sc.surface_semaphores[sc.next_surface_index]; + // will block if no image is available let (index, suboptimal) = match unsafe { sc.functor - .acquire_next_image(sc.raw, timeout_ns, vk::Semaphore::null(), sc.fence) + .acquire_next_image(sc.raw, timeout_ns, wait_semaphore, vk::Fence::null()) } { // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android. // See the comment in `Queue::present`. @@ -957,17 +965,14 @@ impl crate::Surface for super::Surface { } }; + sc.next_surface_index += 1; + sc.next_surface_index %= sc.surface_semaphores.len(); + // special case for Intel Vulkan returning bizzare values (ugh) if sc.device.vendor_id == crate::auxil::db::intel::VENDOR && index > 0x100 { return Err(crate::SurfaceError::Outdated); } - let fences = &[sc.fence]; - - unsafe { sc.device.raw.wait_for_fences(fences, true, !0) } - .map_err(crate::DeviceError::from)?; - unsafe { sc.device.raw.reset_fences(fences) }.map_err(crate::DeviceError::from)?; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkRenderPassBeginInfo.html#VUID-VkRenderPassBeginInfo-framebuffer-03209 let raw_flags = if sc .raw_flags @@ -994,6 +999,7 @@ impl crate::Surface for super::Surface { }, view_formats: sc.view_formats.clone(), }, + wait_semaphore, }; Ok(Some(crate::AcquiredSurfaceTexture { texture, diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 45deda5d5b3..9f60a8e9f4f 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -49,6 +49,8 @@ use ash::{ }; use parking_lot::{Mutex, RwLock}; +use crate::RawSet; + const MILLIS_TO_NANOS: u64 = 1_000_000; const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1; @@ -80,6 +82,23 @@ impl crate::Api for Api { type ShaderModule = ShaderModule; type RenderPipeline = RenderPipeline; type ComputePipeline = ComputePipeline; + type SubmitSurfaceTextureSet = SubmitSurfaceTextureSet; +} + +pub struct SubmitSurfaceTextureSet { + semaphores: ArrayVec, +} + +impl RawSet for SubmitSurfaceTextureSet { + fn new() -> Self { + Self { + semaphores: ArrayVec::new(), + } + } + + unsafe fn insert(&mut self, texture: &SurfaceTexture) { + self.semaphores.push(texture.wait_semaphore); + } } struct DebugUtils { @@ -146,10 +165,14 @@ struct Swapchain { raw_flags: vk::SwapchainCreateFlagsKHR, functor: khr::Swapchain, device: Arc, - fence: vk::Fence, images: Vec, config: crate::SurfaceConfiguration, view_formats: Vec, + /// One wait semaphore per swapchain image. This will be associated with the + /// surface texture, and later collected during submission. + surface_semaphores: Vec, + /// Current semaphore index to use when acquiring a surface. + next_surface_index: usize, } pub struct Surface { @@ -163,6 +186,7 @@ pub struct Surface { pub struct SurfaceTexture { index: u32, texture: Texture, + wait_semaphore: vk::Semaphore, } impl Borrow for SurfaceTexture { @@ -585,29 +609,43 @@ impl crate::Queue for Queue { unsafe fn submit( &self, command_buffers: &[&CommandBuffer], + surface_textures: &SubmitSurfaceTextureSet, signal_fence: Option<(&mut Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { - let vk_cmd_buffers = command_buffers - .iter() - .map(|cmd| cmd.raw) - .collect::>(); + let mut fence_raw = vk::Fence::null(); - let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers); + let mut wait_stage_masks = Vec::new(); + let mut wait_semaphores = Vec::new(); + let mut signal_semaphores = ArrayVec::<_, 2>::new(); + let mut signal_values = ArrayVec::<_, 2>::new(); - let mut fence_raw = vk::Fence::null(); - let mut vk_timeline_info; - let mut signal_semaphores = [vk::Semaphore::null(), vk::Semaphore::null()]; - let signal_values; + for &wait in &surface_textures.semaphores { + wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE); + wait_semaphores.push(wait); + } + + let old_index = self.relay_index.load(Ordering::Relaxed); + + let sem_index = if old_index >= 0 { + wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE); + wait_semaphores.push(self.relay_semaphores[old_index as usize]); + (old_index as usize + 1) % self.relay_semaphores.len() + } else { + 0 + }; + + signal_semaphores.push(self.relay_semaphores[sem_index]); + + self.relay_index + .store(sem_index as isize, Ordering::Relaxed); if let Some((fence, value)) = signal_fence { fence.maintain(&self.device.raw)?; match *fence { Fence::TimelineSemaphore(raw) => { - signal_values = [!0, value]; - signal_semaphores[1] = raw; - vk_timeline_info = vk::TimelineSemaphoreSubmitInfo::builder() - .signal_semaphore_values(&signal_values); - vk_info = vk_info.push_next(&mut vk_timeline_info); + signal_semaphores.push(raw); + signal_values.push(!0); + signal_values.push(value); } Fence::FencePool { ref mut active, @@ -627,26 +665,25 @@ impl crate::Queue for Queue { } } - let wait_stage_mask = [vk::PipelineStageFlags::TOP_OF_PIPE]; - let old_index = self.relay_index.load(Ordering::Relaxed); - let sem_index = if old_index >= 0 { - vk_info = vk_info - .wait_semaphores(&self.relay_semaphores[old_index as usize..old_index as usize + 1]) - .wait_dst_stage_mask(&wait_stage_mask); - (old_index as usize + 1) % self.relay_semaphores.len() - } else { - 0 - }; - self.relay_index - .store(sem_index as isize, Ordering::Relaxed); - signal_semaphores[0] = self.relay_semaphores[sem_index]; + let vk_cmd_buffers = command_buffers + .iter() + .map(|cmd| cmd.raw) + .collect::>(); - let signal_count = if signal_semaphores[1] == vk::Semaphore::null() { - 1 - } else { - 2 - }; - vk_info = vk_info.signal_semaphores(&signal_semaphores[..signal_count]); + let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers); + + vk_info = vk_info + .wait_semaphores(&wait_semaphores) + .wait_dst_stage_mask(&wait_stage_masks) + .signal_semaphores(&signal_semaphores); + + let mut vk_timeline_info; + + if !signal_values.is_empty() { + vk_timeline_info = + vk::TimelineSemaphoreSubmitInfo::builder().signal_semaphore_values(&signal_values); + vk_info = vk_info.push_next(&mut vk_timeline_info); + } profiling::scope!("vkQueueSubmit"); unsafe {