diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index be5c202ffb..43a2d1d982 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -24,6 +24,7 @@ use crate::{ use hal::{CommandEncoder as _, Device as _, Queue as _}; use parking_lot::Mutex; +use smallvec::SmallVec; use std::{ iter, mem, ptr, @@ -1115,10 +1116,13 @@ impl Global { .fetch_add(1, Ordering::Relaxed) + 1; let mut active_executions = Vec::new(); + let mut used_surface_textures = track::TextureUsageScope::new(); let snatch_guard = device.snatchable_lock.read(); + let mut submit_surface_textures_owned = SmallVec::<[_; 2]>::new(); + { let mut command_buffer_guard = hub.command_buffers.write(); @@ -1217,8 +1221,17 @@ impl Global { return Err(QueueSubmitError::DestroyedTexture(id)); } Some(TextureInner::Native { .. }) => false, - Some(TextureInner::Surface { ref has_work, .. }) => { + Some(TextureInner::Surface { + ref has_work, + ref raw, + .. + }) => { has_work.store(true, Ordering::Relaxed); + + if raw.is_some() { + submit_surface_textures_owned.push(texture.clone()); + } + true } }; @@ -1409,8 +1422,17 @@ impl Global { return Err(QueueSubmitError::DestroyedTexture(id)); } Some(TextureInner::Native { .. }) => {} - Some(TextureInner::Surface { ref has_work, .. }) => { + Some(TextureInner::Surface { + ref has_work, + ref raw, + .. + }) => { has_work.store(true, Ordering::Relaxed); + + if raw.is_some() { + submit_surface_textures_owned.push(texture.clone()); + } + unsafe { used_surface_textures .merge_single(texture, None, hal::TextureUses::PRESENT) @@ -1449,12 +1471,23 @@ impl Global { .flat_map(|pool_execution| pool_execution.cmd_buffers.iter()), ) .collect::>(); + + let mut submit_surface_textures = + SmallVec::<[_; 2]>::with_capacity(submit_surface_textures_owned.len()); + + for texture in &submit_surface_textures_owned { + submit_surface_textures.extend(match texture.inner.get(&snatch_guard) { + Some(TextureInner::Surface { raw, .. }) => raw.as_ref(), + _ => None, + }); + } + unsafe { queue .raw .as_ref() .unwrap() - .submit(&refs, Some((fence, submit_index))) + .submit(&refs, &submit_surface_textures, Some((fence, submit_index))) .map_err(DeviceError::from)?; } diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 7bc8013415..c238f299e7 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -490,7 +490,7 @@ impl Example { let mut fence = device.create_fence().unwrap(); let init_cmd = cmd_encoder.end_encoding().unwrap(); queue - .submit(&[&init_cmd], Some((&mut fence, init_fence_value))) + .submit(&[&init_cmd], &[], Some((&mut fence, init_fence_value))) .unwrap(); device.wait(&fence, init_fence_value, !0).unwrap(); device.destroy_buffer(staging_buffer); @@ -542,7 +542,7 @@ impl Example { { let ctx = &mut self.contexts[self.context_index]; self.queue - .submit(&[], Some((&mut ctx.fence, ctx.fence_value))) + .submit(&[], &[], Some((&mut ctx.fence, ctx.fence_value))) .unwrap(); } @@ -729,7 +729,9 @@ impl Example { } else { None }; - self.queue.submit(&[&cmd_buf], fence_param).unwrap(); + self.queue + .submit(&[&cmd_buf], &[&surface_tex], fence_param) + .unwrap(); self.queue.present(&self.surface, surface_tex).unwrap(); ctx.used_cmd_bufs.push(cmd_buf); ctx.used_views.push(surface_tex_view); diff --git a/wgpu-hal/examples/raw-gles.rs b/wgpu-hal/examples/raw-gles.rs index 81ab4171e3..342100e1cb 100644 --- a/wgpu-hal/examples/raw-gles.rs +++ b/wgpu-hal/examples/raw-gles.rs @@ -183,6 +183,6 @@ fn fill_screen(exposed: &hal::ExposedAdapter, width: u32, height encoder.begin_render_pass(&rp_desc); encoder.end_render_pass(); let cmd_buf = encoder.end_encoding().unwrap(); - od.queue.submit(&[&cmd_buf], None).unwrap(); + od.queue.submit(&[&cmd_buf], &[], None).unwrap(); } } diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 01a0968f3d..c05feae820 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -755,7 +755,7 @@ impl Example { let mut fence = device.create_fence().unwrap(); let init_cmd = cmd_encoder.end_encoding().unwrap(); queue - .submit(&[&init_cmd], Some((&mut fence, init_fence_value))) + .submit(&[&init_cmd], &[], Some((&mut fence, init_fence_value))) .unwrap(); device.wait(&fence, init_fence_value, !0).unwrap(); cmd_encoder.reset_all(iter::once(init_cmd)); @@ -960,7 +960,9 @@ impl Example { } else { None }; - self.queue.submit(&[&cmd_buf], fence_param).unwrap(); + self.queue + .submit(&[&cmd_buf], &[&surface_tex], fence_param) + .unwrap(); self.queue.present(&self.surface, surface_tex).unwrap(); ctx.used_cmd_bufs.push(cmd_buf); ctx.used_views.push(surface_tex_view); @@ -999,7 +1001,7 @@ impl Example { { let ctx = &mut self.contexts[self.context_index]; self.queue - .submit(&[], Some((&mut ctx.fence, ctx.fence_value))) + .submit(&[], &[], Some((&mut ctx.fence, ctx.fence_value))) .unwrap(); } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 362097e749..038bb8ca15 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -886,6 +886,7 @@ impl crate::Queue for Queue { unsafe fn submit( &self, command_buffers: &[&CommandBuffer], + _surface_textures: &[&Texture], signal_fence: Option<(&mut Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { let mut temp_lists = self.temp_lists.lock(); diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 12f86e6f31..d58e779b96 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -104,6 +104,7 @@ impl crate::Queue for Context { unsafe fn submit( &self, command_buffers: &[&Resource], + surface_textures: &[&Resource], signal_fence: Option<(&mut Resource, crate::FenceValue)>, ) -> DeviceResult<()> { Ok(()) diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index 5a4deb8e1a..6ec553bd29 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -1748,6 +1748,7 @@ impl crate::Queue for super::Queue { unsafe fn submit( &self, command_buffers: &[&super::CommandBuffer], + _surface_textures: &[&super::Texture], signal_fence: Option<(&mut super::Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { let shared = Arc::clone(&self.shared); diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 7961a4ed8e..561024dd98 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -413,9 +413,12 @@ pub trait Queue: WasmNotSendSync { /// - all of the command buffers were created from command pools /// that are associated with this queue. /// - all of the command buffers had `CommadBuffer::finish()` called. + /// - all surface textures that the command buffers write to must be + /// passed to the surface_textures argument. unsafe fn submit( &self, command_buffers: &[&A::CommandBuffer], + surface_textures: &[&A::SurfaceTexture], signal_fence: Option<(&mut A::Fence, FenceValue)>, ) -> Result<(), DeviceError>; unsafe fn present( diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 39589115e7..298f60faac 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -368,6 +368,7 @@ impl crate::Queue for Queue { unsafe fn submit( &self, command_buffers: &[&CommandBuffer], + _surface_textures: &[&SurfaceTexture], signal_fence: Option<(&mut Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { objc::rc::autoreleasepool(|| { diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 23182b440c..fdfb6ee9ed 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -627,8 +627,16 @@ impl super::Device { let images = unsafe { functor.get_swapchain_images(raw) }.map_err(crate::DeviceError::from)?; - let vk_info = vk::FenceCreateInfo::builder().build(); - let fence = unsafe { self.shared.raw.create_fence(&vk_info, None) } + // NOTE: It's important that we define at least images.len() + 1 wait + // semaphores, since we prospectively need to provide the call to + // acquire the next image with an unsignaled semaphore. + let surface_semaphores = (0..images.len() + 1) + .map(|_| unsafe { + self.shared + .raw + .create_semaphore(&vk::SemaphoreCreateInfo::builder(), None) + }) + .collect::, _>>() .map_err(crate::DeviceError::from)?; Ok(super::Swapchain { @@ -636,10 +644,11 @@ impl super::Device { raw_flags, functor, device: Arc::clone(&self.shared), - fence, images, config: config.clone(), view_formats: wgt_view_formats, + surface_semaphores, + next_surface_index: 0, }) } diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index 179842c1e5..1f0159413f 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -169,7 +169,7 @@ impl super::Swapchain { /// # Safety /// /// - The device must have been made idle before calling this function. - unsafe fn release_resources(self, device: &ash::Device) -> Self { + unsafe fn release_resources(mut self, device: &ash::Device) -> Self { profiling::scope!("Swapchain::release_resources"); { profiling::scope!("vkDeviceWaitIdle"); @@ -177,7 +177,13 @@ impl super::Swapchain { // the presentation work is done, we are forced to wait until the device is idle. let _ = unsafe { device.device_wait_idle() }; }; - unsafe { device.destroy_fence(self.fence, None) }; + + for semaphore in self.surface_semaphores.drain(..) { + unsafe { + device.destroy_semaphore(semaphore, None); + } + } + self } } @@ -934,10 +940,12 @@ impl crate::Surface for super::Surface { timeout_ns = u64::MAX; } + let wait_semaphore = sc.surface_semaphores[sc.next_surface_index]; + // will block if no image is available let (index, suboptimal) = match unsafe { sc.functor - .acquire_next_image(sc.raw, timeout_ns, vk::Semaphore::null(), sc.fence) + .acquire_next_image(sc.raw, timeout_ns, wait_semaphore, vk::Fence::null()) } { // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android. // See the comment in `Queue::present`. @@ -957,17 +965,14 @@ impl crate::Surface for super::Surface { } }; + sc.next_surface_index += 1; + sc.next_surface_index %= sc.surface_semaphores.len(); + // special case for Intel Vulkan returning bizzare values (ugh) if sc.device.vendor_id == crate::auxil::db::intel::VENDOR && index > 0x100 { return Err(crate::SurfaceError::Outdated); } - let fences = &[sc.fence]; - - unsafe { sc.device.raw.wait_for_fences(fences, true, !0) } - .map_err(crate::DeviceError::from)?; - unsafe { sc.device.raw.reset_fences(fences) }.map_err(crate::DeviceError::from)?; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkRenderPassBeginInfo.html#VUID-VkRenderPassBeginInfo-framebuffer-03209 let raw_flags = if sc .raw_flags @@ -994,6 +999,7 @@ impl crate::Surface for super::Surface { }, view_formats: sc.view_formats.clone(), }, + wait_semaphore, }; Ok(Some(crate::AcquiredSurfaceTexture { texture, diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 45deda5d5b..787ebd7267 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -146,10 +146,14 @@ struct Swapchain { raw_flags: vk::SwapchainCreateFlagsKHR, functor: khr::Swapchain, device: Arc, - fence: vk::Fence, images: Vec, config: crate::SurfaceConfiguration, view_formats: Vec, + /// One wait semaphore per swapchain image. This will be associated with the + /// surface texture, and later collected during submission. + surface_semaphores: Vec, + /// Current semaphore index to use when acquiring a surface. + next_surface_index: usize, } pub struct Surface { @@ -163,6 +167,7 @@ pub struct Surface { pub struct SurfaceTexture { index: u32, texture: Texture, + wait_semaphore: vk::Semaphore, } impl Borrow for SurfaceTexture { @@ -585,29 +590,43 @@ impl crate::Queue for Queue { unsafe fn submit( &self, command_buffers: &[&CommandBuffer], + surface_textures: &[&SurfaceTexture], signal_fence: Option<(&mut Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { - let vk_cmd_buffers = command_buffers - .iter() - .map(|cmd| cmd.raw) - .collect::>(); + let mut fence_raw = vk::Fence::null(); - let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers); + let mut wait_stage_masks = Vec::new(); + let mut wait_semaphores = Vec::new(); + let mut signal_semaphores = ArrayVec::<_, 2>::new(); + let mut signal_values = ArrayVec::<_, 2>::new(); - let mut fence_raw = vk::Fence::null(); - let mut vk_timeline_info; - let mut signal_semaphores = [vk::Semaphore::null(), vk::Semaphore::null()]; - let signal_values; + for &surface_texture in surface_textures { + wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE); + wait_semaphores.push(surface_texture.wait_semaphore); + } + + let old_index = self.relay_index.load(Ordering::Relaxed); + + let sem_index = if old_index >= 0 { + wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE); + wait_semaphores.push(self.relay_semaphores[old_index as usize]); + (old_index as usize + 1) % self.relay_semaphores.len() + } else { + 0 + }; + + signal_semaphores.push(self.relay_semaphores[sem_index]); + + self.relay_index + .store(sem_index as isize, Ordering::Relaxed); if let Some((fence, value)) = signal_fence { fence.maintain(&self.device.raw)?; match *fence { Fence::TimelineSemaphore(raw) => { - signal_values = [!0, value]; - signal_semaphores[1] = raw; - vk_timeline_info = vk::TimelineSemaphoreSubmitInfo::builder() - .signal_semaphore_values(&signal_values); - vk_info = vk_info.push_next(&mut vk_timeline_info); + signal_semaphores.push(raw); + signal_values.push(!0); + signal_values.push(value); } Fence::FencePool { ref mut active, @@ -627,26 +646,25 @@ impl crate::Queue for Queue { } } - let wait_stage_mask = [vk::PipelineStageFlags::TOP_OF_PIPE]; - let old_index = self.relay_index.load(Ordering::Relaxed); - let sem_index = if old_index >= 0 { - vk_info = vk_info - .wait_semaphores(&self.relay_semaphores[old_index as usize..old_index as usize + 1]) - .wait_dst_stage_mask(&wait_stage_mask); - (old_index as usize + 1) % self.relay_semaphores.len() - } else { - 0 - }; - self.relay_index - .store(sem_index as isize, Ordering::Relaxed); - signal_semaphores[0] = self.relay_semaphores[sem_index]; + let vk_cmd_buffers = command_buffers + .iter() + .map(|cmd| cmd.raw) + .collect::>(); - let signal_count = if signal_semaphores[1] == vk::Semaphore::null() { - 1 - } else { - 2 - }; - vk_info = vk_info.signal_semaphores(&signal_semaphores[..signal_count]); + let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers); + + vk_info = vk_info + .wait_semaphores(&wait_semaphores) + .wait_dst_stage_mask(&wait_stage_masks) + .signal_semaphores(&signal_semaphores); + + let mut vk_timeline_info; + + if !signal_values.is_empty() { + vk_timeline_info = + vk::TimelineSemaphoreSubmitInfo::builder().signal_semaphore_values(&signal_values); + vk_info = vk_info.push_next(&mut vk_timeline_info); + } profiling::scope!("vkQueueSubmit"); unsafe {