diff --git a/src/vulkan/gpu_buf.c b/src/vulkan/gpu_buf.c
index d35d455c..91a2195c 100644
--- a/src/vulkan/gpu_buf.c
+++ b/src/vulkan/gpu_buf.c
@@ -186,9 +186,13 @@ pl_buf vk_buf_create(pl_gpu gpu, const struct pl_buf_params *params)
     case PL_BUF_MEM_AUTO:
         // We generally prefer VRAM since it's faster than RAM, but any number
         // of other requirements could potentially exclude it, so just mark it
-        // as optimal by default.
-        if (!(mparams.optimal & VK_MEMORY_PROPERTY_HOST_CACHED_BIT))
+        // as optimal by default. Additionally, don't do this if the buffer is
+        // large relative to the host-mappable VRAM (e.g. with a small BAR).
+        if (!(mparams.optimal & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) &&
+            params->size * MAPPED_VRAM_THRESHOLD <= gpu->limits.max_mapped_vram)
+        {
             mparams.optimal |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+        }
         break;
     case PL_BUF_MEM_DEVICE:
         // Force device local memory.
diff --git a/src/vulkan/malloc.h b/src/vulkan/malloc.h
index 115352e2..154e87f4 100644
--- a/src/vulkan/malloc.h
+++ b/src/vulkan/malloc.h
@@ -19,6 +19,13 @@
 
 #include "common.h"
 
+// Threshold for serving allocations from host-mapped VRAM, as opposed to host
+// memory: a buffer only prefers VRAM if its size is at most 1/Nth of the
+// mappable VRAM. (For a 256 MB non-resizable BAR, N = 64 allows up to 4 MB)
+//
+// Note: Not actually used by malloc.c, but by gpu_buf.c
+#define MAPPED_VRAM_THRESHOLD 64
+
 // All memory allocated from a vk_malloc MUST be explicitly released by
 // the caller before vk_malloc_destroy is called.
 struct vk_malloc *vk_malloc_create(struct vk_ctx *vk);