Allow FP16 accumulation with --fast
Currently only applies to PyTorch nightly releases (>= 20250112).
catboxanon committed Jan 13, 2025
1 parent 1f1c7b7 commit 6789f3d
Showing 1 changed file with 6 additions and 0 deletions.
comfy/model_management.py: 6 additions & 0 deletions

@@ -241,6 +241,12 @@ def is_amd():
     torch.backends.cuda.enable_flash_sdp(True)
     torch.backends.cuda.enable_mem_efficient_sdp(True)
 
+try:
+    if is_nvidia() and args.fast:
+        torch.backends.cuda.matmul.allow_fp16_accumulation = True
+except:
+    pass
+
 try:
     if int(torch_version[0]) == 2 and int(torch_version[2]) >= 5:
         torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)
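For context, here is a minimal standalone sketch (not part of the commit) of what the new flag controls, assuming a CUDA-capable GPU and a PyTorch build recent enough to expose torch.backends.cuda.matmul.allow_fp16_accumulation, per the nightly-release note above. The hasattr check plays the same role as the try/except in the diff.

# Hypothetical standalone check: verify the flag exists, then time an FP16
# matmul with FP16 accumulation disabled and enabled.
import torch

matmul_backend = torch.backends.cuda.matmul
if torch.cuda.is_available() and hasattr(matmul_backend, "allow_fp16_accumulation"):
    a = torch.randn(4096, 4096, device="cuda", dtype=torch.float16)
    b = torch.randn(4096, 4096, device="cuda", dtype=torch.float16)

    for enabled in (False, True):
        matmul_backend.allow_fp16_accumulation = enabled
        torch.cuda.synchronize()
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        c = a @ b  # accumulates in FP16 when enabled, trading some precision for speed
        end.record()
        torch.cuda.synchronize()
        print(f"allow_fp16_accumulation={enabled}: {start.elapsed_time(end):.2f} ms")
else:
    print("Flag not available; requires a sufficiently new PyTorch build.")

On PyTorch builds that predate the attribute, assigning it raises an error, which is presumably why the commit wraps the assignment in a try/except rather than checking the version directly.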
