@@ -1,90 +1,100 @@
 import os
 import importlib.util
 from comfy.cli_args import args
 import subprocess

 #Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
 def get_gpu_names():
     if os.name == 'nt':
         import ctypes

         # Define necessary C structures and types
         class DISPLAY_DEVICEA(ctypes.Structure):
             _fields_ = [
                 ('cb', ctypes.c_ulong),
                 ('DeviceName', ctypes.c_char * 32),
                 ('DeviceString', ctypes.c_char * 128),
                 ('StateFlags', ctypes.c_ulong),
                 ('DeviceID', ctypes.c_char * 128),
                 ('DeviceKey', ctypes.c_char * 128)
             ]

         # Load user32.dll
         user32 = ctypes.windll.user32

         # Call EnumDisplayDevicesA
         def enum_display_devices():
             device_info = DISPLAY_DEVICEA()
             device_info.cb = ctypes.sizeof(device_info)
             device_index = 0
             gpu_names = set()

             while user32.EnumDisplayDevicesA(None, device_index, ctypes.byref(device_info), 0):
                 device_index += 1
                 gpu_names.add(device_info.DeviceString.decode('utf-8'))
             return gpu_names
         return enum_display_devices()
     else:
         gpu_names = set()
         out = subprocess.check_output(['nvidia-smi', '-L'])
         for l in out.split(b'\n'):
             if len(l) > 0:
                 gpu_names.add(l.decode('utf-8').split(' (UUID')[0])
         return gpu_names

 blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M",
              "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620",
              "Quadro K1200", "Quadro K2200", "Quadro M500", "Quadro M520", "Quadro M600", "Quadro M620", "Quadro M1000",
              "Quadro M1200", "Quadro M2000", "Quadro M2200", "Quadro M3000", "Quadro M4000", "Quadro M5000", "Quadro M5500", "Quadro M6000",
              "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M",
              "GeForce GTX 1650", "GeForce GTX 1630", "Tesla M4", "Tesla M6", "Tesla M10", "Tesla M40", "Tesla M60"
              }

+def is_ixuca():
+    try:
+        import torch
+        return hasattr(torch, "corex")
+    except ImportError:
+        return False
+
 def cuda_malloc_supported():
+    if is_ixuca():
+        return False
+
     try:
         names = get_gpu_names()
     except:
         names = set()
     for x in names:
         if "NVIDIA" in x:
             for b in blacklist:
                 if b in x:
                     return False
     return True


 if not args.cuda_malloc:
     try:
         version = ""
         torch_spec = importlib.util.find_spec("torch")
         for folder in torch_spec.submodule_search_locations:
             ver_file = os.path.join(folder, "version.py")
             if os.path.isfile(ver_file):
                 spec = importlib.util.spec_from_file_location("torch_version_import", ver_file)
                 module = importlib.util.module_from_spec(spec)
                 spec.loader.exec_module(module)
                 version = module.__version__
         if int(version[0]) >= 2: #enable by default for torch version 2.0 and up
             args.cuda_malloc = cuda_malloc_supported()
     except:
         pass


 if args.cuda_malloc and not args.disable_cuda_malloc:
     env_var = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', None)
     if env_var is None:
         env_var = "backend:cudaMallocAsync"
     else:
         env_var += ",backend:cudaMallocAsync"

     os.environ['PYTORCH_CUDA_ALLOC_CONF'] = env_var
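For the non-Windows branch, `nvidia-smi -L` prints one line per visible device, and get_gpu_names() keeps everything before the UUID. A minimal sketch of that parsing, using a hypothetical output line (the real UUID is elided; newer drivers include the "NVIDIA" prefix that cuda_malloc_supported() later looks for):

    # Hypothetical `nvidia-smi -L` line, for illustration only.
    sample = b"GPU 0: NVIDIA GeForce RTX 3090 (UUID: GPU-...)\n"
    names = {l.decode('utf-8').split(' (UUID')[0] for l in sample.split(b'\n') if len(l) > 0}
    print(names)  # {'GPU 0: NVIDIA GeForce RTX 3090'}

The retained "GPU 0: " prefix is harmless, since cuda_malloc_supported() only runs substring checks ("NVIDIA" in x, b in x) against each name.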
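And a minimal sketch, assuming a CUDA-enabled torch 2.x build, of how one might confirm that the async backend actually took effect; torch.cuda.get_allocator_backend() reports the active allocator:

    import os
    # Same constraint as the module above: the variable must be set before
    # the first torch import, or the allocator config has no effect.
    os.environ.setdefault('PYTORCH_CUDA_ALLOC_CONF', 'backend:cudaMallocAsync')

    import torch
    if torch.cuda.is_available():
        # Prints 'cudaMallocAsync' when the async backend is active,
        # 'native' for the default caching allocator.
        print(torch.cuda.get_allocator_backend())

Running this in a fresh interpreter matters: once CUDA has initialized, later edits to PYTORCH_CUDA_ALLOC_CONF are ignored, which is exactly why this module configures the variable before anything imports torch.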