-
Notifications
You must be signed in to change notification settings - Fork 91
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
device properties #409
base: main
Are you sure you want to change the base?
device properties #409
Conversation
/ok to test |
|
@@ -14,6 +14,1021 @@ | |||
_tls_lock = threading.Lock() | |||
|
|||
|
|||
# ruff: noqa | |||
class DeviceProperties: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Alternatively, to reduce code bloat, this could look like the code below: 186 lines vs. ~1000. The same could be applied to kernel attributes for consistency, or not, since those are far less bloated. This exact code chunk is untested, but the concept stands.
class DeviceProperties: | |
class DeviceAttributes:
    """
    A class to query various attributes of a CUDA device.

    Attributes are read-only and provide information about the device.

    Every key of ``_attributes`` is exposed as an instance attribute. Reading
    one calls ``driver.cuDeviceGetAttribute`` with the mapped
    ``CUdevice_attribute`` member and the stored device handle, and passes the
    result through ``handle_return``. Integer results for names listed in
    ``_bool_attributes`` are converted to ``bool``; all other values are
    returned unchanged.

    Instances are created with ``DeviceAttributes._init(handle)``; calling the
    class directly raises ``RuntimeError``.
    """

    # Only the device handle is stored per instance; the lookup tables below
    # are shared class attributes.
    __slots__ = ("_handle",)

    # Public attribute name -> driver enum member used for the query.
    _attributes = {
        "max_threads_per_block": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
        "max_block_dim_x": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,
        "max_block_dim_y": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y,
        "max_block_dim_z": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z,
        "max_grid_dim_x": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,
        "max_grid_dim_y": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,
        "max_grid_dim_z": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z,
        "max_shared_memory_per_block": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
        "total_constant_memory": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY,
        "warp_size": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE,
        "max_pitch": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH,
        "maximum_texture1d_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH,
        "maximum_texture1d_linear_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH,
        "maximum_texture1d_mipmapped_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH,
        "maximum_texture2d_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH,
        "maximum_texture2d_height": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT,
        "maximum_texture2d_linear_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH,
        "maximum_texture2d_linear_height": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT,
        "maximum_texture2d_linear_pitch": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH,
        "maximum_texture2d_mipmapped_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH,
        "maximum_texture2d_mipmapped_height": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT,
        "maximum_texture3d_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH,
        "maximum_texture3d_height": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT,
        "maximum_texture3d_depth": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH,
        "maximum_texture3d_width_alternate": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE,
        "maximum_texture3d_height_alternate": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE,
        "maximum_texture3d_depth_alternate": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE,
        "maximum_texturecubemap_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH,
        "maximum_texture1d_layered_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH,
        "maximum_texture1d_layered_layers": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS,
        "maximum_texture2d_layered_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH,
        "maximum_texture2d_layered_height": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT,
        "maximum_texture2d_layered_layers": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS,
        "maximum_texturecubemap_layered_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH,
        "maximum_texturecubemap_layered_layers": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS,
        "maximum_surface1d_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH,
        "maximum_surface2d_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH,
        "maximum_surface2d_height": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT,
        "maximum_surface3d_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH,
        "maximum_surface3d_height": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT,
        "maximum_surface3d_depth": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH,
        "maximum_surface1d_layered_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH,
        "maximum_surface1d_layered_layers": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS,
        "maximum_surface2d_layered_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH,
        "maximum_surface2d_layered_height": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT,
        "maximum_surface2d_layered_layers": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS,
        "maximum_surfacecubemap_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH,
        "maximum_surfacecubemap_layered_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH,
        "maximum_surfacecubemap_layered_layers": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS,
        "max_registers_per_block": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,
        "clock_rate": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
        "texture_alignment": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT,
        "texture_pitch_alignment": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT,
        "gpu_overlap": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP,
        "multiprocessor_count": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
        "kernel_exec_timeout": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,
        "integrated": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED,
        "can_map_host_memory": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY,
        "compute_mode": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,
        "concurrent_kernels": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS,
        "ecc_enabled": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED,
        "pci_bus_id": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID,
        "pci_device_id": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID,
        "pci_domain_id": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID,
        "tcc_driver": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER,
        "memory_clock_rate": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,
        "global_memory_bus_width": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH,
        "l2_cache_size": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE,
        "max_threads_per_multiprocessor": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR,
        "unified_addressing": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING,
        "compute_capability_major": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
        "compute_capability_minor": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
        "global_l1_cache_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED,
        "local_l1_cache_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED,
        "max_shared_memory_per_multiprocessor": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR,
        "max_registers_per_multiprocessor": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR,
        "managed_memory": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY,
        "multi_gpu_board": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD,
        "multi_gpu_board_group_id": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID,
        "host_native_atomic_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED,
        "single_to_double_precision_perf_ratio": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO,
        "pageable_memory_access": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS,
        "concurrent_managed_access": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
        "compute_preemption_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED,
        "can_use_host_pointer_for_registered_mem": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM,
        "max_shared_memory_per_block_optin": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN,
        "pageable_memory_access_uses_host_page_tables": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
        "direct_managed_mem_access_from_host": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST,
        "virtual_memory_management_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED,
        "handle_type_posix_file_descriptor_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED,
        "handle_type_win32_handle_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED,
        "handle_type_win32_kmt_handle_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED,
        "max_blocks_per_multiprocessor": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR,
        "generic_compression_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED,
        "max_persisting_l2_cache_size": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE,
        "max_access_policy_window_size": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE,
        "gpu_direct_rdma_with_cuda_vmm_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED,
        "reserved_shared_memory_per_block": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK,
        "sparse_cuda_array_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED,
        # The enum member carries the full name; the truncated
        # CU_DEVICE_ATTRIBUTE_READ_ONLY is not a CUdevice_attribute member.
        "read_only_host_register_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED,
        "memory_pools_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED,
        "gpu_direct_rdma_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED,
        "gpu_direct_rdma_flush_writes_options": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS,
        "gpu_direct_rdma_writes_ordering": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING,
        "mempool_supported_handle_types": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES,
        "deferred_mapping_cuda_array_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED,
        "numa_config": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG,
        "numa_id": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID,
        "multicast_supported": driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED,
    }

    # Names whose integer result is reported as a bool. Built once at class
    # creation so each lookup is a constant-time membership test.
    _bool_attributes = frozenset(
        {
            "gpu_overlap",
            "kernel_exec_timeout",
            "integrated",
            "can_map_host_memory",
            "concurrent_kernels",
            "ecc_enabled",
            "tcc_driver",
            "unified_addressing",
            "global_l1_cache_supported",
            "local_l1_cache_supported",
            "managed_memory",
            "multi_gpu_board",
            "host_native_atomic_supported",
            "pageable_memory_access",
            "concurrent_managed_access",
            "compute_preemption_supported",
            "can_use_host_pointer_for_registered_mem",
            "pageable_memory_access_uses_host_page_tables",
            "direct_managed_mem_access_from_host",
            "virtual_memory_management_supported",
            "handle_type_posix_file_descriptor_supported",
            "handle_type_win32_handle_supported",
            "handle_type_win32_kmt_handle_supported",
            "generic_compression_supported",
            "gpu_direct_rdma_with_cuda_vmm_supported",
            "sparse_cuda_array_supported",
            "read_only_host_register_supported",
            "memory_pools_supported",
            "gpu_direct_rdma_supported",
            "deferred_mapping_cuda_array_supported",
            "multicast_supported",
        }
    )

    def __init__(self):
        """Always raise: instances are built through ``_init`` instead."""
        raise RuntimeError("DeviceAttributes should not be instantiated directly")

    @staticmethod
    def _init(handle):
        """
        Create a ``DeviceAttributes`` bound to ``handle``.

        ``handle`` is stored as-is and later passed as the second argument to
        ``driver.cuDeviceGetAttribute``.
        """
        # Bypass __init__, which unconditionally raises.
        self = DeviceAttributes.__new__(DeviceAttributes)
        self._handle = handle
        return self

    def __getattr__(self, name):
        """
        Query the driver for the device attribute called ``name``.

        Only invoked when normal attribute lookup fails, so ``_handle`` and
        the class-level tables never pass through here.

        Raises
        ------
        AttributeError
            If ``name`` is not a key of ``_attributes``.
        """
        try:
            attr = self._attributes[name]
        except KeyError:
            raise AttributeError(f"'DeviceAttributes' object has no attribute '{name}'") from None
        value = handle_return(driver.cuDeviceGetAttribute(attr, self._handle))
        if isinstance(value, int) and name in self._bool_attributes:
            return bool(value)
        return value
) | ||
|
||
@property | ||
def max_block_dim_x(self): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the docstring for this class is so long, you won't be able to see the description of the property you are interested in if your IDE shows docstrings on mouse hover. Why not include the docstrings for each of the properties here?
) | ||
|
||
@property | ||
def max_block_dim_y(self): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please include type hints for the return value of each of these properties? Are they all bools and ints or are some of them actually Enums?
close #210
WIP
todo: switch 1/0 ints to bool
remove the deprecated properties
handle properties not supported on older CTKs