diff --git a/modules/standard/GPU.chpl b/modules/standard/GPU.chpl
index e8376f7690ee..1a0e5947245b 100644
--- a/modules/standard/GPU.chpl
+++ b/modules/standard/GPU.chpl
@@ -1216,4 +1216,134 @@ module GPU
     gpuInputArr = gpuOutputArr;
   }
 
+  // Returns a DeviceAttributes handle for querying properties of the GPU
+  // behind `loc`. Construction halts when `loc` is not a GPU locale.
+  proc deviceAttributes(loc) {
+    return new DeviceAttributes(loc);
+  }
+
+  // Attribute selectors understood by chpl_gpu_query_attribute. The runtime
+  // declares them in chpl-gpu.h and each GPU backend supplies the values, so
+  // every name listed here must have a matching runtime definition.
+  extern const CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_BLOCK : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_X : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Y : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Z : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_X : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Y : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Z : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_BLOCK : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__TOTAL_CONSTANT_MEMORY : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__WARP_SIZE : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAX_PITCH : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE1D_WIDTH : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_WIDTH : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_HEIGHT : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_WIDTH : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_HEIGHT : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_DEPTH : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAX_REGISTERS_PER_BLOCK : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__CLOCK_RATE : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__TEXTURE_ALIGNMENT : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__TEXTURE_PITCH_ALIGNMENT : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MULTIPROCESSOR_COUNT : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__KERNEL_EXEC_TIMEOUT : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__INTEGRATED : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__CAN_MAP_HOST_MEMORY : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__COMPUTE_MODE : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__CONCURRENT_KERNELS : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__ECC_ENABLED : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__PCI_BUS_ID : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__PCI_DEVICE_ID : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MEMORY_CLOCK_RATE : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__GLOBAL_MEMORY_BUS_WIDTH : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__L2_CACHE_SIZE : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_MULTIPROCESSOR : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MAJOR : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MINOR : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_MULTIPROCESSOR : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MANAGED_MEMORY : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__MULTI_GPU_BOARD : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__CONCURRENT_MANAGED_ACCESS : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES : c_int;
+  extern const CHPL_GPU_ATTRIBUTE__DIRECT_MANAGED_MEM_ACCESS_FROM_HOST : c_int;
+
+  // Runtime entry point: value of selector `attribute` for device `dev`.
+  extern proc chpl_gpu_query_attribute(dev : c_int, attribute : c_int) : c_int;
+
+  // Read-only view of one GPU device's name and attributes.
+  record DeviceAttributes {
+    var gpuId : int;
+
+    // Halts unless `loc` is a GPU locale.
+    proc init(loc) {
+      if !loc.isGpu() then halt("deviceAttributes must be passed a gpu locale");
+      // NOTE: until the TODO below is resolved every query targets device 0.
+      this.gpuId = 0; // TODO: Should be loc.gpuId
+    }
+
+    // Name of the device, copied into a Chapel string.
+    proc name : string {
+      extern proc chpl_gpu_name(dev : c_int, ref result : c_ptrConst(c_char));
+      var ret : string;
+      var tmp : c_ptrConst(c_char);
+
+      // The runtime hands back a buffer that this side must release.
+      chpl_gpu_name(this.gpuId : c_int, tmp);
+      try! {
+        ret = string.createCopyingBuffer(tmp, policy=decodePolicy.escape);
+      }
+      deallocate(tmp);
+
+      return ret;
+    }
+
+    // One accessor per selector; each forwards to chpl_gpu_query_attribute.
+    // NOTE(review): `MaxGridDimX` and `texturePitch_alignment` break the
+    // camelCase pattern of their siblings; left unchanged for callers.
+    proc maxThreadsPerBlock : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_BLOCK);
+    proc maxBlockDimX : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_X);
+    proc maxBlockDimY : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Y);
+    proc maxBlockDimZ : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Z);
+    proc MaxGridDimX : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_X);
+    proc maxGridDimY : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Y);
+    proc maxGridDimZ : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Z);
+    proc maxSharedMemoryPerBlock : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_BLOCK);
+    proc totalConstantMemory : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__TOTAL_CONSTANT_MEMORY);
+    proc warpSize : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__WARP_SIZE);
+    proc maxPitch : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_PITCH);
+    proc maximumTexture1dWidth : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE1D_WIDTH);
+    proc maximumTexture2dWidth : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_WIDTH);
+    proc maximumTexture2dHeight : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_HEIGHT);
+    proc maximumTexture3dWidth : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_WIDTH);
+    proc maximumTexture3dHeight : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_HEIGHT);
+    proc maximumTexture3dDepth : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_DEPTH);
+    proc maxRegistersPerBlock : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_REGISTERS_PER_BLOCK);
+    proc clockRate : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__CLOCK_RATE);
+    proc textureAlignment : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__TEXTURE_ALIGNMENT);
+    proc texturePitch_alignment : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__TEXTURE_PITCH_ALIGNMENT);
+    proc multiprocessorCount : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MULTIPROCESSOR_COUNT);
+    proc kernelExecTimeout : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__KERNEL_EXEC_TIMEOUT);
+    proc integrated : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__INTEGRATED);
+    proc canMapHostMemory : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__CAN_MAP_HOST_MEMORY);
+    proc computeMode : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__COMPUTE_MODE);
+    proc concurrentKernels : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__CONCURRENT_KERNELS);
+    proc eccEnabled : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__ECC_ENABLED);
+    proc pciBusId : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__PCI_BUS_ID);
+    proc pciDeviceId : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__PCI_DEVICE_ID);
+    proc memoryClockRate : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MEMORY_CLOCK_RATE);
+    proc globalMemoryBusWidth : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__GLOBAL_MEMORY_BUS_WIDTH);
+    proc l2CacheSize : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__L2_CACHE_SIZE);
+    proc maxThreadsPerMultiprocessor : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_MULTIPROCESSOR);
+    proc computeCapabilityMajor : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MAJOR);
+    proc computeCapabilityMinor : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MINOR);
+    proc maxSharedMemoryPerMultiprocessor : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_MULTIPROCESSOR);
+    proc managedMemory : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MANAGED_MEMORY);
+    proc multiGpuBoard : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__MULTI_GPU_BOARD);
+    proc pageableMemoryAccess : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS);
+    proc concurrentManagedAccess : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__CONCURRENT_MANAGED_ACCESS);
+    proc pageableMemoryAccessUsesHostPageTables : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES);
+    proc directManagedMemAccessFromHost : int do return chpl_gpu_query_attribute(this.gpuId : c_int, CHPL_GPU_ATTRIBUTE__DIRECT_MANAGED_MEM_ACCESS_FROM_HOST);
+  }
 }
diff --git a/runtime/include/chpl-gpu-impl.h b/runtime/include/chpl-gpu-impl.h
index 5adff14c484f..81cceaf800be 100644
--- a/runtime/include/chpl-gpu-impl.h
+++ b/runtime/include/chpl-gpu-impl.h
@@ -100,6 +100,12 @@ GPU_CUB_WRAP(DECL_ONE_SORT_IMPL, keys)
 
 #undef DECL_ONE_SORT_IMPL
 
+// Writes the name of device `dev` into `resultBuffer` (`bufferSize` bytes).
+void chpl_gpu_impl_name(int dev, char *resultBuffer, int bufferSize);
+
+// Returns the value of the backend attribute selector `attribute` for `dev`.
+int chpl_gpu_impl_query_attribute(int dev, int attribute);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/runtime/include/chpl-gpu.h b/runtime/include/chpl-gpu.h
index 68becf511105..a2cdbe2920a6 100644
--- a/runtime/include/chpl-gpu.h
+++ b/runtime/include/chpl-gpu.h
@@ -201,6 +201,59 @@ GPU_CUB_WRAP(DECL_ONE_SORT, keys);
 
 #undef DECL_ONE_SORT
 
+// Stores in *result a freshly allocated buffer holding the name of device
+// `dev`; the caller releases it.
+void chpl_gpu_name(int dev, char **result);
+
+// Attribute selectors for chpl_gpu_query_attribute(); the values are supplied
+// by whichever GPU backend is built.
+extern const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_BLOCK;
+extern const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_X;
+extern const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Y;
+extern const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Z;
+extern const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_X;
+extern const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Y;
+extern const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Z;
+extern const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_BLOCK;
+extern const int CHPL_GPU_ATTRIBUTE__TOTAL_CONSTANT_MEMORY;
+extern const int CHPL_GPU_ATTRIBUTE__WARP_SIZE;
+extern const int CHPL_GPU_ATTRIBUTE__MAX_PITCH;
+extern const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE1D_WIDTH;
+extern const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_WIDTH;
+extern const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_HEIGHT;
+extern const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_WIDTH;
+extern const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_HEIGHT;
+extern const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_DEPTH;
+extern const int CHPL_GPU_ATTRIBUTE__MAX_REGISTERS_PER_BLOCK;
+extern const int CHPL_GPU_ATTRIBUTE__CLOCK_RATE;
+extern const int CHPL_GPU_ATTRIBUTE__TEXTURE_ALIGNMENT;
+extern const int CHPL_GPU_ATTRIBUTE__TEXTURE_PITCH_ALIGNMENT;
+extern const int CHPL_GPU_ATTRIBUTE__MULTIPROCESSOR_COUNT;
+extern const int CHPL_GPU_ATTRIBUTE__KERNEL_EXEC_TIMEOUT;
+extern const int CHPL_GPU_ATTRIBUTE__INTEGRATED;
+extern const int CHPL_GPU_ATTRIBUTE__CAN_MAP_HOST_MEMORY;
+extern const int CHPL_GPU_ATTRIBUTE__COMPUTE_MODE;
+extern const int CHPL_GPU_ATTRIBUTE__CONCURRENT_KERNELS;
+extern const int CHPL_GPU_ATTRIBUTE__ECC_ENABLED;
+extern const int CHPL_GPU_ATTRIBUTE__PCI_BUS_ID;
+extern const int CHPL_GPU_ATTRIBUTE__PCI_DEVICE_ID;
+extern const int CHPL_GPU_ATTRIBUTE__MEMORY_CLOCK_RATE;
+extern const int CHPL_GPU_ATTRIBUTE__GLOBAL_MEMORY_BUS_WIDTH;
+extern const int CHPL_GPU_ATTRIBUTE__L2_CACHE_SIZE;
+extern const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_MULTIPROCESSOR;
+extern const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MAJOR;
+extern const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MINOR;
+extern const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_MULTIPROCESSOR;
+extern const int CHPL_GPU_ATTRIBUTE__MANAGED_MEMORY;
+extern const int CHPL_GPU_ATTRIBUTE__MULTI_GPU_BOARD;
+extern const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS;
+extern const int CHPL_GPU_ATTRIBUTE__CONCURRENT_MANAGED_ACCESS;
+extern const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES;
+extern const int CHPL_GPU_ATTRIBUTE__DIRECT_MANAGED_MEM_ACCESS_FROM_HOST;
+
+// Returns the value of selector `attribute` for device `dev`.
+int chpl_gpu_query_attribute(int dev, int attribute);
+
 #else // HAS_GPU_LOCALE
 
 // Provide a fallback for the chpl_assert_on_gpu function for non-GPU locales.
diff --git a/runtime/src/chpl-gpu.c b/runtime/src/chpl-gpu.c index 1c5796ab45c9..e027adb68b7c 100644 --- a/runtime/src/chpl-gpu.c +++ b/runtime/src/chpl-gpu.c @@ -1582,4 +1582,15 @@ GPU_CUB_WRAP(DEF_ONE_SORT, keys) #undef DEF_ONE_SORT +void chpl_gpu_name(int dev, char **result) { + const int BUFFER_SIZE = 0xFF; + char* resultBuffer = (char *)chpl_mem_alloc(BUFFER_SIZE, CHPL_RT_MD_IO_BUFFER, __LINE__, 0); + chpl_gpu_impl_name(dev, resultBuffer, BUFFER_SIZE); + *result = resultBuffer; +} + +int chpl_gpu_query_attribute(int dev, int attribute) { + return chpl_gpu_impl_query_attribute(dev, attribute); +} + #endif diff --git a/runtime/src/gpu/amd/gpu-amd.c b/runtime/src/gpu/amd/gpu-amd.c index 25f48d109223..08afedbea33f 100644 --- a/runtime/src/gpu/amd/gpu-amd.c +++ b/runtime/src/gpu/amd/gpu-amd.c @@ -512,5 +512,58 @@ void chpl_gpu_impl_host_unregister(void* var) { ROCM_CALL(hipHostUnregister(var)); } +void chpl_gpu_impl_name(int dev, char *resultBuffer, int bufferSize) { + ROCM_CALL(hipDeviceGetName(resultBuffer, bufferSize, indexToDeviceID[dev])); +} + +const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_BLOCK = hipDeviceAttributeMaxThreadsPerBlock; +const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_X = hipDeviceAttributeMaxBlockDimX; +const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Y = hipDeviceAttributeMaxBlockDimY; +const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Z = hipDeviceAttributeMaxBlockDimZ; +const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_X = hipDeviceAttributeMaxGridDimX; +const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Y = hipDeviceAttributeMaxGridDimY; +const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Z = hipDeviceAttributeMaxGridDimZ; +const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_BLOCK = hipDeviceAttributeMaxSharedMemoryPerBlock; +const int CHPL_GPU_ATTRIBUTE__TOTAL_CONSTANT_MEMORY = hipDeviceAttributeTotalConstantMemory; +const int CHPL_GPU_ATTRIBUTE__WARP_SIZE = hipDeviceAttributeWarpSize; +const int CHPL_GPU_ATTRIBUTE__MAX_PITCH = hipDeviceAttributeMaxPitch; +const int 
CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE1D_WIDTH = hipDeviceAttributeMaxTexture1DWidth; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_WIDTH = hipDeviceAttributeMaxTexture2DWidth; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_HEIGHT = hipDeviceAttributeMaxTexture2DHeight; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_WIDTH = hipDeviceAttributeMaxTexture3DWidth; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_HEIGHT = hipDeviceAttributeMaxTexture3DHeight; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_DEPTH = hipDeviceAttributeMaxTexture3DDepth; +const int CHPL_GPU_ATTRIBUTE__MAX_REGISTERS_PER_BLOCK = hipDeviceAttributeMaxRegistersPerBlock; +const int CHPL_GPU_ATTRIBUTE__CLOCK_RATE = hipDeviceAttributeClockRate; +const int CHPL_GPU_ATTRIBUTE__TEXTURE_ALIGNMENT = hipDeviceAttributeTextureAlignment; +const int CHPL_GPU_ATTRIBUTE__TEXTURE_PITCH_ALIGNMENT = hipDeviceAttributeTexturePitchAlignment; +const int CHPL_GPU_ATTRIBUTE__MULTIPROCESSOR_COUNT = hipDeviceAttributeMultiprocessorCount; +const int CHPL_GPU_ATTRIBUTE__KERNEL_EXEC_TIMEOUT = hipDeviceAttributeKernelExecTimeout; +const int CHPL_GPU_ATTRIBUTE__INTEGRATED = hipDeviceAttributeIntegrated; +const int CHPL_GPU_ATTRIBUTE__CAN_MAP_HOST_MEMORY = hipDeviceAttributeCanMapHostMemory; +const int CHPL_GPU_ATTRIBUTE__COMPUTE_MODE = hipDeviceAttributeComputeMode; +const int CHPL_GPU_ATTRIBUTE__CONCURRENT_KERNELS = hipDeviceAttributeConcurrentKernels; +const int CHPL_GPU_ATTRIBUTE__ECC_ENABLED = hipDeviceAttributeEccEnabled; +const int CHPL_GPU_ATTRIBUTE__PCI_BUS_ID = hipDeviceAttributePciBusId; +const int CHPL_GPU_ATTRIBUTE__PCI_DEVICE_ID = hipDeviceAttributePciDeviceId; +const int CHPL_GPU_ATTRIBUTE__MEMORY_CLOCK_RATE = hipDeviceAttributeMemoryClockRate; +const int CHPL_GPU_ATTRIBUTE__GLOBAL_MEMORY_BUS_WIDTH = hipDeviceAttributeMemoryBusWidth; +const int CHPL_GPU_ATTRIBUTE__L2_CACHE_SIZE = hipDeviceAttributeL2CacheSize; +const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_MULTIPROCESSOR = 
hipDeviceAttributeMaxThreadsPerMultiProcessor; +const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MAJOR = hipDeviceAttributeComputeCapabilityMajor; +const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MINOR = hipDeviceAttributeComputeCapabilityMinor; +const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = hipDeviceAttributeMaxSharedMemoryPerMultiprocessor; +const int CHPL_GPU_ATTRIBUTE__MANAGED_MEMORY = hipDeviceAttributeManagedMemory; +const int CHPL_GPU_ATTRIBUTE__MULTI_GPU_BOARD = hipDeviceAttributeIsMultiGpuBoard; +const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS = hipDeviceAttributePageableMemoryAccess; +const int CHPL_GPU_ATTRIBUTE__CONCURRENT_MANAGED_ACCESS = hipDeviceAttributeConcurrentManagedAccess; +const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = hipDeviceAttributePageableMemoryAccessUsesHostPageTables; +const int CHPL_GPU_ATTRIBUTE__DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = hipDeviceAttributeDirectManagedMemAccessFromHost; + +int chpl_gpu_impl_query_attribute(int dev, int attribute) { + int res; + ROCM_CALL(hipDeviceGetAttribute(&res, attribute, indexToDeviceID[dev])); + return res; +} #endif // HAS_GPU_LOCALE diff --git a/runtime/src/gpu/cpu/gpu-cpu.c b/runtime/src/gpu/cpu/gpu-cpu.c index 8c7ffb2ca3f8..d81527b2683a 100644 --- a/runtime/src/gpu/cpu/gpu-cpu.c +++ b/runtime/src/gpu/cpu/gpu-cpu.c @@ -201,4 +201,59 @@ void* chpl_gpu_impl_host_register(void* var, size_t size) { return var; } void chpl_gpu_impl_host_unregister(void* var) { } #undef DEF_ONE_SORT + +void chpl_gpu_impl_name(int dev, char *resultBuffer, int bufferSize) { + strcpy(resultBuffer, "chapel-cpu-as-device-gpu"); +} + +const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_BLOCK = 0; +const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_X = 1; +const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Y = 2; +const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Z = 3; +const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_X = 4; +const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Y = 5; +const int 
CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Z = 6; +const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_BLOCK = 7; +const int CHPL_GPU_ATTRIBUTE__TOTAL_CONSTANT_MEMORY = 8; +const int CHPL_GPU_ATTRIBUTE__WARP_SIZE = 9; +const int CHPL_GPU_ATTRIBUTE__MAX_PITCH = 10; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE1D_WIDTH = 11; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_WIDTH = 12; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_HEIGHT = 13; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_WIDTH = 14; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_HEIGHT = 15; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_DEPTH = 16; +const int CHPL_GPU_ATTRIBUTE__MAX_REGISTERS_PER_BLOCK = 17; +const int CHPL_GPU_ATTRIBUTE__CLOCK_RATE = 18; +const int CHPL_GPU_ATTRIBUTE__TEXTURE_ALIGNMENT = 19; +const int CHPL_GPU_ATTRIBUTE__TEXTURE_PITCH_ALIGNMENT = 20; +const int CHPL_GPU_ATTRIBUTE__MULTIPROCESSOR_COUNT = 21; +const int CHPL_GPU_ATTRIBUTE__KERNEL_EXEC_TIMEOUT = 22; +const int CHPL_GPU_ATTRIBUTE__INTEGRATED = 23; +const int CHPL_GPU_ATTRIBUTE__CAN_MAP_HOST_MEMORY = 24; +const int CHPL_GPU_ATTRIBUTE__COMPUTE_MODE = 25; +const int CHPL_GPU_ATTRIBUTE__CONCURRENT_KERNELS = 26; +const int CHPL_GPU_ATTRIBUTE__ECC_ENABLED = 27; +const int CHPL_GPU_ATTRIBUTE__PCI_BUS_ID = 28; +const int CHPL_GPU_ATTRIBUTE__PCI_DEVICE_ID = 29; +const int CHPL_GPU_ATTRIBUTE__MEMORY_CLOCK_RATE = 30; +const int CHPL_GPU_ATTRIBUTE__GLOBAL_MEMORY_BUS_WIDTH = 31; +const int CHPL_GPU_ATTRIBUTE__L2_CACHE_SIZE = 32; +const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_MULTIPROCESSOR = 33; +const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MAJOR = 34; +const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MINOR = 35; +const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 36; +const int CHPL_GPU_ATTRIBUTE__MANAGED_MEMORY = 37; +const int CHPL_GPU_ATTRIBUTE__MULTI_GPU_BOARD = 38; +const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS = 39; +const int CHPL_GPU_ATTRIBUTE__CONCURRENT_MANAGED_ACCESS = 40; +const int 
CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 41; +const int CHPL_GPU_ATTRIBUTE__DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 42; + +int chpl_gpu_impl_query_attribute(int dev, int attribute) { + chpl_warning( + "querying gpu attributes is currently unsupported in cpu-as-device mode.",0,0); + return -1; +} + #endif // HAS_GPU_LOCALE diff --git a/runtime/src/gpu/nvidia/gpu-nvidia.c b/runtime/src/gpu/nvidia/gpu-nvidia.c index d7e93173f3b5..95d0f13c6681 100644 --- a/runtime/src/gpu/nvidia/gpu-nvidia.c +++ b/runtime/src/gpu/nvidia/gpu-nvidia.c @@ -461,4 +461,58 @@ void chpl_gpu_impl_host_unregister(void* var) { CUDA_CALL(cuMemHostUnregister(var)); } +void chpl_gpu_impl_name(int dev, char *resultBuffer, int bufferSize) { + CUDA_CALL(cuDeviceGetName(resultBuffer, bufferSize, chpl_gpu_devices[dev])); +} + +const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_BLOCK = CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK; +const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_X = CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X; +const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Y = CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y; +const int CHPL_GPU_ATTRIBUTE__MAX_BLOCK_DIM_Z = CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z; +const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_X = CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X; +const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Y = CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y; +const int CHPL_GPU_ATTRIBUTE__MAX_GRID_DIM_Z = CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z; +const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_BLOCK = CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK; +const int CHPL_GPU_ATTRIBUTE__TOTAL_CONSTANT_MEMORY = CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY; +const int CHPL_GPU_ATTRIBUTE__WARP_SIZE = CU_DEVICE_ATTRIBUTE_WARP_SIZE; +const int CHPL_GPU_ATTRIBUTE__MAX_PITCH = CU_DEVICE_ATTRIBUTE_MAX_PITCH; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE1D_WIDTH = CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_WIDTH = CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH; +const 
int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE2D_HEIGHT = CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_WIDTH = CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_HEIGHT = CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT; +const int CHPL_GPU_ATTRIBUTE__MAXIMUM_TEXTURE3D_DEPTH = CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH; +const int CHPL_GPU_ATTRIBUTE__MAX_REGISTERS_PER_BLOCK = CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK; +const int CHPL_GPU_ATTRIBUTE__CLOCK_RATE = CU_DEVICE_ATTRIBUTE_CLOCK_RATE; +const int CHPL_GPU_ATTRIBUTE__TEXTURE_ALIGNMENT = CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT; +const int CHPL_GPU_ATTRIBUTE__TEXTURE_PITCH_ALIGNMENT = CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT; +const int CHPL_GPU_ATTRIBUTE__MULTIPROCESSOR_COUNT = CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT; +const int CHPL_GPU_ATTRIBUTE__KERNEL_EXEC_TIMEOUT = CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT; +const int CHPL_GPU_ATTRIBUTE__INTEGRATED = CU_DEVICE_ATTRIBUTE_INTEGRATED; +const int CHPL_GPU_ATTRIBUTE__CAN_MAP_HOST_MEMORY = CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY; +const int CHPL_GPU_ATTRIBUTE__COMPUTE_MODE = CU_DEVICE_ATTRIBUTE_COMPUTE_MODE; +const int CHPL_GPU_ATTRIBUTE__CONCURRENT_KERNELS = CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS; +const int CHPL_GPU_ATTRIBUTE__ECC_ENABLED = CU_DEVICE_ATTRIBUTE_ECC_ENABLED; +const int CHPL_GPU_ATTRIBUTE__PCI_BUS_ID = CU_DEVICE_ATTRIBUTE_PCI_BUS_ID; +const int CHPL_GPU_ATTRIBUTE__PCI_DEVICE_ID = CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID; +const int CHPL_GPU_ATTRIBUTE__MEMORY_CLOCK_RATE = CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE; +const int CHPL_GPU_ATTRIBUTE__GLOBAL_MEMORY_BUS_WIDTH = CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH; +const int CHPL_GPU_ATTRIBUTE__L2_CACHE_SIZE = CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE; +const int CHPL_GPU_ATTRIBUTE__MAX_THREADS_PER_MULTIPROCESSOR = CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR; +const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MAJOR = 
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR; +const int CHPL_GPU_ATTRIBUTE__COMPUTE_CAPABILITY_MINOR = CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR; +const int CHPL_GPU_ATTRIBUTE__MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR; +const int CHPL_GPU_ATTRIBUTE__MANAGED_MEMORY = CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY; +const int CHPL_GPU_ATTRIBUTE__MULTI_GPU_BOARD = CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD; +const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS = CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS; +const int CHPL_GPU_ATTRIBUTE__CONCURRENT_MANAGED_ACCESS = CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS; +const int CHPL_GPU_ATTRIBUTE__PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES; +const int CHPL_GPU_ATTRIBUTE__DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST; + +int chpl_gpu_impl_query_attribute(int dev, int attribute) { + int res; + CUDA_CALL(cuDeviceGetAttribute(&res, attribute, chpl_gpu_devices[dev])); + return res; +} + #endif // HAS_GPU_LOCALE diff --git a/test/gpu/native/deviceAttributes.chpl b/test/gpu/native/deviceAttributes.chpl new file mode 100644 index 000000000000..dcbcd7c360d0 --- /dev/null +++ b/test/gpu/native/deviceAttributes.chpl @@ -0,0 +1,55 @@ +use GPU; + +config const runBaseline = false; + +extern proc runBaselineVersion(); + +if runBaseline then runBaselineVersion(); +else { + on here.gpus[0] { + writeln("name: ", deviceAttributes(here).name); + writeln("maxThreadsPerBlock: ", deviceAttributes(here).maxThreadsPerBlock); + writeln("maxBlockDimX: ", deviceAttributes(here).maxBlockDimX); + writeln("maxBlockDimY: ", deviceAttributes(here).maxBlockDimY); + writeln("maxBlockDimZ: ", deviceAttributes(here).maxBlockDimZ); + writeln("MaxGridDimX: ", deviceAttributes(here).MaxGridDimX); + writeln("maxGridDimY: ", deviceAttributes(here).maxGridDimY); + writeln("maxGridDimZ: ", 
deviceAttributes(here).maxGridDimZ); + writeln("maxSharedMemoryPerBlock: ", deviceAttributes(here).maxSharedMemoryPerBlock); + writeln("totalConstantMemory: ", deviceAttributes(here).totalConstantMemory); + writeln("warpSize: ", deviceAttributes(here).warpSize); + writeln("maxPitch: ", deviceAttributes(here).maxPitch); + writeln("maximumTexture1dWidth: ", deviceAttributes(here).maximumTexture1dWidth); + writeln("maximumTexture2dWidth: ", deviceAttributes(here).maximumTexture2dWidth); + writeln("maximumTexture2dHeight: ", deviceAttributes(here).maximumTexture2dHeight); + writeln("maximumTexture3dWidth: ", deviceAttributes(here).maximumTexture3dWidth); + writeln("maximumTexture3dHeight: ", deviceAttributes(here).maximumTexture3dHeight); + writeln("maximumTexture3dDepth: ", deviceAttributes(here).maximumTexture3dDepth); + writeln("maxRegistersPerBlock: ", deviceAttributes(here).maxRegistersPerBlock); + writeln("clockRate: ", deviceAttributes(here).clockRate); + writeln("textureAlignment: ", deviceAttributes(here).textureAlignment); + writeln("texturePitch_alignment: ", deviceAttributes(here).texturePitch_alignment); + writeln("multiprocessorCount: ", deviceAttributes(here).multiprocessorCount); + writeln("kernelExecTimeout: ", deviceAttributes(here).kernelExecTimeout); + writeln("integrated: ", deviceAttributes(here).integrated); + writeln("canMapHostMemory: ", deviceAttributes(here).canMapHostMemory); + writeln("computeMode: ", deviceAttributes(here).computeMode); + writeln("concurrentKernels: ", deviceAttributes(here).concurrentKernels); + writeln("eccEnabled: ", deviceAttributes(here).eccEnabled); + writeln("pciBusId: ", deviceAttributes(here).pciBusId); + writeln("pciDeviceId: ", deviceAttributes(here).pciDeviceId); + writeln("memoryClockRate: ", deviceAttributes(here).memoryClockRate); + writeln("globalMemoryBusWidth: ", deviceAttributes(here).globalMemoryBusWidth); + writeln("l2CacheSize: ", deviceAttributes(here).l2CacheSize); + 
writeln("maxThreadsPerMultiprocessor: ", deviceAttributes(here).maxThreadsPerMultiprocessor); + writeln("computeCapabilityMajor: ", deviceAttributes(here).computeCapabilityMajor); + writeln("computeCapabilityMinor: ", deviceAttributes(here).computeCapabilityMinor); + writeln("maxSharedMemoryPerMultiprocessor: ", deviceAttributes(here).maxSharedMemoryPerMultiprocessor); + writeln("managedMemory: ", deviceAttributes(here).managedMemory); + writeln("multiGpuBoard: ", deviceAttributes(here).multiGpuBoard); + writeln("pageableMemoryAccess: ", deviceAttributes(here).pageableMemoryAccess); + writeln("concurrentManagedAccess: ", deviceAttributes(here).concurrentManagedAccess); + writeln("pageableMemoryAccessUsesHostPageTables: ", deviceAttributes(here).pageableMemoryAccessUsesHostPageTables); + writeln("directManagedMemAccessFromHost: ", deviceAttributes(here).directManagedMemAccessFromHost); + } +} diff --git a/test/gpu/native/deviceAttributes.compopts b/test/gpu/native/deviceAttributes.compopts new file mode 100755 index 000000000000..d0777b91a9b8 --- /dev/null +++ b/test/gpu/native/deviceAttributes.compopts @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +import os + +chpl_gpu = os.getenv('CHPL_GPU') + +res = 'deviceAttributes.h' +if chpl_gpu == 'amd': + res += ' deviceAttributes.hip.c' +elif chpl_gpu == 'nvidia': + res += ' deviceAttributes.cuda.c' +elif chpl_gpu == 'cpu': + res += ' deviceAttributes.cpu.c' + +print(res) diff --git a/test/gpu/native/deviceAttributes.cpu.c b/test/gpu/native/deviceAttributes.cpu.c new file mode 100644 index 000000000000..4c34d57969b6 --- /dev/null +++ b/test/gpu/native/deviceAttributes.cpu.c @@ -0,0 +1,92 @@ +#include "deviceAttributes.h" +#include + +void runBaselineVersion(void) { + printf("name: chapel-cpu-as-device-gpu\n"); + printf("maxThreadsPerBlock: -1\n"); + printf("maxBlockDimX: -1\n"); + printf("maxBlockDimY: -1\n"); + printf("maxBlockDimZ: -1\n"); + printf("MaxGridDimX: -1\n"); + printf("maxGridDimY: -1\n"); + printf("maxGridDimZ: 
-1\n"); + printf("maxSharedMemoryPerBlock: -1\n"); + printf("totalConstantMemory: -1\n"); + printf("warpSize: -1\n"); + printf("maxPitch: -1\n"); + printf("maximumTexture1dWidth: -1\n"); + printf("maximumTexture2dWidth: -1\n"); + printf("maximumTexture2dHeight: -1\n"); + printf("maximumTexture3dWidth: -1\n"); + printf("maximumTexture3dHeight: -1\n"); + printf("maximumTexture3dDepth: -1\n"); + printf("maxRegistersPerBlock: -1\n"); + printf("clockRate: -1\n"); + printf("textureAlignment: -1\n"); + printf("texturePitch_alignment: -1\n"); + printf("multiprocessorCount: -1\n"); + printf("kernelExecTimeout: -1\n"); + printf("integrated: -1\n"); + printf("canMapHostMemory: -1\n"); + printf("computeMode: -1\n"); + printf("concurrentKernels: -1\n"); + printf("eccEnabled: -1\n"); + printf("pciBusId: -1\n"); + printf("pciDeviceId: -1\n"); + printf("memoryClockRate: -1\n"); + printf("globalMemoryBusWidth: -1\n"); + printf("l2CacheSize: -1\n"); + printf("maxThreadsPerMultiprocessor: -1\n"); + printf("computeCapabilityMajor: -1\n"); + printf("computeCapabilityMinor: -1\n"); + printf("maxSharedMemoryPerMultiprocessor: -1\n"); + printf("managedMemory: -1\n"); + printf("multiGpuBoard: -1\n"); + printf("pageableMemoryAccess: -1\n"); + printf("concurrentManagedAccess: -1\n"); + printf("pageableMemoryAccessUsesHostPageTables: -1\n"); + printf("directManagedMemAccessFromHost: -1\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu 
attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + 
printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); + printf("warning: querying gpu attributes is currently unsupported in cpu-as-device mode.\n"); +} diff --git a/test/gpu/native/deviceAttributes.cuda.c b/test/gpu/native/deviceAttributes.cuda.c new file mode 100644 index 000000000000..3bffaa201e95 --- /dev/null +++ b/test/gpu/native/deviceAttributes.cuda.c @@ -0,0 +1,64 @@ +#include "deviceAttributes.h" +#include <cuda.h> +#include <stdio.h> + +CUdevice device; + +static void reportAttribute(const char *name, CUdevice_attribute attr) { + int val; + cuDeviceGetAttribute(&val, attr, device); + printf("%s: %d\n", name, val);
+} + +void runBaselineVersion(void) { + cuInit(0); + cuDeviceGet(&device, 0); + + char name[0xFF]; + cuDeviceGetName(name, 0xFF, device); + printf("name: %s\n", name); + + reportAttribute("maxThreadsPerBlock", CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK); + reportAttribute("maxBlockDimX", CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X); + reportAttribute("maxBlockDimY", CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y); + reportAttribute("maxBlockDimZ", CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z); + reportAttribute("MaxGridDimX", CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X); + reportAttribute("maxGridDimY", CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y); + reportAttribute("maxGridDimZ", CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z); + reportAttribute("maxSharedMemoryPerBlock", CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK); + reportAttribute("totalConstantMemory", CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY); + reportAttribute("warpSize", CU_DEVICE_ATTRIBUTE_WARP_SIZE); + reportAttribute("maxPitch", CU_DEVICE_ATTRIBUTE_MAX_PITCH); + reportAttribute("maximumTexture1dWidth", CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH); + reportAttribute("maximumTexture2dWidth", CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH); + reportAttribute("maximumTexture2dHeight", CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT); + reportAttribute("maximumTexture3dWidth", CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH); + reportAttribute("maximumTexture3dHeight", CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT); + reportAttribute("maximumTexture3dDepth", CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH); + reportAttribute("maxRegistersPerBlock", CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK); + reportAttribute("clockRate", CU_DEVICE_ATTRIBUTE_CLOCK_RATE); + reportAttribute("textureAlignment", CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT); + reportAttribute("texturePitch_alignment", CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT); + reportAttribute("multiprocessorCount", CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT); + reportAttribute("kernelExecTimeout", 
CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT); + reportAttribute("integrated", CU_DEVICE_ATTRIBUTE_INTEGRATED); + reportAttribute("canMapHostMemory", CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY); + reportAttribute("computeMode", CU_DEVICE_ATTRIBUTE_COMPUTE_MODE); + reportAttribute("concurrentKernels", CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS); + reportAttribute("eccEnabled", CU_DEVICE_ATTRIBUTE_ECC_ENABLED); + reportAttribute("pciBusId", CU_DEVICE_ATTRIBUTE_PCI_BUS_ID); + reportAttribute("pciDeviceId", CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID); + reportAttribute("memoryClockRate", CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE); + reportAttribute("globalMemoryBusWidth", CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH); + reportAttribute("l2CacheSize", CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE); + reportAttribute("maxThreadsPerMultiprocessor", CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR); + reportAttribute("computeCapabilityMajor", CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR); + reportAttribute("computeCapabilityMinor", CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR); + reportAttribute("maxSharedMemoryPerMultiprocessor", CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR); + reportAttribute("managedMemory", CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY); + reportAttribute("multiGpuBoard", CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD); + reportAttribute("pageableMemoryAccess", CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS); + reportAttribute("concurrentManagedAccess", CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS); + reportAttribute("pageableMemoryAccessUsesHostPageTables", CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES); + reportAttribute("directManagedMemAccessFromHost", CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST); +} diff --git a/test/gpu/native/deviceAttributes.h b/test/gpu/native/deviceAttributes.h new file mode 100644 index 000000000000..d700c4c45142 --- /dev/null +++ b/test/gpu/native/deviceAttributes.h @@ -0,0 +1 @@ +void runBaselineVersion(void); diff --git 
a/test/gpu/native/deviceAttributes.hip.c b/test/gpu/native/deviceAttributes.hip.c new file mode 100644 index 000000000000..4730f05abbc9 --- /dev/null +++ b/test/gpu/native/deviceAttributes.hip.c @@ -0,0 +1,69 @@ +#include "deviceAttributes.h" + +#ifndef __HIP_PLATFORM_AMD__ +#define __HIP_PLATFORM_AMD__ +#endif +#include <hip/hip_runtime.h> +#include <hip/hip_runtime_api.h> +#include <stdio.h> + +hipDevice_t device; + +static void reportAttribute(const char *name, hipDeviceAttribute_t attr) { + int val; + hipDeviceGetAttribute(&val, attr, device); + printf("%s: %d\n", name, val); +} + +void runBaselineVersion(void) { + hipInit(0); + hipDeviceGet(&device, 0); + + char name[0xFF]; + hipDeviceGetName(name, 0xFF, device); + printf("name: %s\n", name); + + reportAttribute("maxThreadsPerBlock", hipDeviceAttributeMaxThreadsPerBlock); + reportAttribute("maxBlockDimX", hipDeviceAttributeMaxBlockDimX); + reportAttribute("maxBlockDimY", hipDeviceAttributeMaxBlockDimY); + reportAttribute("maxBlockDimZ", hipDeviceAttributeMaxBlockDimZ); + reportAttribute("MaxGridDimX", hipDeviceAttributeMaxGridDimX); + reportAttribute("maxGridDimY", hipDeviceAttributeMaxGridDimY); + reportAttribute("maxGridDimZ", hipDeviceAttributeMaxGridDimZ); + reportAttribute("maxSharedMemoryPerBlock", hipDeviceAttributeMaxSharedMemoryPerBlock); + reportAttribute("totalConstantMemory", hipDeviceAttributeTotalConstantMemory); + reportAttribute("warpSize", hipDeviceAttributeWarpSize); + reportAttribute("maxPitch", hipDeviceAttributeMaxPitch); + reportAttribute("maximumTexture1dWidth", hipDeviceAttributeMaxTexture1DWidth); + reportAttribute("maximumTexture2dWidth", hipDeviceAttributeMaxTexture2DWidth); + reportAttribute("maximumTexture2dHeight", hipDeviceAttributeMaxTexture2DHeight); + reportAttribute("maximumTexture3dWidth", hipDeviceAttributeMaxTexture3DWidth); + reportAttribute("maximumTexture3dHeight", hipDeviceAttributeMaxTexture3DHeight); + reportAttribute("maximumTexture3dDepth", hipDeviceAttributeMaxTexture3DDepth); +
reportAttribute("maxRegistersPerBlock", hipDeviceAttributeMaxRegistersPerBlock); + reportAttribute("clockRate", hipDeviceAttributeClockRate); + reportAttribute("textureAlignment", hipDeviceAttributeTextureAlignment); + reportAttribute("texturePitch_alignment", hipDeviceAttributeTexturePitchAlignment); + reportAttribute("multiprocessorCount", hipDeviceAttributeMultiprocessorCount); + reportAttribute("kernelExecTimeout", hipDeviceAttributeKernelExecTimeout); + reportAttribute("integrated", hipDeviceAttributeIntegrated); + reportAttribute("canMapHostMemory", hipDeviceAttributeCanMapHostMemory); + reportAttribute("computeMode", hipDeviceAttributeComputeMode); + reportAttribute("concurrentKernels", hipDeviceAttributeConcurrentKernels); + reportAttribute("eccEnabled", hipDeviceAttributeEccEnabled); + reportAttribute("pciBusId", hipDeviceAttributePciBusId); + reportAttribute("pciDeviceId", hipDeviceAttributePciDeviceId); + reportAttribute("memoryClockRate", hipDeviceAttributeMemoryClockRate); + reportAttribute("globalMemoryBusWidth", hipDeviceAttributeMemoryBusWidth); + reportAttribute("l2CacheSize", hipDeviceAttributeL2CacheSize); + reportAttribute("maxThreadsPerMultiprocessor", hipDeviceAttributeMaxThreadsPerMultiProcessor); + reportAttribute("computeCapabilityMajor", hipDeviceAttributeComputeCapabilityMajor); + reportAttribute("computeCapabilityMinor", hipDeviceAttributeComputeCapabilityMinor); + reportAttribute("maxSharedMemoryPerMultiprocessor", hipDeviceAttributeMaxSharedMemoryPerMultiprocessor); + reportAttribute("managedMemory", hipDeviceAttributeManagedMemory); + reportAttribute("multiGpuBoard", hipDeviceAttributeIsMultiGpuBoard); + reportAttribute("pageableMemoryAccess", hipDeviceAttributePageableMemoryAccess); + reportAttribute("concurrentManagedAccess", hipDeviceAttributeConcurrentManagedAccess); + reportAttribute("pageableMemoryAccessUsesHostPageTables", hipDeviceAttributePageableMemoryAccessUsesHostPageTables); + 
reportAttribute("directManagedMemAccessFromHost", hipDeviceAttributeDirectManagedMemAccessFromHost); +} diff --git a/test/gpu/native/deviceAttributes.prediff b/test/gpu/native/deviceAttributes.prediff new file mode 100755 index 000000000000..9e64748f96ee --- /dev/null +++ b/test/gpu/native/deviceAttributes.prediff @@ -0,0 +1,3 @@ +#!/bin/sh + +./deviceAttributes --runBaseline=true > deviceAttributes.good