1
1
// RUN: gc-opt %s --gc-gpu-pipeline -split-input-file | FileCheck %s
2
2
3
3
// CHECK-LABEL: llvm
4
- module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" CPU " : #dlti.target_device_spec <#dlti.dl_entry <" tile_size " , 32 : i32 >>>} {
4
+ module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" GPU " : #dlti.target_device_spec <#dlti.dl_entry <" num_exec_units " , 448 : i32 >, #dlti.dl_entry < " num_exec_units_per_slice " , 32 : i32 >, #dlti.dl_entry < " num_threads_per_eu " , 8 : i32 >, #dlti.dl_entry < " L1_cache_size_in_bytes " , 67108864 : i32 >, #dlti.dl_entry < " max_vector_op_width " , 256 : i32 >, #dlti.dl_entry < " max_work_group_size " , 1024 : i32 >>>} {
5
5
func.func @matmul_f16 (%arg0: memref <4096 x4096 xf16 >, %arg1: memref <4096 x4096 xf16 >, %arg2: memref <4096 x4096 xf16 >) {
6
6
%0 = bufferization.to_tensor %arg0 restrict : memref <4096 x4096 xf16 >
7
7
%1 = bufferization.to_tensor %arg1 restrict : memref <4096 x4096 xf16 >
@@ -16,7 +16,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
16
16
17
17
// -----
18
18
// CHECK-LABEL: llvm
19
- module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" CPU " : #dlti.target_device_spec <#dlti.dl_entry <" tile_size " , 32 : i32 >>>} {
19
+ module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" GPU " : #dlti.target_device_spec <#dlti.dl_entry <" num_exec_units " , 448 : i32 >, #dlti.dl_entry < " num_exec_units_per_slice " , 32 : i32 >, #dlti.dl_entry < " num_threads_per_eu " , 8 : i32 >, #dlti.dl_entry < " L1_cache_size_in_bytes " , 67108864 : i32 >, #dlti.dl_entry < " max_vector_op_width " , 256 : i32 >, #dlti.dl_entry < " max_work_group_size " , 1024 : i32 >>>} {
20
20
func.func @corner_shape_matmul_f16 (%arg0: memref <521 x521 xf16 >, %arg1: memref <521 x521 xf16 >, %arg2: memref <521 x521 xf16 >) {
21
21
%0 = bufferization.to_tensor %arg0 restrict : memref <521 x521 xf16 >
22
22
%1 = bufferization.to_tensor %arg1 restrict : memref <521 x521 xf16 >
@@ -31,7 +31,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
31
31
32
32
// -----
33
33
// CHECK-LABEL: llvm
34
- module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" CPU " : #dlti.target_device_spec <#dlti.dl_entry <" tile_size " , 32 : i32 >>>} {
34
+ module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" GPU " : #dlti.target_device_spec <#dlti.dl_entry <" num_exec_units " , 448 : i32 >, #dlti.dl_entry < " num_exec_units_per_slice " , 32 : i32 >, #dlti.dl_entry < " num_threads_per_eu " , 8 : i32 >, #dlti.dl_entry < " L1_cache_size_in_bytes " , 67108864 : i32 >, #dlti.dl_entry < " max_vector_op_width " , 256 : i32 >, #dlti.dl_entry < " max_work_group_size " , 1024 : i32 >>>} {
35
35
func.func @dynamic_matmul_f16 (%arg0: memref <?x?xf16 >, %arg1: memref <1024 x1024 xf16 >, %arg2: memref <?x1024 xf16 >) {
36
36
%0 = bufferization.to_tensor %arg0 restrict : memref <?x?xf16 >
37
37
%c0 = arith.constant 0 : index
@@ -50,7 +50,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
50
50
51
51
// -----
52
52
// CHECK-LABEL: llvm
53
- module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" CPU " : #dlti.target_device_spec <#dlti.dl_entry <" tile_size " , 32 : i32 >>>} {
53
+ module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" GPU " : #dlti.target_device_spec <#dlti.dl_entry <" num_exec_units " , 448 : i32 >, #dlti.dl_entry < " num_exec_units_per_slice " , 32 : i32 >, #dlti.dl_entry < " num_threads_per_eu " , 8 : i32 >, #dlti.dl_entry < " L1_cache_size_in_bytes " , 67108864 : i32 >, #dlti.dl_entry < " max_vector_op_width " , 256 : i32 >, #dlti.dl_entry < " max_work_group_size " , 1024 : i32 >>>} {
54
54
func.func @matmul_bf16 (%arg0: memref <4096 x4096 xbf16 >, %arg1: memref <4096 x4096 xbf16 >, %arg2: memref <4096 x4096 xbf16 >) {
55
55
%0 = bufferization.to_tensor %arg0 restrict : memref <4096 x4096 xbf16 >
56
56
%1 = bufferization.to_tensor %arg1 restrict : memref <4096 x4096 xbf16 >
@@ -65,7 +65,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
65
65
66
66
// -----
67
67
// CHECK-LABEL: llvm
68
- module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" CPU " : #dlti.target_device_spec <#dlti.dl_entry <" tile_size " , 32 : i32 >>>} {
68
+ module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" GPU " : #dlti.target_device_spec <#dlti.dl_entry <" num_exec_units " , 448 : i32 >, #dlti.dl_entry < " num_exec_units_per_slice " , 32 : i32 >, #dlti.dl_entry < " num_threads_per_eu " , 8 : i32 >, #dlti.dl_entry < " L1_cache_size_in_bytes " , 67108864 : i32 >, #dlti.dl_entry < " max_vector_op_width " , 256 : i32 >, #dlti.dl_entry < " max_work_group_size " , 1024 : i32 >>>} {
69
69
func.func @matmul_f32 (%arg0: memref <4096 x4096 xf32 >, %arg1: memref <4096 x4096 xf32 >, %arg2: memref <4096 x4096 xf32 >) {
70
70
%0 = bufferization.to_tensor %arg0 restrict : memref <4096 x4096 xf32 >
71
71
%1 = bufferization.to_tensor %arg1 restrict : memref <4096 x4096 xf32 >
@@ -76,4 +76,4 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
76
76
bufferization.materialize_in_destination %4 in restrict writable %arg2 : (tensor <4096 x4096 xf32 >, memref <4096 x4096 xf32 >) -> ()
77
77
return
78
78
}
79
- }
79
+ }
0 commit comments