Skip to content

Commit

Permalink
update
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhczhong committed Nov 11, 2024
1 parent 48090c4 commit 0cb9c87
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 16 deletions.
8 changes: 4 additions & 4 deletions test/mlir/test/gc/Integration/op/binary.mlir
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: gc-opt %s --gc-gpu-pipeline -split-input-file | FileCheck %s

// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @multiply(%arg0: memref<1024x1024xf16>, %arg1: memref<1024x1024xf16>, %arg2: memref<1024x1024xf16>, %arg3: memref<1024x1024xf16>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xf16>
%1 = bufferization.to_tensor %arg1 restrict : memref<1024x1024xf16>
Expand All @@ -16,7 +16,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
// -----

// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @add(%arg0: memref<1024x1024xf16>, %arg1: memref<1024x1024xf16>, %arg2: memref<1024x1024xf16>, %arg3: memref<1024x1024xf16>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xf16>
%1 = bufferization.to_tensor %arg1 restrict : memref<1024x1024xf16>
Expand All @@ -30,7 +30,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C

// -----
// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @subtract(%arg0: memref<1024x1024xf16>, %arg1: memref<1024x1024xf16>, %arg2: memref<1024x1024xf16>, %arg3: memref<1024x1024xf16>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xf16>
%1 = bufferization.to_tensor %arg1 restrict : memref<1024x1024xf16>
Expand All @@ -44,7 +44,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C

// -----
// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @divide(%arg0: memref<1024x1024xf16>, %arg1: memref<1024x1024xf16>, %arg2: memref<1024x1024xf16>, %arg3: memref<1024x1024xf16>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xf16>
%1 = bufferization.to_tensor %arg1 restrict : memref<1024x1024xf16>
Expand Down
12 changes: 6 additions & 6 deletions test/mlir/test/gc/Integration/op/matmul.mlir
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: gc-opt %s --gc-gpu-pipeline -split-input-file | FileCheck %s

// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @matmul_f16(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<4096x4096xf16>
%1 = bufferization.to_tensor %arg1 restrict : memref<4096x4096xf16>
Expand All @@ -16,7 +16,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C

// -----
// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @corner_shape_matmul_f16(%arg0: memref<521x521xf16>, %arg1: memref<521x521xf16>, %arg2: memref<521x521xf16>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<521x521xf16>
%1 = bufferization.to_tensor %arg1 restrict : memref<521x521xf16>
Expand All @@ -31,7 +31,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C

// -----
// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>}{
func.func @dynamic_matmul_f16(%arg0: memref<?x?xf16>, %arg1: memref<1024x1024xf16>, %arg2: memref<?x1024xf16>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<?x?xf16>
%c0 = arith.constant 0 : index
Expand All @@ -50,7 +50,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C

// -----
// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @matmul_bf16(%arg0: memref<4096x4096xbf16>, %arg1: memref<4096x4096xbf16>, %arg2: memref<4096x4096xbf16>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<4096x4096xbf16>
%1 = bufferization.to_tensor %arg1 restrict : memref<4096x4096xbf16>
Expand All @@ -65,7 +65,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C

// -----
// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @matmul_f32(%arg0: memref<4096x4096xf32>, %arg1: memref<4096x4096xf32>, %arg2: memref<4096x4096xf32>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<4096x4096xf32>
%1 = bufferization.to_tensor %arg1 restrict : memref<4096x4096xf32>
Expand All @@ -76,4 +76,4 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
bufferization.materialize_in_destination %4 in restrict writable %arg2 : (tensor<4096x4096xf32>, memref<4096x4096xf32>) -> ()
return
}
}
}
11 changes: 5 additions & 6 deletions test/mlir/test/gc/Integration/op/relu.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

// -----
// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @relu_f16(%arg0: memref<1024x1024xf16>, %arg1: memref<1024x1024xf16>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xf16>
%1 = tensor.empty() : tensor<1024x1024xf16>
Expand All @@ -17,7 +17,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C

// -----
// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @dynamic_relu(%arg0: memref<?x?xf16>, %arg1: memref<?x?xf16>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<?x?xf16>
%c0 = arith.constant 0 : index
Expand All @@ -35,7 +35,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C

// -----
// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @relu_bf16(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024xbf16>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xbf16>
%1 = tensor.empty() : tensor<1024x1024xbf16>
Expand All @@ -49,7 +49,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C

// -----
// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @relu_f32(%arg0: memref<1024x1024xf32>, %arg1: memref<1024x1024xf32>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xf32>
%1 = tensor.empty() : tensor<1024x1024xf32>
Expand All @@ -63,7 +63,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C

// -----
// CHECK-LABEL: llvm
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"GPU" : #dlti.target_device_spec<#dlti.dl_entry<"num_exec_units", 448 : i32>, #dlti.dl_entry<"num_exec_units_per_slice", 32 : i32>, #dlti.dl_entry<"num_threads_per_eu", 8 : i32>, #dlti.dl_entry<"L1_cache_size_in_bytes", 67108864 : i32>, #dlti.dl_entry<"max_vector_op_width", 256 : i32>, #dlti.dl_entry<"max_work_group_size", 1024 : i32>>>} {
func.func @relu_f32_corner_shape(%arg0: memref<1061x1061xf32>, %arg1: memref<1061x1061xf32>) {
%0 = bufferization.to_tensor %arg0 restrict : memref<1061x1061xf32>
%1 = tensor.empty() : tensor<1061x1061xf32>
Expand All @@ -74,4 +74,3 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
return
}
}

0 comments on commit 0cb9c87

Please sign in to comment.