From 6f700b44fa5bbd54e9d8e47dd7880f5a91364972 Mon Sep 17 00:00:00 2001 From: Sasha Lopoukhine Date: Thu, 2 Nov 2023 18:26:12 +0000 Subject: [PATCH] generate alexnet linalg --- alexnet/iree.mlir | 243 ++++++++++++++++++++++++++++ alexnet/linalg.mlir | 335 +++++++++++++++++++++++++++++++++++++++ alexnet/requirements.txt | 2 + alexnet/run.sh | 4 + alexnet/test.py | 12 ++ 5 files changed, 596 insertions(+) create mode 100644 alexnet/iree.mlir create mode 100644 alexnet/linalg.mlir create mode 100644 alexnet/requirements.txt create mode 100644 alexnet/run.sh create mode 100644 alexnet/test.py diff --git a/alexnet/iree.mlir b/alexnet/iree.mlir new file mode 100644 index 00000000..70ba7143 --- /dev/null +++ b/alexnet/iree.mlir @@ -0,0 +1,243 @@ +module @AlexNet { + util.global private @_params.features.0.weight {noinline} = dense_resource<__elided__> : tensor<64x3x11x11xf32> + util.global private @_params.features.0.bias {noinline} = dense_resource<__elided__> : tensor<64xf32> + util.global private @_params.features.3.weight {noinline} = dense_resource<__elided__> : tensor<192x64x5x5xf32> + util.global private @_params.features.3.bias {noinline} = dense_resource<__elided__> : tensor<192xf32> + util.global private @_params.features.6.weight {noinline} = dense_resource<__elided__> : tensor<384x192x3x3xf32> + util.global private @_params.features.6.bias {noinline} = dense_resource<__elided__> : tensor<384xf32> + util.global private @_params.features.8.weight {noinline} = dense_resource<__elided__> : tensor<256x384x3x3xf32> + util.global private @_params.features.8.bias {noinline} = dense_resource<__elided__> : tensor<256xf32> + util.global private @_params.features.10.weight {noinline} = dense_resource<__elided__> : tensor<256x256x3x3xf32> + util.global private @_params.features.10.bias {noinline} = dense_resource<__elided__> : tensor<256xf32> + util.global private @_params.classifier.1.weight {noinline} = dense_resource<__elided__> : tensor<4096x9216xf32> + util.global private @_params.classifier.1.bias {noinline} = dense_resource<__elided__> : tensor<4096xf32> + util.global private @_params.classifier.4.weight {noinline} = dense_resource<__elided__> : tensor<4096x4096xf32> + util.global private @_params.classifier.4.bias {noinline} = dense_resource<__elided__> : tensor<4096xf32> + util.global private @_params.classifier.6.weight {noinline} = dense_resource<__elided__> : tensor<1000x4096xf32> + util.global private @_params.classifier.6.bias {noinline} = dense_resource<__elided__> : tensor<1000xf32> + func.func @main(%arg0: tensor<64x3x224x224xf32>) -> tensor<64x1000xf32> attributes {torch.args_schema = "[1, {\22type\22: \22builtins.tuple\22, \22context\22: \22null\22, \22children_spec\22: [{\22type\22: \22builtins.list\22, \22context\22: \22null\22, \22children_spec\22: [{\22type\22: null, \22context\22: null, \22children_spec\22: []}]}, {\22type\22: \22builtins.dict\22, \22context\22: \22[]\22, \22children_spec\22: []}]}]", torch.return_schema = "[1, {\22type\22: null, \22context\22: null, \22children_spec\22: []}]"} { + %0 = torch_c.from_builtin_tensor %arg0 : tensor<64x3x224x224xf32> -> !torch.vtensor<[64,3,224,224],f32> + %1 = call @forward(%0) : (!torch.vtensor<[64,3,224,224],f32>) -> !torch.vtensor<[64,1000],f32> + %2 = torch_c.to_builtin_tensor %1 : !torch.vtensor<[64,1000],f32> -> tensor<64x1000xf32> + return %2 : tensor<64x1000xf32> + } + func.func private @forward(%arg0: !torch.vtensor<[64,3,224,224],f32>) -> !torch.vtensor<[64,1000],f32> { + %_params.features.0.weight = util.global.load @_params.features.0.weight : tensor<64x3x11x11xf32> + %0 = torch_c.from_builtin_tensor %_params.features.0.weight : tensor<64x3x11x11xf32> -> !torch.vtensor<[64,3,11,11],f32> + %_params.features.0.bias = util.global.load @_params.features.0.bias : tensor<64xf32> + %1 = torch_c.from_builtin_tensor %_params.features.0.bias : tensor<64xf32> -> !torch.vtensor<[64],f32> + %int4 = torch.constant.int 4 + %int4_0 = torch.constant.int 4 + %2 = torch.prim.ListConstruct %int4, %int4_0 : (!torch.int, !torch.int) -> !torch.list + %int2 = torch.constant.int 2 + %int2_1 = torch.constant.int 2 + %3 = torch.prim.ListConstruct %int2, %int2_1 : (!torch.int, !torch.int) -> !torch.list + %int1 = torch.constant.int 1 + %int1_2 = torch.constant.int 1 + %4 = torch.prim.ListConstruct %int1, %int1_2 : (!torch.int, !torch.int) -> !torch.list + %false = torch.constant.bool false + %int0 = torch.constant.int 0 + %int0_3 = torch.constant.int 0 + %5 = torch.prim.ListConstruct %int0, %int0_3 : (!torch.int, !torch.int) -> !torch.list + %int1_4 = torch.constant.int 1 + %6 = torch.aten.convolution %arg0, %0, %1, %2, %3, %4, %false, %5, %int1_4 : !torch.vtensor<[64,3,224,224],f32>, !torch.vtensor<[64,3,11,11],f32>, !torch.vtensor<[64],f32>, !torch.list, !torch.list, !torch.list, !torch.bool, !torch.list, !torch.int -> !torch.vtensor<[64,64,55,55],f32> + %7 = torch.aten.relu %6 : !torch.vtensor<[64,64,55,55],f32> -> !torch.vtensor<[64,64,55,55],f32> + %8 = torch.aten.detach %7 : !torch.vtensor<[64,64,55,55],f32> -> !torch.vtensor<[64,64,55,55],f32> + %int3 = torch.constant.int 3 + %int3_5 = torch.constant.int 3 + %9 = torch.prim.ListConstruct %int3, %int3_5 : (!torch.int, !torch.int) -> !torch.list + %int2_6 = torch.constant.int 2 + %int2_7 = torch.constant.int 2 + %10 = torch.prim.ListConstruct %int2_6, %int2_7 : (!torch.int, !torch.int) -> !torch.list + %int0_8 = torch.constant.int 0 + %int0_9 = torch.constant.int 0 + %11 = torch.prim.ListConstruct %int0_8, %int0_9 : (!torch.int, !torch.int) -> !torch.list + %int1_10 = torch.constant.int 1 + %int1_11 = torch.constant.int 1 + %12 = torch.prim.ListConstruct %int1_10, %int1_11 : (!torch.int, !torch.int) -> !torch.list + %false_12 = torch.constant.bool false + %result0, %result1 = torch.aten.max_pool2d_with_indices %7, %9, %10, %11, %12, %false_12 : !torch.vtensor<[64,64,55,55],f32>, !torch.list, !torch.list, !torch.list, !torch.list, !torch.bool -> !torch.vtensor<[64,64,27,27],f32>, !torch.vtensor<[64,64,27,27],si64> + %_params.features.3.weight = util.global.load @_params.features.3.weight : tensor<192x64x5x5xf32> + %13 = torch_c.from_builtin_tensor %_params.features.3.weight : tensor<192x64x5x5xf32> -> !torch.vtensor<[192,64,5,5],f32> + %_params.features.3.bias = util.global.load @_params.features.3.bias : tensor<192xf32> + %14 = torch_c.from_builtin_tensor %_params.features.3.bias : tensor<192xf32> -> !torch.vtensor<[192],f32> + %int1_13 = torch.constant.int 1 + %int1_14 = torch.constant.int 1 + %15 = torch.prim.ListConstruct %int1_13, %int1_14 : (!torch.int, !torch.int) -> !torch.list + %int2_15 = torch.constant.int 2 + %int2_16 = torch.constant.int 2 + %16 = torch.prim.ListConstruct %int2_15, %int2_16 : (!torch.int, !torch.int) -> !torch.list + %int1_17 = torch.constant.int 1 + %int1_18 = torch.constant.int 1 + %17 = torch.prim.ListConstruct %int1_17, %int1_18 : (!torch.int, !torch.int) -> !torch.list + %false_19 = torch.constant.bool false + %int0_20 = torch.constant.int 0 + %int0_21 = torch.constant.int 0 + %18 = torch.prim.ListConstruct %int0_20, %int0_21 : (!torch.int, !torch.int) -> !torch.list + %int1_22 = torch.constant.int 1 + %19 = torch.aten.convolution %result0, %13, %14, %15, %16, %17, %false_19, %18, %int1_22 : !torch.vtensor<[64,64,27,27],f32>, !torch.vtensor<[192,64,5,5],f32>, !torch.vtensor<[192],f32>, !torch.list, !torch.list, !torch.list, !torch.bool, !torch.list, !torch.int -> !torch.vtensor<[64,192,27,27],f32> + %20 = torch.aten.relu %19 : !torch.vtensor<[64,192,27,27],f32> -> !torch.vtensor<[64,192,27,27],f32> + %21 = torch.aten.detach %20 : !torch.vtensor<[64,192,27,27],f32> -> !torch.vtensor<[64,192,27,27],f32> + %int3_23 = torch.constant.int 3 + %int3_24 = torch.constant.int 3 + %22 = torch.prim.ListConstruct %int3_23, %int3_24 : (!torch.int, !torch.int) -> !torch.list + %int2_25 = torch.constant.int 2 + %int2_26 = torch.constant.int 2 + %23 = torch.prim.ListConstruct %int2_25, %int2_26 : (!torch.int, !torch.int) -> !torch.list + %int0_27 = torch.constant.int 0 + %int0_28 = torch.constant.int 0 + %24 = torch.prim.ListConstruct %int0_27, %int0_28 : (!torch.int, !torch.int) -> !torch.list + %int1_29 = torch.constant.int 1 + %int1_30 = torch.constant.int 1 + %25 = torch.prim.ListConstruct %int1_29, %int1_30 : (!torch.int, !torch.int) -> !torch.list + %false_31 = torch.constant.bool false + %result0_32, %result1_33 = torch.aten.max_pool2d_with_indices %20, %22, %23, %24, %25, %false_31 : !torch.vtensor<[64,192,27,27],f32>, !torch.list, !torch.list, !torch.list, !torch.list, !torch.bool -> !torch.vtensor<[64,192,13,13],f32>, !torch.vtensor<[64,192,13,13],si64> + %_params.features.6.weight = util.global.load @_params.features.6.weight : tensor<384x192x3x3xf32> + %26 = torch_c.from_builtin_tensor %_params.features.6.weight : tensor<384x192x3x3xf32> -> !torch.vtensor<[384,192,3,3],f32> + %_params.features.6.bias = util.global.load @_params.features.6.bias : tensor<384xf32> + %27 = torch_c.from_builtin_tensor %_params.features.6.bias : tensor<384xf32> -> !torch.vtensor<[384],f32> + %int1_34 = torch.constant.int 1 + %int1_35 = torch.constant.int 1 + %28 = torch.prim.ListConstruct %int1_34, %int1_35 : (!torch.int, !torch.int) -> !torch.list + %int1_36 = torch.constant.int 1 + %int1_37 = torch.constant.int 1 + %29 = torch.prim.ListConstruct %int1_36, %int1_37 : (!torch.int, !torch.int) -> !torch.list + %int1_38 = torch.constant.int 1 + %int1_39 = torch.constant.int 1 + %30 = torch.prim.ListConstruct %int1_38, %int1_39 : (!torch.int, !torch.int) -> !torch.list + %false_40 = torch.constant.bool false + %int0_41 = torch.constant.int 0 + %int0_42 = torch.constant.int 0 + %31 = torch.prim.ListConstruct %int0_41, %int0_42 : (!torch.int, !torch.int) -> !torch.list + %int1_43 = torch.constant.int 1 + %32 = torch.aten.convolution %result0_32, %26, %27, %28, %29, %30, %false_40, %31, %int1_43 : !torch.vtensor<[64,192,13,13],f32>, !torch.vtensor<[384,192,3,3],f32>, !torch.vtensor<[384],f32>, !torch.list, !torch.list, !torch.list, !torch.bool, !torch.list, !torch.int -> !torch.vtensor<[64,384,13,13],f32> + %33 = torch.aten.relu %32 : !torch.vtensor<[64,384,13,13],f32> -> !torch.vtensor<[64,384,13,13],f32> + %34 = torch.aten.detach %33 : !torch.vtensor<[64,384,13,13],f32> -> !torch.vtensor<[64,384,13,13],f32> + %_params.features.8.weight = util.global.load @_params.features.8.weight : tensor<256x384x3x3xf32> + %35 = torch_c.from_builtin_tensor %_params.features.8.weight : tensor<256x384x3x3xf32> -> !torch.vtensor<[256,384,3,3],f32> + %_params.features.8.bias = util.global.load @_params.features.8.bias : tensor<256xf32> + %36 = torch_c.from_builtin_tensor %_params.features.8.bias : tensor<256xf32> -> !torch.vtensor<[256],f32> + %int1_44 = torch.constant.int 1 + %int1_45 = torch.constant.int 1 + %37 = torch.prim.ListConstruct %int1_44, %int1_45 : (!torch.int, !torch.int) -> !torch.list + %int1_46 = torch.constant.int 1 + %int1_47 = torch.constant.int 1 + %38 = torch.prim.ListConstruct %int1_46, %int1_47 : (!torch.int, !torch.int) -> !torch.list + %int1_48 = torch.constant.int 1 + %int1_49 = torch.constant.int 1 + %39 = torch.prim.ListConstruct %int1_48, %int1_49 : (!torch.int, !torch.int) -> !torch.list + %false_50 = torch.constant.bool false + %int0_51 = torch.constant.int 0 + %int0_52 = torch.constant.int 0 + %40 = torch.prim.ListConstruct %int0_51, %int0_52 : (!torch.int, !torch.int) -> !torch.list + %int1_53 = torch.constant.int 1 + %41 = torch.aten.convolution %33, %35, %36, %37, %38, %39, %false_50, %40, %int1_53 : !torch.vtensor<[64,384,13,13],f32>, !torch.vtensor<[256,384,3,3],f32>, !torch.vtensor<[256],f32>, !torch.list, !torch.list, !torch.list, !torch.bool, !torch.list, !torch.int -> !torch.vtensor<[64,256,13,13],f32> + %42 = torch.aten.relu %41 : !torch.vtensor<[64,256,13,13],f32> -> !torch.vtensor<[64,256,13,13],f32> + %43 = torch.aten.detach %42 : !torch.vtensor<[64,256,13,13],f32> -> !torch.vtensor<[64,256,13,13],f32> + %_params.features.10.weight = util.global.load @_params.features.10.weight : tensor<256x256x3x3xf32> + %44 = torch_c.from_builtin_tensor %_params.features.10.weight : tensor<256x256x3x3xf32> -> !torch.vtensor<[256,256,3,3],f32> + %_params.features.10.bias = util.global.load @_params.features.10.bias : tensor<256xf32> + %45 = torch_c.from_builtin_tensor %_params.features.10.bias : tensor<256xf32> -> !torch.vtensor<[256],f32> + %int1_54 = torch.constant.int 1 + %int1_55 = torch.constant.int 1 + %46 = torch.prim.ListConstruct %int1_54, %int1_55 : (!torch.int, !torch.int) -> !torch.list + %int1_56 = torch.constant.int 1 + %int1_57 = torch.constant.int 1 + %47 = torch.prim.ListConstruct %int1_56, %int1_57 : (!torch.int, !torch.int) -> !torch.list + %int1_58 = torch.constant.int 1 + %int1_59 = torch.constant.int 1 + %48 = torch.prim.ListConstruct %int1_58, %int1_59 : (!torch.int, !torch.int) -> !torch.list + %false_60 = torch.constant.bool false + %int0_61 = torch.constant.int 0 + %int0_62 = torch.constant.int 0 + %49 = torch.prim.ListConstruct %int0_61, %int0_62 : (!torch.int, !torch.int) -> !torch.list + %int1_63 = torch.constant.int 1 + %50 = torch.aten.convolution %42, %44, %45, %46, %47, %48, %false_60, %49, %int1_63 : !torch.vtensor<[64,256,13,13],f32>, !torch.vtensor<[256,256,3,3],f32>, !torch.vtensor<[256],f32>, !torch.list, !torch.list, !torch.list, !torch.bool, !torch.list, !torch.int -> !torch.vtensor<[64,256,13,13],f32> + %51 = torch.aten.relu %50 : !torch.vtensor<[64,256,13,13],f32> -> !torch.vtensor<[64,256,13,13],f32> + %52 = torch.aten.detach %51 : !torch.vtensor<[64,256,13,13],f32> -> !torch.vtensor<[64,256,13,13],f32> + %int3_64 = torch.constant.int 3 + %int3_65 = torch.constant.int 3 + %53 = torch.prim.ListConstruct %int3_64, %int3_65 : (!torch.int, !torch.int) -> !torch.list + %int2_66 = torch.constant.int 2 + %int2_67 = torch.constant.int 2 + %54 = torch.prim.ListConstruct %int2_66, %int2_67 : (!torch.int, !torch.int) -> !torch.list + %int0_68 = torch.constant.int 0 + %int0_69 = torch.constant.int 0 + %55 = torch.prim.ListConstruct %int0_68, %int0_69 : (!torch.int, !torch.int) -> !torch.list + %int1_70 = torch.constant.int 1 + %int1_71 = torch.constant.int 1 + %56 = torch.prim.ListConstruct %int1_70, %int1_71 : (!torch.int, !torch.int) -> !torch.list + %false_72 = torch.constant.bool false + %result0_73, %result1_74 = torch.aten.max_pool2d_with_indices %51, %53, %54, %55, %56, %false_72 : !torch.vtensor<[64,256,13,13],f32>, !torch.list, !torch.list, !torch.list, !torch.list, !torch.bool -> !torch.vtensor<[64,256,6,6],f32>, !torch.vtensor<[64,256,6,6],si64> + %int1_75 = torch.constant.int 1 + %int1_76 = torch.constant.int 1 + %57 = torch.prim.ListConstruct %int1_75, %int1_76 : (!torch.int, !torch.int) -> !torch.list + %int1_77 = torch.constant.int 1 + %int1_78 = torch.constant.int 1 + %58 = torch.prim.ListConstruct %int1_77, %int1_78 : (!torch.int, !torch.int) -> !torch.list + %int0_79 = torch.constant.int 0 + %int0_80 = torch.constant.int 0 + %59 = torch.prim.ListConstruct %int0_79, %int0_80 : (!torch.int, !torch.int) -> !torch.list + %false_81 = torch.constant.bool false + %true = torch.constant.bool true + %none = torch.constant.none + %60 = torch.aten.avg_pool2d %result0_73, %57, %58, %59, %false_81, %true, %none : !torch.vtensor<[64,256,6,6],f32>, !torch.list, !torch.list, !torch.list, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[64,256,6,6],f32> + %int64 = torch.constant.int 64 + %int9216 = torch.constant.int 9216 + %61 = torch.prim.ListConstruct %int64, %int9216 : (!torch.int, !torch.int) -> !torch.list + %62 = torch.aten.view %60, %61 : !torch.vtensor<[64,256,6,6],f32>, !torch.list -> !torch.vtensor<[64,9216],f32> + %float5.000000e-01 = torch.constant.float 5.000000e-01 + %true_82 = torch.constant.bool true + %result0_83, %result1_84 = torch.aten.native_dropout %62, %float5.000000e-01, %true_82 : !torch.vtensor<[64,9216],f32>, !torch.float, !torch.bool -> !torch.vtensor<[64,9216],f32>, !torch.vtensor<[64,9216],i1> + %_params.classifier.1.weight = util.global.load @_params.classifier.1.weight : tensor<4096x9216xf32> + %63 = torch_c.from_builtin_tensor %_params.classifier.1.weight : tensor<4096x9216xf32> -> !torch.vtensor<[4096,9216],f32> + %int0_85 = torch.constant.int 0 + %int1_86 = torch.constant.int 1 + %64 = torch.aten.transpose.int %63, %int0_85, %int1_86 : !torch.vtensor<[4096,9216],f32>, !torch.int, !torch.int -> !torch.vtensor<[9216,4096],f32> + %65 = torch.aten.mm %result0_83, %64 : !torch.vtensor<[64,9216],f32>, !torch.vtensor<[9216,4096],f32> -> !torch.vtensor<[64,4096],f32> + %int1_87 = torch.constant.int 1 + %66 = torch.aten.mul.Scalar %65, %int1_87 : !torch.vtensor<[64,4096],f32>, !torch.int -> !torch.vtensor<[64,4096],f32> + %_params.classifier.1.bias = util.global.load @_params.classifier.1.bias : tensor<4096xf32> + %67 = torch_c.from_builtin_tensor %_params.classifier.1.bias : tensor<4096xf32> -> !torch.vtensor<[4096],f32> + %int1_88 = torch.constant.int 1 + %68 = torch.aten.mul.Scalar %67, %int1_88 : !torch.vtensor<[4096],f32>, !torch.int -> !torch.vtensor<[4096],f32> + %int1_89 = torch.constant.int 1 + %69 = torch.aten.add.Tensor %66, %68, %int1_89 : !torch.vtensor<[64,4096],f32>, !torch.vtensor<[4096],f32>, !torch.int -> !torch.vtensor<[64,4096],f32> + %70 = torch.aten.relu %69 : !torch.vtensor<[64,4096],f32> -> !torch.vtensor<[64,4096],f32> + %71 = torch.aten.detach %70 : !torch.vtensor<[64,4096],f32> -> !torch.vtensor<[64,4096],f32> + %float5.000000e-01_90 = torch.constant.float 5.000000e-01 + %true_91 = torch.constant.bool true + %result0_92, %result1_93 = torch.aten.native_dropout %70, %float5.000000e-01_90, %true_91 : !torch.vtensor<[64,4096],f32>, !torch.float, !torch.bool -> !torch.vtensor<[64,4096],f32>, !torch.vtensor<[64,4096],i1> + %_params.classifier.4.weight = util.global.load @_params.classifier.4.weight : tensor<4096x4096xf32> + %72 = torch_c.from_builtin_tensor %_params.classifier.4.weight : tensor<4096x4096xf32> -> !torch.vtensor<[4096,4096],f32> + %int0_94 = torch.constant.int 0 + %int1_95 = torch.constant.int 1 + %73 = torch.aten.transpose.int %72, %int0_94, %int1_95 : !torch.vtensor<[4096,4096],f32>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f32> + %74 = torch.aten.mm %result0_92, %73 : !torch.vtensor<[64,4096],f32>, !torch.vtensor<[4096,4096],f32> -> !torch.vtensor<[64,4096],f32> + %int1_96 = torch.constant.int 1 + %75 = torch.aten.mul.Scalar %74, %int1_96 : !torch.vtensor<[64,4096],f32>, !torch.int -> !torch.vtensor<[64,4096],f32> + %_params.classifier.4.bias = util.global.load @_params.classifier.4.bias : tensor<4096xf32> + %76 = torch_c.from_builtin_tensor %_params.classifier.4.bias : tensor<4096xf32> -> !torch.vtensor<[4096],f32> + %int1_97 = torch.constant.int 1 + %77 = torch.aten.mul.Scalar %76, %int1_97 : !torch.vtensor<[4096],f32>, !torch.int -> !torch.vtensor<[4096],f32> + %int1_98 = torch.constant.int 1 + %78 = torch.aten.add.Tensor %75, %77, %int1_98 : !torch.vtensor<[64,4096],f32>, !torch.vtensor<[4096],f32>, !torch.int -> !torch.vtensor<[64,4096],f32> + %79 = torch.aten.relu %78 : !torch.vtensor<[64,4096],f32> -> !torch.vtensor<[64,4096],f32> + %80 = torch.aten.detach %79 : !torch.vtensor<[64,4096],f32> -> !torch.vtensor<[64,4096],f32> + %_params.classifier.6.weight = util.global.load @_params.classifier.6.weight : tensor<1000x4096xf32> + %81 = torch_c.from_builtin_tensor %_params.classifier.6.weight : tensor<1000x4096xf32> -> !torch.vtensor<[1000,4096],f32> + %int0_99 = torch.constant.int 0 + %int1_100 = torch.constant.int 1 + %82 = torch.aten.transpose.int %81, %int0_99, %int1_100 : !torch.vtensor<[1000,4096],f32>, !torch.int, !torch.int -> !torch.vtensor<[4096,1000],f32> + %83 = torch.aten.mm %79, %82 : !torch.vtensor<[64,4096],f32>, !torch.vtensor<[4096,1000],f32> -> !torch.vtensor<[64,1000],f32> + %int1_101 = torch.constant.int 1 + %84 = torch.aten.mul.Scalar %83, %int1_101 : !torch.vtensor<[64,1000],f32>, !torch.int -> !torch.vtensor<[64,1000],f32> + %_params.classifier.6.bias = util.global.load @_params.classifier.6.bias : tensor<1000xf32> + %85 = torch_c.from_builtin_tensor %_params.classifier.6.bias : tensor<1000xf32> -> !torch.vtensor<[1000],f32> + %int1_102 = torch.constant.int 1 + %86 = torch.aten.mul.Scalar %85, %int1_102 : !torch.vtensor<[1000],f32>, !torch.int -> !torch.vtensor<[1000],f32> + %int1_103 = torch.constant.int 1 + %87 = torch.aten.add.Tensor %84, %86, %int1_103 : !torch.vtensor<[64,1000],f32>, !torch.vtensor<[1000],f32>, !torch.int -> !torch.vtensor<[64,1000],f32> + return %87 : !torch.vtensor<[64,1000],f32> + } +} diff --git a/alexnet/linalg.mlir b/alexnet/linalg.mlir new file mode 100644 index 00000000..a795093f --- /dev/null +++ b/alexnet/linalg.mlir @@ -0,0 +1,335 @@ +module @AlexNet { + util.global private mutable @global_seed = dense<0> : tensor + util.global private @_params.features.0.weight {noinline} = dense_resource<__elided__> : tensor<64x3x11x11xf32> + util.global private @_params.features.0.bias {noinline} = dense_resource<__elided__> : tensor<64xf32> + util.global private @_params.features.3.weight {noinline} = dense_resource<__elided__> : tensor<192x64x5x5xf32> + util.global private @_params.features.3.bias {noinline} = dense_resource<__elided__> : tensor<192xf32> + util.global private @_params.features.6.weight {noinline} = dense_resource<__elided__> : tensor<384x192x3x3xf32> + util.global private @_params.features.6.bias {noinline} = dense_resource<__elided__> : tensor<384xf32> + util.global private @_params.features.8.weight {noinline} = dense_resource<__elided__> : tensor<256x384x3x3xf32> + util.global private @_params.features.8.bias {noinline} = dense_resource<__elided__> : tensor<256xf32> + util.global private @_params.features.10.weight {noinline} = dense_resource<__elided__> : tensor<256x256x3x3xf32> + util.global private @_params.features.10.bias {noinline} = dense_resource<__elided__> : tensor<256xf32> + util.global private @_params.classifier.1.weight {noinline} = dense_resource<__elided__> : tensor<4096x9216xf32> + util.global private @_params.classifier.1.bias {noinline} = dense_resource<__elided__> : tensor<4096xf32> + util.global private @_params.classifier.4.weight {noinline} = dense_resource<__elided__> : tensor<4096x4096xf32> + util.global private @_params.classifier.4.bias {noinline} = dense_resource<__elided__> : tensor<4096xf32> + util.global private @_params.classifier.6.weight {noinline} = dense_resource<__elided__> : tensor<1000x4096xf32> + util.global private @_params.classifier.6.bias {noinline} = dense_resource<__elided__> : tensor<1000xf32> + func.func @main(%arg0: tensor<64x3x224x224xf32>) -> tensor<64x1000xf32> attributes {torch.args_schema = "[1, {\22type\22: \22builtins.tuple\22, \22context\22: \22null\22, \22children_spec\22: [{\22type\22: \22builtins.list\22, \22context\22: \22null\22, \22children_spec\22: [{\22type\22: null, \22context\22: null, \22children_spec\22: []}]}, {\22type\22: \22builtins.dict\22, \22context\22: \22[]\22, \22children_spec\22: []}]}]", torch.return_schema = "[1, {\22type\22: null, \22context\22: null, \22children_spec\22: []}]"} { + %0 = call @forward(%arg0) : (tensor<64x3x224x224xf32>) -> tensor<64x1000xf32> + return %0 : tensor<64x1000xf32> + } + func.func private @forward(%arg0: tensor<64x3x224x224xf32>) -> tensor<64x1000xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %cst_0 = arith.constant 0xFF800000 : f32 + %c6364136223846793005_i64 = arith.constant 6364136223846793005 : i64 + %c1442695040888963407_i64 = arith.constant 1442695040888963407 : i64 + %c32_i64 = arith.constant 32 : i64 + %cst_1 = arith.constant 5.4210107999999998E-20 : f64 + %cst_2 = arith.constant 5.000000e-01 : f64 + %cst_3 = arith.constant 0.000000e+00 : f64 + %c2 = arith.constant 2 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c9216_i64 = arith.constant 9216 : i64 + %cst_4 = arith.constant 5.000000e-01 : f32 + %c4096_i64 = arith.constant 4096 : i64 + %_params.features.0.weight = util.global.load @_params.features.0.weight : tensor<64x3x11x11xf32> + %_params.features.0.bias = util.global.load @_params.features.0.bias : tensor<64xf32> + %padded = tensor.pad %arg0 low[0, 0, 2, 2] high[0, 0, 2, 2] { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst : f32 + } : tensor<64x3x224x224xf32> to tensor<64x3x228x228xf32> + %0 = tensor.empty() : tensor<64x64x55x55xf32> + %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%_params.features.0.bias : tensor<64xf32>) outs(%0 : tensor<64x64x55x55xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<64x64x55x55xf32> + %2 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<4> : vector<2xi64>} ins(%padded, %_params.features.0.weight : tensor<64x3x228x228xf32>, tensor<64x3x11x11xf32>) outs(%1 : tensor<64x64x55x55xf32>) -> tensor<64x64x55x55xf32> + %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<64x64x55x55xf32>) outs(%0 : tensor<64x64x55x55xf32>) { + ^bb0(%in: f32, %out: f32): + %71 = arith.cmpf ugt, %in, %cst : f32 + %72 = arith.select %71, %in, %cst : f32 + linalg.yield %72 : f32 + } -> tensor<64x64x55x55xf32> + %4 = tensor.empty() : tensor<64x64x27x27xf32> + %5 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<64x64x27x27xf32>) -> tensor<64x64x27x27xf32> + %6 = tensor.empty() : tensor<3x3xf32> + %7 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%3, %6 : tensor<64x64x55x55xf32>, tensor<3x3xf32>) outs(%5 : tensor<64x64x27x27xf32>) -> tensor<64x64x27x27xf32> + %_params.features.3.weight = util.global.load @_params.features.3.weight : tensor<192x64x5x5xf32> + %_params.features.3.bias = util.global.load @_params.features.3.bias : tensor<192xf32> + %padded_5 = tensor.pad %7 low[0, 0, 2, 2] high[0, 0, 2, 2] { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst : f32 + } : tensor<64x64x27x27xf32> to tensor<64x64x31x31xf32> + %8 = tensor.empty() : tensor<64x192x27x27xf32> + %9 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%_params.features.3.bias : tensor<192xf32>) outs(%8 : tensor<64x192x27x27xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<64x192x27x27xf32> + %10 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5, %_params.features.3.weight : tensor<64x64x31x31xf32>, tensor<192x64x5x5xf32>) outs(%9 : tensor<64x192x27x27xf32>) -> tensor<64x192x27x27xf32> + %11 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%10 : tensor<64x192x27x27xf32>) outs(%8 : tensor<64x192x27x27xf32>) { + ^bb0(%in: f32, %out: f32): + %71 = arith.cmpf ugt, %in, %cst : f32 + %72 = arith.select %71, %in, %cst : f32 + linalg.yield %72 : f32 + } -> tensor<64x192x27x27xf32> + %12 = tensor.empty() : tensor<64x192x13x13xf32> + %13 = linalg.fill ins(%cst_0 : f32) outs(%12 : tensor<64x192x13x13xf32>) -> tensor<64x192x13x13xf32> + %14 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%11, %6 : tensor<64x192x27x27xf32>, tensor<3x3xf32>) outs(%13 : tensor<64x192x13x13xf32>) -> tensor<64x192x13x13xf32> + %_params.features.6.weight = util.global.load @_params.features.6.weight : tensor<384x192x3x3xf32> + %_params.features.6.bias = util.global.load @_params.features.6.bias : tensor<384xf32> + %padded_6 = tensor.pad %14 low[0, 0, 1, 1] high[0, 0, 1, 1] { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst : f32 + } : tensor<64x192x13x13xf32> to tensor<64x192x15x15xf32> + %15 = tensor.empty() : tensor<64x384x13x13xf32> + %16 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%_params.features.6.bias : tensor<384xf32>) outs(%15 : tensor<64x384x13x13xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<64x384x13x13xf32> + %17 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_6, %_params.features.6.weight : tensor<64x192x15x15xf32>, tensor<384x192x3x3xf32>) outs(%16 : tensor<64x384x13x13xf32>) -> tensor<64x384x13x13xf32> + %18 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%17 : tensor<64x384x13x13xf32>) outs(%15 : tensor<64x384x13x13xf32>) { + ^bb0(%in: f32, %out: f32): + %71 = arith.cmpf ugt, %in, %cst : f32 + %72 = arith.select %71, %in, %cst : f32 + linalg.yield %72 : f32 + } -> tensor<64x384x13x13xf32> + %_params.features.8.weight = util.global.load @_params.features.8.weight : tensor<256x384x3x3xf32> + %_params.features.8.bias = util.global.load @_params.features.8.bias : tensor<256xf32> + %padded_7 = tensor.pad %18 low[0, 0, 1, 1] high[0, 0, 1, 1] { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst : f32 + } : tensor<64x384x13x13xf32> to tensor<64x384x15x15xf32> + %19 = tensor.empty() : tensor<64x256x13x13xf32> + %20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%_params.features.8.bias : tensor<256xf32>) outs(%19 : tensor<64x256x13x13xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<64x256x13x13xf32> + %21 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_7, %_params.features.8.weight : tensor<64x384x15x15xf32>, tensor<256x384x3x3xf32>) outs(%20 : tensor<64x256x13x13xf32>) -> tensor<64x256x13x13xf32> + %22 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%21 : tensor<64x256x13x13xf32>) outs(%19 : tensor<64x256x13x13xf32>) { + ^bb0(%in: f32, %out: f32): + %71 = arith.cmpf ugt, %in, %cst : f32 + %72 = arith.select %71, %in, %cst : f32 + linalg.yield %72 : f32 + } -> tensor<64x256x13x13xf32> + %_params.features.10.weight = util.global.load @_params.features.10.weight : tensor<256x256x3x3xf32> + %_params.features.10.bias = util.global.load @_params.features.10.bias : tensor<256xf32> + %padded_8 = tensor.pad %22 low[0, 0, 1, 1] high[0, 0, 1, 1] { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst : f32 + } : tensor<64x256x13x13xf32> to tensor<64x256x15x15xf32> + %23 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%_params.features.10.bias : tensor<256xf32>) outs(%19 : tensor<64x256x13x13xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<64x256x13x13xf32> + %24 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_8, %_params.features.10.weight : tensor<64x256x15x15xf32>, tensor<256x256x3x3xf32>) outs(%23 : tensor<64x256x13x13xf32>) -> tensor<64x256x13x13xf32> + %25 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%24 : tensor<64x256x13x13xf32>) outs(%19 : tensor<64x256x13x13xf32>) { + ^bb0(%in: f32, %out: f32): + %71 = arith.cmpf ugt, %in, %cst : f32 + %72 = arith.select %71, %in, %cst : f32 + linalg.yield %72 : f32 + } -> tensor<64x256x13x13xf32> + %26 = tensor.empty() : tensor<64x256x6x6xf32> + %27 = linalg.fill ins(%cst_0 : f32) outs(%26 : tensor<64x256x6x6xf32>) -> tensor<64x256x6x6xf32> + %28 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%25, %6 : tensor<64x256x13x13xf32>, tensor<3x3xf32>) outs(%27 : tensor<64x256x6x6xf32>) -> tensor<64x256x6x6xf32> + %29 = linalg.fill ins(%cst : f32) outs(%26 : tensor<64x256x6x6xf32>) -> tensor<64x256x6x6xf32> + %30 = tensor.empty() : tensor<1x1xf32> + %31 = linalg.pooling_nchw_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%28, %30 : tensor<64x256x6x6xf32>, tensor<1x1xf32>) outs(%29 : tensor<64x256x6x6xf32>) -> tensor<64x256x6x6xf32> + %collapsed = tensor.collapse_shape %31 [[0], [1, 2, 3]] : tensor<64x256x6x6xf32> into tensor<64x9216xf32> + %32 = tensor.empty() : tensor + %33 = linalg.fill ins(%cst_2 : f64) outs(%32 : tensor) -> tensor + %global_seed = util.global.load @global_seed : tensor + %extracted = tensor.extract %global_seed[] : tensor + %34 = arith.muli %extracted, %c6364136223846793005_i64 : i64 + %35 = arith.addi %34, %c1442695040888963407_i64 : i64 + %inserted = tensor.insert %35 into %global_seed[] : tensor + util.global.store %inserted, @global_seed : tensor + %36 = tensor.empty() : tensor<64x9216xf64> + %37 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} outs(%36 : tensor<64x9216xf64>) { + ^bb0(%out: f64): + %71 = linalg.index 0 : index + %72 = arith.index_cast %71 : index to i64 + %73 = linalg.index 1 : index + %74 = arith.index_cast %73 : index to i64 + %75 = arith.muli %72, %c9216_i64 : i64 + %76 = arith.addi %75, %74 : i64 + %77 = arith.muli %76, %35 : i64 + %78 = arith.addi %77, %35 : i64 + %79 = arith.muli %77, %77 : i64 + %80 = arith.addi %79, %77 : i64 + %81 = arith.shli %80, %c32_i64 : i64 + %82 = arith.shrui %80, %c32_i64 : i64 + %83 = arith.ori %81, %82 : i64 + %84 = arith.muli %83, %83 : i64 + %85 = arith.addi %84, %78 : i64 + %86 = arith.shli %85, %c32_i64 : i64 + %87 = arith.shrui %85, %c32_i64 : i64 + %88 = arith.ori %86, %87 : i64 + %89 = arith.muli %88, %88 : i64 + %90 = arith.addi %89, %77 : i64 + %91 = arith.shli %90, %c32_i64 : i64 + %92 = arith.shrui %90, %c32_i64 : i64 + %93 = arith.ori %91, %92 : i64 + %94 = arith.muli %93, %93 : i64 + %95 = arith.addi %94, %78 : i64 + %96 = arith.shli %95, %c32_i64 : i64 + %97 = arith.shrui %95, %c32_i64 : i64 + %98 = arith.ori %96, %97 : i64 + %99 = arith.muli %98, %98 : i64 + %100 = arith.addi %99, %77 : i64 + %101 = arith.shrui %100, %c32_i64 : i64 + %102 = arith.xori %95, %101 : i64 + %103 = arith.uitofp %102 : i64 to f64 + %104 = arith.mulf %103, %cst_1 : f64 + %105 = arith.addf %104, %cst_3 : f64 + linalg.yield %105 : f64 + } -> tensor<64x9216xf64> + %38 = tensor.empty() : tensor<64x9216xi1> + %39 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%37, %33 : tensor<64x9216xf64>, tensor) outs(%38 : tensor<64x9216xi1>) { + ^bb0(%in: f64, %in_12: f64, %out: i1): + %71 = arith.cmpf ult, %in, %in_12 : f64 + linalg.yield %71 : i1 + } -> tensor<64x9216xi1> + %40 = tensor.empty() : tensor<64x9216xf32> + %41 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%39 : tensor<64x9216xi1>) outs(%40 : tensor<64x9216xf32>) { + ^bb0(%in: i1, %out: f32): + %71 = arith.uitofp %in : i1 to f32 + linalg.yield %71 : f32 + } -> tensor<64x9216xf32> + %42 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%41, %collapsed : tensor<64x9216xf32>, tensor<64x9216xf32>) outs(%40 : tensor<64x9216xf32>) { + ^bb0(%in: f32, %in_12: f32, %out: f32): + %71 = arith.mulf %in, %in_12 : f32 + linalg.yield %71 : f32 + } -> tensor<64x9216xf32> + %43 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%42 : tensor<64x9216xf32>) outs(%40 : tensor<64x9216xf32>) { + ^bb0(%in: f32, %out: f32): + %71 = arith.divf %in, %cst_4 : f32 + linalg.yield %71 : f32 + } -> tensor<64x9216xf32> + %_params.classifier.1.weight = util.global.load @_params.classifier.1.weight : tensor<4096x9216xf32> + %44 = tensor.empty() : tensor<9216x4096xf32> + %45 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%_params.classifier.1.weight : tensor<4096x9216xf32>) outs(%44 : tensor<9216x4096xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<9216x4096xf32> + %46 = tensor.empty() : tensor<64x4096xf32> + %47 = linalg.fill ins(%cst : f32) outs(%46 : tensor<64x4096xf32>) -> tensor<64x4096xf32> + %48 = linalg.matmul ins(%43, %45 : tensor<64x9216xf32>, tensor<9216x4096xf32>) outs(%47 : tensor<64x4096xf32>) -> tensor<64x4096xf32> + %_params.classifier.1.bias = util.global.load @_params.classifier.1.bias : tensor<4096xf32> + %49 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%48, %_params.classifier.1.bias : tensor<64x4096xf32>, tensor<4096xf32>) outs(%46 : tensor<64x4096xf32>) { + ^bb0(%in: f32, %in_12: f32, %out: f32): + %71 = arith.addf %in, %in_12 : f32 + linalg.yield %71 : f32 + } -> tensor<64x4096xf32> + %50 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%49 : tensor<64x4096xf32>) outs(%46 : tensor<64x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %71 = arith.cmpf ugt, %in, %cst : f32 + %72 = arith.select %71, %in, %cst : f32 + linalg.yield %72 : f32 + } -> tensor<64x4096xf32> + %global_seed_9 = util.global.load @global_seed : tensor + %extracted_10 = tensor.extract %global_seed_9[] : tensor + %51 = arith.muli %extracted_10, %c6364136223846793005_i64 : i64 + %52 = arith.addi %51, %c1442695040888963407_i64 : i64 + %inserted_11 = tensor.insert %52 into %global_seed_9[] : tensor + util.global.store %inserted_11, @global_seed : tensor + %53 = tensor.empty() : tensor<64x4096xf64> + %54 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} outs(%53 : tensor<64x4096xf64>) { + ^bb0(%out: f64): + %71 = linalg.index 0 : index + %72 = arith.index_cast %71 : index to i64 + %73 = linalg.index 1 : index + %74 = arith.index_cast %73 : index to i64 + %75 = arith.muli %72, %c4096_i64 : i64 + %76 = arith.addi %75, %74 : i64 + %77 = arith.muli %76, %52 : i64 + %78 = arith.addi %77, %52 : i64 + %79 = arith.muli %77, %77 : i64 + %80 = arith.addi %79, %77 : i64 + %81 = arith.shli %80, %c32_i64 : i64 + %82 = arith.shrui %80, %c32_i64 : i64 + %83 = arith.ori %81, %82 : i64 + %84 = arith.muli %83, %83 : i64 + %85 = arith.addi %84, %78 : i64 + %86 = arith.shli %85, %c32_i64 : i64 + %87 = arith.shrui %85, %c32_i64 : i64 + %88 = arith.ori %86, %87 : i64 + %89 = arith.muli %88, %88 : i64 + %90 = arith.addi %89, %77 : i64 + %91 = arith.shli %90, %c32_i64 : i64 + %92 = arith.shrui %90, %c32_i64 : i64 + %93 = arith.ori %91, %92 : i64 + %94 = arith.muli %93, %93 : i64 + %95 = arith.addi %94, %78 : i64 + %96 = arith.shli %95, %c32_i64 : i64 + %97 = arith.shrui %95, %c32_i64 : i64 + %98 = arith.ori %96, %97 : i64 + %99 = arith.muli %98, %98 : i64 + %100 = arith.addi %99, %77 : i64 + %101 = arith.shrui %100, %c32_i64 : i64 + %102 = arith.xori %95, %101 : i64 + %103 = arith.uitofp %102 : i64 to f64 + %104 = arith.mulf %103, %cst_1 : f64 + %105 = arith.addf %104, %cst_3 : f64 + linalg.yield %105 : f64 + } -> tensor<64x4096xf64> + %55 = tensor.empty() : tensor<64x4096xi1> + %56 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%54, %33 : tensor<64x4096xf64>, tensor) outs(%55 : tensor<64x4096xi1>) { + ^bb0(%in: f64, %in_12: f64, %out: i1): + %71 = arith.cmpf ult, %in, %in_12 : f64 + linalg.yield %71 : i1 + } -> tensor<64x4096xi1> + %57 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%56 : tensor<64x4096xi1>) outs(%46 : tensor<64x4096xf32>) { + ^bb0(%in: i1, %out: f32): + %71 = arith.uitofp %in : i1 to f32 + linalg.yield %71 : f32 + } -> tensor<64x4096xf32> + %58 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%57, %50 : tensor<64x4096xf32>, tensor<64x4096xf32>) outs(%46 : tensor<64x4096xf32>) { + ^bb0(%in: f32, %in_12: f32, %out: f32): + %71 = arith.mulf %in, %in_12 : f32 + linalg.yield %71 : f32 + } -> tensor<64x4096xf32> + %59 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%58 : tensor<64x4096xf32>) outs(%46 : tensor<64x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %71 = arith.divf %in, %cst_4 : f32 + linalg.yield %71 : f32 + } -> tensor<64x4096xf32> + %_params.classifier.4.weight = util.global.load @_params.classifier.4.weight : tensor<4096x4096xf32> + %60 = tensor.empty() : tensor<4096x4096xf32> + %61 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%_params.classifier.4.weight : tensor<4096x4096xf32>) outs(%60 : tensor<4096x4096xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<4096x4096xf32> + %62 = linalg.matmul ins(%59, %61 : tensor<64x4096xf32>, tensor<4096x4096xf32>) outs(%47 : tensor<64x4096xf32>) -> tensor<64x4096xf32> + %_params.classifier.4.bias = util.global.load @_params.classifier.4.bias : tensor<4096xf32> + %63 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%62, %_params.classifier.4.bias : tensor<64x4096xf32>, tensor<4096xf32>) outs(%46 : tensor<64x4096xf32>) { + ^bb0(%in: f32, %in_12: f32, %out: f32): + %71 = arith.addf %in, %in_12 : f32 + linalg.yield %71 : f32 + } -> tensor<64x4096xf32> + %64 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%63 : tensor<64x4096xf32>) outs(%46 : tensor<64x4096xf32>) { + ^bb0(%in: f32, %out: f32): + %71 = arith.cmpf ugt, %in, %cst : f32 + %72 = arith.select %71, %in, %cst : f32 + linalg.yield %72 : f32 + } -> tensor<64x4096xf32> + %_params.classifier.6.weight = util.global.load @_params.classifier.6.weight : tensor<1000x4096xf32> + %65 = tensor.empty() : tensor<4096x1000xf32> + %66 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%_params.classifier.6.weight : tensor<1000x4096xf32>) outs(%65 : tensor<4096x1000xf32>) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<4096x1000xf32> + %67 = tensor.empty() : tensor<64x1000xf32> + %68 = linalg.fill ins(%cst : f32) outs(%67 : tensor<64x1000xf32>) -> tensor<64x1000xf32> + %69 = linalg.matmul ins(%64, %66 : tensor<64x4096xf32>, tensor<4096x1000xf32>) outs(%68 : tensor<64x1000xf32>) -> tensor<64x1000xf32> + %_params.classifier.6.bias = util.global.load @_params.classifier.6.bias : tensor<1000xf32> + %70 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%69, %_params.classifier.6.bias : tensor<64x1000xf32>, tensor<1000xf32>) outs(%67 : tensor<64x1000xf32>) { + ^bb0(%in: f32, %in_12: f32, %out: f32): + %71 = arith.addf %in, %in_12 : f32 + linalg.yield %71 : f32 + } -> tensor<64x1000xf32> + return %70 : tensor<64x1000xf32> + } +} \ No newline at end of file diff --git a/alexnet/requirements.txt b/alexnet/requirements.txt new file mode 100644 index 00000000..cc6880ea --- /dev/null +++ b/alexnet/requirements.txt @@ -0,0 +1,2 @@ +shark-turbine +torchvision diff --git a/alexnet/run.sh b/alexnet/run.sh new file mode 100644 index 00000000..43103553 --- /dev/null +++ b/alexnet/run.sh @@ -0,0 +1,4 @@ +# Script that generated the mlir files in this folder + +python test.py > iree.mlir +iree-compile --iree-input-type=torch --compile-to=input iree.mlir > linalg.mlir diff --git a/alexnet/test.py b/alexnet/test.py new file mode 100644 index 00000000..f4e87762 --- /dev/null +++ b/alexnet/test.py @@ -0,0 +1,12 @@ +import torch +import torch.nn as nn + +import shark_turbine.aot as aot +from iree.compiler import compile_str + +from torchvision.models import AlexNet + +model = AlexNet() +example_x = torch.empty(64, 3, 224, 224) +exported = aot.export(model, example_x) +exported.print_readable()