From 6f700b44fa5bbd54e9d8e47dd7880f5a91364972 Mon Sep 17 00:00:00 2001
From: Sasha Lopoukhine <superlopuh@gmail.com>
Date: Thu, 2 Nov 2023 18:26:12 +0000
Subject: [PATCH] generate alexnet linalg

---
 alexnet/iree.mlir        | 243 ++++++++++++++++++++++++++++
 alexnet/linalg.mlir      | 335 +++++++++++++++++++++++++++++++++++++++
 alexnet/requirements.txt |   2 +
 alexnet/run.sh           |   4 +
 alexnet/test.py          |  12 ++
 5 files changed, 596 insertions(+)
 create mode 100644 alexnet/iree.mlir
 create mode 100644 alexnet/linalg.mlir
 create mode 100644 alexnet/requirements.txt
 create mode 100644 alexnet/run.sh
 create mode 100644 alexnet/test.py

diff --git a/alexnet/iree.mlir b/alexnet/iree.mlir
new file mode 100644
index 00000000..70ba7143
--- /dev/null
+++ b/alexnet/iree.mlir
@@ -0,0 +1,243 @@
+module @AlexNet {
+  util.global private @_params.features.0.weight {noinline} = dense_resource<__elided__> : tensor<64x3x11x11xf32> // Parameter globals: every initializer below is the `__elided__` dense_resource placeholder and this file defines no resource blobs, so only shapes and element types are recorded.
+  util.global private @_params.features.0.bias {noinline} = dense_resource<__elided__> : tensor<64xf32>
+  util.global private @_params.features.3.weight {noinline} = dense_resource<__elided__> : tensor<192x64x5x5xf32>
+  util.global private @_params.features.3.bias {noinline} = dense_resource<__elided__> : tensor<192xf32>
+  util.global private @_params.features.6.weight {noinline} = dense_resource<__elided__> : tensor<384x192x3x3xf32>
+  util.global private @_params.features.6.bias {noinline} = dense_resource<__elided__> : tensor<384xf32>
+  util.global private @_params.features.8.weight {noinline} = dense_resource<__elided__> : tensor<256x384x3x3xf32>
+  util.global private @_params.features.8.bias {noinline} = dense_resource<__elided__> : tensor<256xf32>
+  util.global private @_params.features.10.weight {noinline} = dense_resource<__elided__> : tensor<256x256x3x3xf32>
+  util.global private @_params.features.10.bias {noinline} = dense_resource<__elided__> : tensor<256xf32>
+  util.global private @_params.classifier.1.weight {noinline} = dense_resource<__elided__> : tensor<4096x9216xf32> // classifier.* weights are 2-D and are transposed in @forward before each torch.aten.mm.
+  util.global private @_params.classifier.1.bias {noinline} = dense_resource<__elided__> : tensor<4096xf32>
+  util.global private @_params.classifier.4.weight {noinline} = dense_resource<__elided__> : tensor<4096x4096xf32>
+  util.global private @_params.classifier.4.bias {noinline} = dense_resource<__elided__> : tensor<4096xf32>
+  util.global private @_params.classifier.6.weight {noinline} = dense_resource<__elided__> : tensor<1000x4096xf32>
+  util.global private @_params.classifier.6.bias {noinline} = dense_resource<__elided__> : tensor<1000xf32>
+  func.func @main(%arg0: tensor<64x3x224x224xf32>) -> tensor<64x1000xf32> attributes {torch.args_schema = "[1, {\22type\22: \22builtins.tuple\22, \22context\22: \22null\22, \22children_spec\22: [{\22type\22: \22builtins.list\22, \22context\22: \22null\22, \22children_spec\22: [{\22type\22: null, \22context\22: null, \22children_spec\22: []}]}, {\22type\22: \22builtins.dict\22, \22context\22: \22[]\22, \22children_spec\22: []}]}]", torch.return_schema = "[1, {\22type\22: null, \22context\22: null, \22children_spec\22: []}]"} { // Public entry point: takes and returns builtin tensors and delegates the computation to the private @forward.
+    %0 = torch_c.from_builtin_tensor %arg0 : tensor<64x3x224x224xf32> -> !torch.vtensor<[64,3,224,224],f32> // Convert the builtin tensor argument to a torch value tensor.
+    %1 = call @forward(%0) : (!torch.vtensor<[64,3,224,224],f32>) -> !torch.vtensor<[64,1000],f32> // Run the network body.
+    %2 = torch_c.to_builtin_tensor %1 : !torch.vtensor<[64,1000],f32> -> tensor<64x1000xf32> // Convert the result back to a builtin tensor for the caller.
+    return %2 : tensor<64x1000xf32>
+  }
+  func.func private @forward(%arg0: !torch.vtensor<[64,3,224,224],f32>) -> !torch.vtensor<[64,1000],f32> { // Network body in the torch dialect: 5 convolutions (each followed by relu), 3 max-pools, 1 avg-pool, 2 dropouts and 3 mm+bias layers.
+    %_params.features.0.weight = util.global.load @_params.features.0.weight : tensor<64x3x11x11xf32>
+    %0 = torch_c.from_builtin_tensor %_params.features.0.weight : tensor<64x3x11x11xf32> -> !torch.vtensor<[64,3,11,11],f32>
+    %_params.features.0.bias = util.global.load @_params.features.0.bias : tensor<64xf32>
+    %1 = torch_c.from_builtin_tensor %_params.features.0.bias : tensor<64xf32> -> !torch.vtensor<[64],f32>
+    %int4 = torch.constant.int 4
+    %int4_0 = torch.constant.int 4
+    %2 = torch.prim.ListConstruct %int4, %int4_0 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int2 = torch.constant.int 2
+    %int2_1 = torch.constant.int 2
+    %3 = torch.prim.ListConstruct %int2, %int2_1 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1 = torch.constant.int 1
+    %int1_2 = torch.constant.int 1
+    %4 = torch.prim.ListConstruct %int1, %int1_2 : (!torch.int, !torch.int) -> !torch.list<int>
+    %false = torch.constant.bool false
+    %int0 = torch.constant.int 0
+    %int0_3 = torch.constant.int 0
+    %5 = torch.prim.ListConstruct %int0, %int0_3 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_4 = torch.constant.int 1
+    %6 = torch.aten.convolution %arg0, %0, %1, %2, %3, %4, %false, %5, %int1_4 : !torch.vtensor<[64,3,224,224],f32>, !torch.vtensor<[64,3,11,11],f32>, !torch.vtensor<[64],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[64,64,55,55],f32> // conv 1 (features.0): operands after bias are %2 = [4,4], %3 = [2,2], %4 = [1,1], false, %5 = [0,0], 1 (stride, padding, dilation, transposed, output_padding, groups per the aten schema); 3x224x224 -> 64x55x55.
+    %7 = torch.aten.relu %6 : !torch.vtensor<[64,64,55,55],f32> -> !torch.vtensor<[64,64,55,55],f32>
+    %8 = torch.aten.detach %7 : !torch.vtensor<[64,64,55,55],f32> -> !torch.vtensor<[64,64,55,55],f32> // %8 has no further use in this function; later ops consume %7 directly.
+    %int3 = torch.constant.int 3
+    %int3_5 = torch.constant.int 3
+    %9 = torch.prim.ListConstruct %int3, %int3_5 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int2_6 = torch.constant.int 2
+    %int2_7 = torch.constant.int 2
+    %10 = torch.prim.ListConstruct %int2_6, %int2_7 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int0_8 = torch.constant.int 0
+    %int0_9 = torch.constant.int 0
+    %11 = torch.prim.ListConstruct %int0_8, %int0_9 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_10 = torch.constant.int 1
+    %int1_11 = torch.constant.int 1
+    %12 = torch.prim.ListConstruct %int1_10, %int1_11 : (!torch.int, !torch.int) -> !torch.list<int>
+    %false_12 = torch.constant.bool false
+    %result0, %result1 = torch.aten.max_pool2d_with_indices %7, %9, %10, %11, %12, %false_12 : !torch.vtensor<[64,64,55,55],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool -> !torch.vtensor<[64,64,27,27],f32>, !torch.vtensor<[64,64,27,27],si64> // max-pool 1: %9 = [3,3], %10 = [2,2], %11 = [0,0], %12 = [1,1], false (kernel, stride, padding, dilation, ceil_mode per the aten schema); 55x55 -> 27x27. The indices result %result1 is unused.
+    %_params.features.3.weight = util.global.load @_params.features.3.weight : tensor<192x64x5x5xf32>
+    %13 = torch_c.from_builtin_tensor %_params.features.3.weight : tensor<192x64x5x5xf32> -> !torch.vtensor<[192,64,5,5],f32>
+    %_params.features.3.bias = util.global.load @_params.features.3.bias : tensor<192xf32>
+    %14 = torch_c.from_builtin_tensor %_params.features.3.bias : tensor<192xf32> -> !torch.vtensor<[192],f32>
+    %int1_13 = torch.constant.int 1
+    %int1_14 = torch.constant.int 1
+    %15 = torch.prim.ListConstruct %int1_13, %int1_14 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int2_15 = torch.constant.int 2
+    %int2_16 = torch.constant.int 2
+    %16 = torch.prim.ListConstruct %int2_15, %int2_16 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_17 = torch.constant.int 1
+    %int1_18 = torch.constant.int 1
+    %17 = torch.prim.ListConstruct %int1_17, %int1_18 : (!torch.int, !torch.int) -> !torch.list<int>
+    %false_19 = torch.constant.bool false
+    %int0_20 = torch.constant.int 0
+    %int0_21 = torch.constant.int 0
+    %18 = torch.prim.ListConstruct %int0_20, %int0_21 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_22 = torch.constant.int 1
+    %19 = torch.aten.convolution %result0, %13, %14, %15, %16, %17, %false_19, %18, %int1_22 : !torch.vtensor<[64,64,27,27],f32>, !torch.vtensor<[192,64,5,5],f32>, !torch.vtensor<[192],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[64,192,27,27],f32> // conv 2 (features.3): %15 = [1,1], %16 = [2,2], %17 = [1,1] (stride, padding, dilation); 64 -> 192 channels, spatial size stays 27x27.
+    %20 = torch.aten.relu %19 : !torch.vtensor<[64,192,27,27],f32> -> !torch.vtensor<[64,192,27,27],f32>
+    %21 = torch.aten.detach %20 : !torch.vtensor<[64,192,27,27],f32> -> !torch.vtensor<[64,192,27,27],f32> // %21 has no further use in this function.
+    %int3_23 = torch.constant.int 3
+    %int3_24 = torch.constant.int 3
+    %22 = torch.prim.ListConstruct %int3_23, %int3_24 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int2_25 = torch.constant.int 2
+    %int2_26 = torch.constant.int 2
+    %23 = torch.prim.ListConstruct %int2_25, %int2_26 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int0_27 = torch.constant.int 0
+    %int0_28 = torch.constant.int 0
+    %24 = torch.prim.ListConstruct %int0_27, %int0_28 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_29 = torch.constant.int 1
+    %int1_30 = torch.constant.int 1
+    %25 = torch.prim.ListConstruct %int1_29, %int1_30 : (!torch.int, !torch.int) -> !torch.list<int>
+    %false_31 = torch.constant.bool false
+    %result0_32, %result1_33 = torch.aten.max_pool2d_with_indices %20, %22, %23, %24, %25, %false_31 : !torch.vtensor<[64,192,27,27],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool -> !torch.vtensor<[64,192,13,13],f32>, !torch.vtensor<[64,192,13,13],si64> // max-pool 2: same [3,3] / [2,2] / [0,0] / [1,1] operands as max-pool 1; 27x27 -> 13x13. %result1_33 is unused.
+    %_params.features.6.weight = util.global.load @_params.features.6.weight : tensor<384x192x3x3xf32>
+    %26 = torch_c.from_builtin_tensor %_params.features.6.weight : tensor<384x192x3x3xf32> -> !torch.vtensor<[384,192,3,3],f32>
+    %_params.features.6.bias = util.global.load @_params.features.6.bias : tensor<384xf32>
+    %27 = torch_c.from_builtin_tensor %_params.features.6.bias : tensor<384xf32> -> !torch.vtensor<[384],f32>
+    %int1_34 = torch.constant.int 1
+    %int1_35 = torch.constant.int 1
+    %28 = torch.prim.ListConstruct %int1_34, %int1_35 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_36 = torch.constant.int 1
+    %int1_37 = torch.constant.int 1
+    %29 = torch.prim.ListConstruct %int1_36, %int1_37 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_38 = torch.constant.int 1
+    %int1_39 = torch.constant.int 1
+    %30 = torch.prim.ListConstruct %int1_38, %int1_39 : (!torch.int, !torch.int) -> !torch.list<int>
+    %false_40 = torch.constant.bool false
+    %int0_41 = torch.constant.int 0
+    %int0_42 = torch.constant.int 0
+    %31 = torch.prim.ListConstruct %int0_41, %int0_42 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_43 = torch.constant.int 1
+    %32 = torch.aten.convolution %result0_32, %26, %27, %28, %29, %30, %false_40, %31, %int1_43 : !torch.vtensor<[64,192,13,13],f32>, !torch.vtensor<[384,192,3,3],f32>, !torch.vtensor<[384],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[64,384,13,13],f32> // conv 3 (features.6): stride, padding and dilation lists are all [1,1]; 192 -> 384 channels at 13x13.
+    %33 = torch.aten.relu %32 : !torch.vtensor<[64,384,13,13],f32> -> !torch.vtensor<[64,384,13,13],f32>
+    %34 = torch.aten.detach %33 : !torch.vtensor<[64,384,13,13],f32> -> !torch.vtensor<[64,384,13,13],f32> // %34 has no further use in this function.
+    %_params.features.8.weight = util.global.load @_params.features.8.weight : tensor<256x384x3x3xf32>
+    %35 = torch_c.from_builtin_tensor %_params.features.8.weight : tensor<256x384x3x3xf32> -> !torch.vtensor<[256,384,3,3],f32>
+    %_params.features.8.bias = util.global.load @_params.features.8.bias : tensor<256xf32>
+    %36 = torch_c.from_builtin_tensor %_params.features.8.bias : tensor<256xf32> -> !torch.vtensor<[256],f32>
+    %int1_44 = torch.constant.int 1
+    %int1_45 = torch.constant.int 1
+    %37 = torch.prim.ListConstruct %int1_44, %int1_45 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_46 = torch.constant.int 1
+    %int1_47 = torch.constant.int 1
+    %38 = torch.prim.ListConstruct %int1_46, %int1_47 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_48 = torch.constant.int 1
+    %int1_49 = torch.constant.int 1
+    %39 = torch.prim.ListConstruct %int1_48, %int1_49 : (!torch.int, !torch.int) -> !torch.list<int>
+    %false_50 = torch.constant.bool false
+    %int0_51 = torch.constant.int 0
+    %int0_52 = torch.constant.int 0
+    %40 = torch.prim.ListConstruct %int0_51, %int0_52 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_53 = torch.constant.int 1
+    %41 = torch.aten.convolution %33, %35, %36, %37, %38, %39, %false_50, %40, %int1_53 : !torch.vtensor<[64,384,13,13],f32>, !torch.vtensor<[256,384,3,3],f32>, !torch.vtensor<[256],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[64,256,13,13],f32> // conv 4 (features.8): all-[1,1] stride/padding/dilation; 384 -> 256 channels at 13x13. No pooling between conv 3 and conv 4: the input is the relu result %33.
+    %42 = torch.aten.relu %41 : !torch.vtensor<[64,256,13,13],f32> -> !torch.vtensor<[64,256,13,13],f32>
+    %43 = torch.aten.detach %42 : !torch.vtensor<[64,256,13,13],f32> -> !torch.vtensor<[64,256,13,13],f32> // %43 has no further use in this function.
+    %_params.features.10.weight = util.global.load @_params.features.10.weight : tensor<256x256x3x3xf32>
+    %44 = torch_c.from_builtin_tensor %_params.features.10.weight : tensor<256x256x3x3xf32> -> !torch.vtensor<[256,256,3,3],f32>
+    %_params.features.10.bias = util.global.load @_params.features.10.bias : tensor<256xf32>
+    %45 = torch_c.from_builtin_tensor %_params.features.10.bias : tensor<256xf32> -> !torch.vtensor<[256],f32>
+    %int1_54 = torch.constant.int 1
+    %int1_55 = torch.constant.int 1
+    %46 = torch.prim.ListConstruct %int1_54, %int1_55 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_56 = torch.constant.int 1
+    %int1_57 = torch.constant.int 1
+    %47 = torch.prim.ListConstruct %int1_56, %int1_57 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_58 = torch.constant.int 1
+    %int1_59 = torch.constant.int 1
+    %48 = torch.prim.ListConstruct %int1_58, %int1_59 : (!torch.int, !torch.int) -> !torch.list<int>
+    %false_60 = torch.constant.bool false
+    %int0_61 = torch.constant.int 0
+    %int0_62 = torch.constant.int 0
+    %49 = torch.prim.ListConstruct %int0_61, %int0_62 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_63 = torch.constant.int 1
+    %50 = torch.aten.convolution %42, %44, %45, %46, %47, %48, %false_60, %49, %int1_63 : !torch.vtensor<[64,256,13,13],f32>, !torch.vtensor<[256,256,3,3],f32>, !torch.vtensor<[256],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[64,256,13,13],f32> // conv 5 (features.10): all-[1,1] stride/padding/dilation; 256 -> 256 channels at 13x13.
+    %51 = torch.aten.relu %50 : !torch.vtensor<[64,256,13,13],f32> -> !torch.vtensor<[64,256,13,13],f32>
+    %52 = torch.aten.detach %51 : !torch.vtensor<[64,256,13,13],f32> -> !torch.vtensor<[64,256,13,13],f32> // %52 has no further use in this function.
+    %int3_64 = torch.constant.int 3
+    %int3_65 = torch.constant.int 3
+    %53 = torch.prim.ListConstruct %int3_64, %int3_65 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int2_66 = torch.constant.int 2
+    %int2_67 = torch.constant.int 2
+    %54 = torch.prim.ListConstruct %int2_66, %int2_67 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int0_68 = torch.constant.int 0
+    %int0_69 = torch.constant.int 0
+    %55 = torch.prim.ListConstruct %int0_68, %int0_69 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_70 = torch.constant.int 1
+    %int1_71 = torch.constant.int 1
+    %56 = torch.prim.ListConstruct %int1_70, %int1_71 : (!torch.int, !torch.int) -> !torch.list<int>
+    %false_72 = torch.constant.bool false
+    %result0_73, %result1_74 = torch.aten.max_pool2d_with_indices %51, %53, %54, %55, %56, %false_72 : !torch.vtensor<[64,256,13,13],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool -> !torch.vtensor<[64,256,6,6],f32>, !torch.vtensor<[64,256,6,6],si64> // max-pool 3: same [3,3] / [2,2] / [0,0] / [1,1] operands as the earlier pools; 13x13 -> 6x6. %result1_74 is unused.
+    %int1_75 = torch.constant.int 1
+    %int1_76 = torch.constant.int 1
+    %57 = torch.prim.ListConstruct %int1_75, %int1_76 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int1_77 = torch.constant.int 1
+    %int1_78 = torch.constant.int 1
+    %58 = torch.prim.ListConstruct %int1_77, %int1_78 : (!torch.int, !torch.int) -> !torch.list<int>
+    %int0_79 = torch.constant.int 0
+    %int0_80 = torch.constant.int 0
+    %59 = torch.prim.ListConstruct %int0_79, %int0_80 : (!torch.int, !torch.int) -> !torch.list<int>
+    %false_81 = torch.constant.bool false
+    %true = torch.constant.bool true
+    %none = torch.constant.none
+    %60 = torch.aten.avg_pool2d %result0_73, %57, %58, %59, %false_81, %true, %none : !torch.vtensor<[64,256,6,6],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[64,256,6,6],f32> // avg-pool with %57 = [1,1] kernel, %58 = [1,1] stride, %59 = [0,0] padding: input and result types are the same 6x6 shape.
+    %int64 = torch.constant.int 64
+    %int9216 = torch.constant.int 9216
+    %61 = torch.prim.ListConstruct %int64, %int9216 : (!torch.int, !torch.int) -> !torch.list<int>
+    %62 = torch.aten.view %60, %61 : !torch.vtensor<[64,256,6,6],f32>, !torch.list<int> -> !torch.vtensor<[64,9216],f32> // Flatten [64,256,6,6] to [64,9216] (256*6*6 = 9216) for the classifier layers.
+    %float5.000000e-01 = torch.constant.float 5.000000e-01
+    %true_82 = torch.constant.bool true
+    %result0_83, %result1_84 = torch.aten.native_dropout %62, %float5.000000e-01, %true_82 : !torch.vtensor<[64,9216],f32>, !torch.float, !torch.bool -> !torch.vtensor<[64,9216],f32>, !torch.vtensor<[64,9216],i1> // dropout 1: p = 0.5 with the train operand set to true; the i1 mask %result1_84 is unused. NOTE(review): train = true means dropout is active in this graph - confirm the model was meant to be exported in training mode.
+    %_params.classifier.1.weight = util.global.load @_params.classifier.1.weight : tensor<4096x9216xf32>
+    %63 = torch_c.from_builtin_tensor %_params.classifier.1.weight : tensor<4096x9216xf32> -> !torch.vtensor<[4096,9216],f32>
+    %int0_85 = torch.constant.int 0
+    %int1_86 = torch.constant.int 1
+    %64 = torch.aten.transpose.int %63, %int0_85, %int1_86 : !torch.vtensor<[4096,9216],f32>, !torch.int, !torch.int -> !torch.vtensor<[9216,4096],f32> // Swap dims 0 and 1 so the weight is [in, out] for the mm below.
+    %65 = torch.aten.mm %result0_83, %64 : !torch.vtensor<[64,9216],f32>, !torch.vtensor<[9216,4096],f32> -> !torch.vtensor<[64,4096],f32> // classifier.1 matmul: [64,9216] x [9216,4096].
+    %int1_87 = torch.constant.int 1
+    %66 = torch.aten.mul.Scalar %65, %int1_87 : !torch.vtensor<[64,4096],f32>, !torch.int -> !torch.vtensor<[64,4096],f32> // Scale by the integer constant 1.
+    %_params.classifier.1.bias = util.global.load @_params.classifier.1.bias : tensor<4096xf32>
+    %67 = torch_c.from_builtin_tensor %_params.classifier.1.bias : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
+    %int1_88 = torch.constant.int 1
+    %68 = torch.aten.mul.Scalar %67, %int1_88 : !torch.vtensor<[4096],f32>, !torch.int -> !torch.vtensor<[4096],f32> // Bias is likewise scaled by the integer constant 1.
+    %int1_89 = torch.constant.int 1
+    %69 = torch.aten.add.Tensor %66, %68, %int1_89 : !torch.vtensor<[64,4096],f32>, !torch.vtensor<[4096],f32>, !torch.int -> !torch.vtensor<[64,4096],f32> // Add the [4096] bias to the [64,4096] product; the trailing operand (alpha in the aten schema) is 1.
+    %70 = torch.aten.relu %69 : !torch.vtensor<[64,4096],f32> -> !torch.vtensor<[64,4096],f32>
+    %71 = torch.aten.detach %70 : !torch.vtensor<[64,4096],f32> -> !torch.vtensor<[64,4096],f32> // %71 has no further use in this function.
+    %float5.000000e-01_90 = torch.constant.float 5.000000e-01
+    %true_91 = torch.constant.bool true
+    %result0_92, %result1_93 = torch.aten.native_dropout %70, %float5.000000e-01_90, %true_91 : !torch.vtensor<[64,4096],f32>, !torch.float, !torch.bool -> !torch.vtensor<[64,4096],f32>, !torch.vtensor<[64,4096],i1> // dropout 2: p = 0.5, train operand true (same review note as dropout 1); the mask %result1_93 is unused.
+    %_params.classifier.4.weight = util.global.load @_params.classifier.4.weight : tensor<4096x4096xf32>
+    %72 = torch_c.from_builtin_tensor %_params.classifier.4.weight : tensor<4096x4096xf32> -> !torch.vtensor<[4096,4096],f32>
+    %int0_94 = torch.constant.int 0
+    %int1_95 = torch.constant.int 1
+    %73 = torch.aten.transpose.int %72, %int0_94, %int1_95 : !torch.vtensor<[4096,4096],f32>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f32>
+    %74 = torch.aten.mm %result0_92, %73 : !torch.vtensor<[64,4096],f32>, !torch.vtensor<[4096,4096],f32> -> !torch.vtensor<[64,4096],f32> // classifier.4 matmul: [64,4096] x [4096,4096].
+    %int1_96 = torch.constant.int 1
+    %75 = torch.aten.mul.Scalar %74, %int1_96 : !torch.vtensor<[64,4096],f32>, !torch.int -> !torch.vtensor<[64,4096],f32>
+    %_params.classifier.4.bias = util.global.load @_params.classifier.4.bias : tensor<4096xf32>
+    %76 = torch_c.from_builtin_tensor %_params.classifier.4.bias : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
+    %int1_97 = torch.constant.int 1
+    %77 = torch.aten.mul.Scalar %76, %int1_97 : !torch.vtensor<[4096],f32>, !torch.int -> !torch.vtensor<[4096],f32>
+    %int1_98 = torch.constant.int 1
+    %78 = torch.aten.add.Tensor %75, %77, %int1_98 : !torch.vtensor<[64,4096],f32>, !torch.vtensor<[4096],f32>, !torch.int -> !torch.vtensor<[64,4096],f32>
+    %79 = torch.aten.relu %78 : !torch.vtensor<[64,4096],f32> -> !torch.vtensor<[64,4096],f32>
+    %80 = torch.aten.detach %79 : !torch.vtensor<[64,4096],f32> -> !torch.vtensor<[64,4096],f32> // %80 has no further use in this function.
+    %_params.classifier.6.weight = util.global.load @_params.classifier.6.weight : tensor<1000x4096xf32>
+    %81 = torch_c.from_builtin_tensor %_params.classifier.6.weight : tensor<1000x4096xf32> -> !torch.vtensor<[1000,4096],f32>
+    %int0_99 = torch.constant.int 0
+    %int1_100 = torch.constant.int 1
+    %82 = torch.aten.transpose.int %81, %int0_99, %int1_100 : !torch.vtensor<[1000,4096],f32>, !torch.int, !torch.int -> !torch.vtensor<[4096,1000],f32>
+    %83 = torch.aten.mm %79, %82 : !torch.vtensor<[64,4096],f32>, !torch.vtensor<[4096,1000],f32> -> !torch.vtensor<[64,1000],f32> // classifier.6 matmul: consumes the relu result %79 directly (no dropout before this layer); [64,4096] x [4096,1000].
+    %int1_101 = torch.constant.int 1
+    %84 = torch.aten.mul.Scalar %83, %int1_101 : !torch.vtensor<[64,1000],f32>, !torch.int -> !torch.vtensor<[64,1000],f32>
+    %_params.classifier.6.bias = util.global.load @_params.classifier.6.bias : tensor<1000xf32>
+    %85 = torch_c.from_builtin_tensor %_params.classifier.6.bias : tensor<1000xf32> -> !torch.vtensor<[1000],f32>
+    %int1_102 = torch.constant.int 1
+    %86 = torch.aten.mul.Scalar %85, %int1_102 : !torch.vtensor<[1000],f32>, !torch.int -> !torch.vtensor<[1000],f32>
+    %int1_103 = torch.constant.int 1
+    %87 = torch.aten.add.Tensor %84, %86, %int1_103 : !torch.vtensor<[64,1000],f32>, !torch.vtensor<[1000],f32>, !torch.int -> !torch.vtensor<[64,1000],f32> // Final bias add; no activation is applied to the [64,1000] result.
+    return %87 : !torch.vtensor<[64,1000],f32>
+  }
+}
diff --git a/alexnet/linalg.mlir b/alexnet/linalg.mlir
new file mode 100644
index 00000000..a795093f
--- /dev/null
+++ b/alexnet/linalg.mlir
@@ -0,0 +1,335 @@
+module @AlexNet {
+  util.global private mutable @global_seed = dense<0> : tensor<i64> // The only mutable global: a rank-0 i64 initialised to 0. NOTE(review): presumably the RNG state for the lowered dropout ops - its uses are outside the visible part of @forward, confirm there.
+  util.global private @_params.features.0.weight {noinline} = dense_resource<__elided__> : tensor<64x3x11x11xf32> // Parameter globals: same names, shapes and `__elided__` placeholder initializers as in alexnet/iree.mlir.
+  util.global private @_params.features.0.bias {noinline} = dense_resource<__elided__> : tensor<64xf32>
+  util.global private @_params.features.3.weight {noinline} = dense_resource<__elided__> : tensor<192x64x5x5xf32>
+  util.global private @_params.features.3.bias {noinline} = dense_resource<__elided__> : tensor<192xf32>
+  util.global private @_params.features.6.weight {noinline} = dense_resource<__elided__> : tensor<384x192x3x3xf32>
+  util.global private @_params.features.6.bias {noinline} = dense_resource<__elided__> : tensor<384xf32>
+  util.global private @_params.features.8.weight {noinline} = dense_resource<__elided__> : tensor<256x384x3x3xf32>
+  util.global private @_params.features.8.bias {noinline} = dense_resource<__elided__> : tensor<256xf32>
+  util.global private @_params.features.10.weight {noinline} = dense_resource<__elided__> : tensor<256x256x3x3xf32>
+  util.global private @_params.features.10.bias {noinline} = dense_resource<__elided__> : tensor<256xf32>
+  util.global private @_params.classifier.1.weight {noinline} = dense_resource<__elided__> : tensor<4096x9216xf32>
+  util.global private @_params.classifier.1.bias {noinline} = dense_resource<__elided__> : tensor<4096xf32>
+  util.global private @_params.classifier.4.weight {noinline} = dense_resource<__elided__> : tensor<4096x4096xf32>
+  util.global private @_params.classifier.4.bias {noinline} = dense_resource<__elided__> : tensor<4096xf32>
+  util.global private @_params.classifier.6.weight {noinline} = dense_resource<__elided__> : tensor<1000x4096xf32>
+  util.global private @_params.classifier.6.bias {noinline} = dense_resource<__elided__> : tensor<1000xf32>
+  func.func @main(%arg0: tensor<64x3x224x224xf32>) -> tensor<64x1000xf32> attributes {torch.args_schema = "[1, {\22type\22: \22builtins.tuple\22, \22context\22: \22null\22, \22children_spec\22: [{\22type\22: \22builtins.list\22, \22context\22: \22null\22, \22children_spec\22: [{\22type\22: null, \22context\22: null, \22children_spec\22: []}]}, {\22type\22: \22builtins.dict\22, \22context\22: \22[]\22, \22children_spec\22: []}]}]", torch.return_schema = "[1, {\22type\22: null, \22context\22: null, \22children_spec\22: []}]"} { // Public entry point: same signature and schema attributes as @main in alexnet/iree.mlir.
+    %0 = call @forward(%arg0) : (tensor<64x3x224x224xf32>) -> tensor<64x1000xf32> // @forward takes builtin tensors here, so the argument is passed through with no torch_c conversion.
+    return %0 : tensor<64x1000xf32>
+  }
+  func.func private @forward(%arg0: tensor<64x3x224x224xf32>) -> tensor<64x1000xf32> {
+    %cst = arith.constant 0.000000e+00 : f32
+    %cst_0 = arith.constant 0xFF800000 : f32
+    %c6364136223846793005_i64 = arith.constant 6364136223846793005 : i64
+    %c1442695040888963407_i64 = arith.constant 1442695040888963407 : i64
+    %c32_i64 = arith.constant 32 : i64
+    %cst_1 = arith.constant 5.4210107999999998E-20 : f64
+    %cst_2 = arith.constant 5.000000e-01 : f64
+    %cst_3 = arith.constant 0.000000e+00 : f64
+    %c2 = arith.constant 2 : index
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %c9216_i64 = arith.constant 9216 : i64
+    %cst_4 = arith.constant 5.000000e-01 : f32
+    %c4096_i64 = arith.constant 4096 : i64
+    %_params.features.0.weight = util.global.load @_params.features.0.weight : tensor<64x3x11x11xf32>
+    %_params.features.0.bias = util.global.load @_params.features.0.bias : tensor<64xf32>
+    %padded = tensor.pad %arg0 low[0, 0, 2, 2] high[0, 0, 2, 2] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+      tensor.yield %cst : f32
+    } : tensor<64x3x224x224xf32> to tensor<64x3x228x228xf32>
+    %0 = tensor.empty() : tensor<64x64x55x55xf32>
+    %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%_params.features.0.bias : tensor<64xf32>) outs(%0 : tensor<64x64x55x55xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<64x64x55x55xf32>
+    %2 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<4> : vector<2xi64>} ins(%padded, %_params.features.0.weight : tensor<64x3x228x228xf32>, tensor<64x3x11x11xf32>) outs(%1 : tensor<64x64x55x55xf32>) -> tensor<64x64x55x55xf32>
+    %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%2 : tensor<64x64x55x55xf32>) outs(%0 : tensor<64x64x55x55xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %71 = arith.cmpf ugt, %in, %cst : f32
+      %72 = arith.select %71, %in, %cst : f32
+      linalg.yield %72 : f32
+    } -> tensor<64x64x55x55xf32>
+    %4 = tensor.empty() : tensor<64x64x27x27xf32>
+    %5 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<64x64x27x27xf32>) -> tensor<64x64x27x27xf32>
+    %6 = tensor.empty() : tensor<3x3xf32>
+    %7 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%3, %6 : tensor<64x64x55x55xf32>, tensor<3x3xf32>) outs(%5 : tensor<64x64x27x27xf32>) -> tensor<64x64x27x27xf32>
+    %_params.features.3.weight = util.global.load @_params.features.3.weight : tensor<192x64x5x5xf32>
+    %_params.features.3.bias = util.global.load @_params.features.3.bias : tensor<192xf32>
+    %padded_5 = tensor.pad %7 low[0, 0, 2, 2] high[0, 0, 2, 2] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+      tensor.yield %cst : f32
+    } : tensor<64x64x27x27xf32> to tensor<64x64x31x31xf32>
+    %8 = tensor.empty() : tensor<64x192x27x27xf32>
+    %9 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%_params.features.3.bias : tensor<192xf32>) outs(%8 : tensor<64x192x27x27xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<64x192x27x27xf32>
+    %10 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_5, %_params.features.3.weight : tensor<64x64x31x31xf32>, tensor<192x64x5x5xf32>) outs(%9 : tensor<64x192x27x27xf32>) -> tensor<64x192x27x27xf32>
+    %11 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%10 : tensor<64x192x27x27xf32>) outs(%8 : tensor<64x192x27x27xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %71 = arith.cmpf ugt, %in, %cst : f32
+      %72 = arith.select %71, %in, %cst : f32
+      linalg.yield %72 : f32
+    } -> tensor<64x192x27x27xf32>
+    %12 = tensor.empty() : tensor<64x192x13x13xf32>
+    %13 = linalg.fill ins(%cst_0 : f32) outs(%12 : tensor<64x192x13x13xf32>) -> tensor<64x192x13x13xf32>
+    %14 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%11, %6 : tensor<64x192x27x27xf32>, tensor<3x3xf32>) outs(%13 : tensor<64x192x13x13xf32>) -> tensor<64x192x13x13xf32>
+    %_params.features.6.weight = util.global.load @_params.features.6.weight : tensor<384x192x3x3xf32>
+    %_params.features.6.bias = util.global.load @_params.features.6.bias : tensor<384xf32>
+    %padded_6 = tensor.pad %14 low[0, 0, 1, 1] high[0, 0, 1, 1] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+      tensor.yield %cst : f32
+    } : tensor<64x192x13x13xf32> to tensor<64x192x15x15xf32>
+    %15 = tensor.empty() : tensor<64x384x13x13xf32>
+    %16 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%_params.features.6.bias : tensor<384xf32>) outs(%15 : tensor<64x384x13x13xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<64x384x13x13xf32>
+    %17 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_6, %_params.features.6.weight : tensor<64x192x15x15xf32>, tensor<384x192x3x3xf32>) outs(%16 : tensor<64x384x13x13xf32>) -> tensor<64x384x13x13xf32>
+    %18 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%17 : tensor<64x384x13x13xf32>) outs(%15 : tensor<64x384x13x13xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %71 = arith.cmpf ugt, %in, %cst : f32
+      %72 = arith.select %71, %in, %cst : f32
+      linalg.yield %72 : f32
+    } -> tensor<64x384x13x13xf32>
+    %_params.features.8.weight = util.global.load @_params.features.8.weight : tensor<256x384x3x3xf32>
+    %_params.features.8.bias = util.global.load @_params.features.8.bias : tensor<256xf32>
+    %padded_7 = tensor.pad %18 low[0, 0, 1, 1] high[0, 0, 1, 1] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+      tensor.yield %cst : f32
+    } : tensor<64x384x13x13xf32> to tensor<64x384x15x15xf32>
+    %19 = tensor.empty() : tensor<64x256x13x13xf32>
+    %20 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%_params.features.8.bias : tensor<256xf32>) outs(%19 : tensor<64x256x13x13xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<64x256x13x13xf32>
+    %21 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_7, %_params.features.8.weight : tensor<64x384x15x15xf32>, tensor<256x384x3x3xf32>) outs(%20 : tensor<64x256x13x13xf32>) -> tensor<64x256x13x13xf32>
+    %22 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%21 : tensor<64x256x13x13xf32>) outs(%19 : tensor<64x256x13x13xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %71 = arith.cmpf ugt, %in, %cst : f32
+      %72 = arith.select %71, %in, %cst : f32
+      linalg.yield %72 : f32
+    } -> tensor<64x256x13x13xf32>
+    %_params.features.10.weight = util.global.load @_params.features.10.weight : tensor<256x256x3x3xf32>
+    %_params.features.10.bias = util.global.load @_params.features.10.bias : tensor<256xf32>
+    %padded_8 = tensor.pad %22 low[0, 0, 1, 1] high[0, 0, 1, 1] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+      tensor.yield %cst : f32
+    } : tensor<64x256x13x13xf32> to tensor<64x256x15x15xf32>
+    %23 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d1)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%_params.features.10.bias : tensor<256xf32>) outs(%19 : tensor<64x256x13x13xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<64x256x13x13xf32>
+    %24 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%padded_8, %_params.features.10.weight : tensor<64x256x15x15xf32>, tensor<256x256x3x3xf32>) outs(%23 : tensor<64x256x13x13xf32>) -> tensor<64x256x13x13xf32>
+    %25 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%24 : tensor<64x256x13x13xf32>) outs(%19 : tensor<64x256x13x13xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %71 = arith.cmpf ugt, %in, %cst : f32
+      %72 = arith.select %71, %in, %cst : f32
+      linalg.yield %72 : f32
+    } -> tensor<64x256x13x13xf32>
+    %26 = tensor.empty() : tensor<64x256x6x6xf32>
+    %27 = linalg.fill ins(%cst_0 : f32) outs(%26 : tensor<64x256x6x6xf32>) -> tensor<64x256x6x6xf32>
+    %28 = linalg.pooling_nchw_max {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%25, %6 : tensor<64x256x13x13xf32>, tensor<3x3xf32>) outs(%27 : tensor<64x256x6x6xf32>) -> tensor<64x256x6x6xf32>
+    %29 = linalg.fill ins(%cst : f32) outs(%26 : tensor<64x256x6x6xf32>) -> tensor<64x256x6x6xf32>
+    %30 = tensor.empty() : tensor<1x1xf32>
+    %31 = linalg.pooling_nchw_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%28, %30 : tensor<64x256x6x6xf32>, tensor<1x1xf32>) outs(%29 : tensor<64x256x6x6xf32>) -> tensor<64x256x6x6xf32>
+    %collapsed = tensor.collapse_shape %31 [[0], [1, 2, 3]] : tensor<64x256x6x6xf32> into tensor<64x9216xf32>
+    %32 = tensor.empty() : tensor<f64>
+    %33 = linalg.fill ins(%cst_2 : f64) outs(%32 : tensor<f64>) -> tensor<f64>
+    %global_seed = util.global.load @global_seed : tensor<i64>
+    %extracted = tensor.extract %global_seed[] : tensor<i64>
+    %34 = arith.muli %extracted, %c6364136223846793005_i64 : i64
+    %35 = arith.addi %34, %c1442695040888963407_i64 : i64
+    %inserted = tensor.insert %35 into %global_seed[] : tensor<i64>
+    util.global.store %inserted, @global_seed : tensor<i64>
+    %36 = tensor.empty() : tensor<64x9216xf64>
+    %37 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} outs(%36 : tensor<64x9216xf64>) {
+    ^bb0(%out: f64):
+      %71 = linalg.index 0 : index
+      %72 = arith.index_cast %71 : index to i64
+      %73 = linalg.index 1 : index
+      %74 = arith.index_cast %73 : index to i64
+      %75 = arith.muli %72, %c9216_i64 : i64
+      %76 = arith.addi %75, %74 : i64
+      %77 = arith.muli %76, %35 : i64
+      %78 = arith.addi %77, %35 : i64
+      %79 = arith.muli %77, %77 : i64
+      %80 = arith.addi %79, %77 : i64
+      %81 = arith.shli %80, %c32_i64 : i64
+      %82 = arith.shrui %80, %c32_i64 : i64
+      %83 = arith.ori %81, %82 : i64
+      %84 = arith.muli %83, %83 : i64
+      %85 = arith.addi %84, %78 : i64
+      %86 = arith.shli %85, %c32_i64 : i64
+      %87 = arith.shrui %85, %c32_i64 : i64
+      %88 = arith.ori %86, %87 : i64
+      %89 = arith.muli %88, %88 : i64
+      %90 = arith.addi %89, %77 : i64
+      %91 = arith.shli %90, %c32_i64 : i64
+      %92 = arith.shrui %90, %c32_i64 : i64
+      %93 = arith.ori %91, %92 : i64
+      %94 = arith.muli %93, %93 : i64
+      %95 = arith.addi %94, %78 : i64
+      %96 = arith.shli %95, %c32_i64 : i64
+      %97 = arith.shrui %95, %c32_i64 : i64
+      %98 = arith.ori %96, %97 : i64
+      %99 = arith.muli %98, %98 : i64
+      %100 = arith.addi %99, %77 : i64
+      %101 = arith.shrui %100, %c32_i64 : i64
+      %102 = arith.xori %95, %101 : i64
+      %103 = arith.uitofp %102 : i64 to f64
+      %104 = arith.mulf %103, %cst_1 : f64
+      %105 = arith.addf %104, %cst_3 : f64
+      linalg.yield %105 : f64
+    } -> tensor<64x9216xf64>
+    %38 = tensor.empty() : tensor<64x9216xi1>
+    %39 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%37, %33 : tensor<64x9216xf64>, tensor<f64>) outs(%38 : tensor<64x9216xi1>) {
+    ^bb0(%in: f64, %in_12: f64, %out: i1):
+      %71 = arith.cmpf ult, %in, %in_12 : f64
+      linalg.yield %71 : i1
+    } -> tensor<64x9216xi1>
+    %40 = tensor.empty() : tensor<64x9216xf32>
+    %41 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%39 : tensor<64x9216xi1>) outs(%40 : tensor<64x9216xf32>) {
+    ^bb0(%in: i1, %out: f32):
+      %71 = arith.uitofp %in : i1 to f32
+      linalg.yield %71 : f32
+    } -> tensor<64x9216xf32>
+    %42 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%41, %collapsed : tensor<64x9216xf32>, tensor<64x9216xf32>) outs(%40 : tensor<64x9216xf32>) {
+    ^bb0(%in: f32, %in_12: f32, %out: f32):
+      %71 = arith.mulf %in, %in_12 : f32
+      linalg.yield %71 : f32
+    } -> tensor<64x9216xf32>
+    %43 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%42 : tensor<64x9216xf32>) outs(%40 : tensor<64x9216xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %71 = arith.divf %in, %cst_4 : f32
+      linalg.yield %71 : f32
+    } -> tensor<64x9216xf32>
+    %_params.classifier.1.weight = util.global.load @_params.classifier.1.weight : tensor<4096x9216xf32>
+    %44 = tensor.empty() : tensor<9216x4096xf32>
+    %45 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%_params.classifier.1.weight : tensor<4096x9216xf32>) outs(%44 : tensor<9216x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<9216x4096xf32>
+    %46 = tensor.empty() : tensor<64x4096xf32>
+    %47 = linalg.fill ins(%cst : f32) outs(%46 : tensor<64x4096xf32>) -> tensor<64x4096xf32>
+    %48 = linalg.matmul ins(%43, %45 : tensor<64x9216xf32>, tensor<9216x4096xf32>) outs(%47 : tensor<64x4096xf32>) -> tensor<64x4096xf32>
+    %_params.classifier.1.bias = util.global.load @_params.classifier.1.bias : tensor<4096xf32>
+    %49 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%48, %_params.classifier.1.bias : tensor<64x4096xf32>, tensor<4096xf32>) outs(%46 : tensor<64x4096xf32>) {
+    ^bb0(%in: f32, %in_12: f32, %out: f32):
+      %71 = arith.addf %in, %in_12 : f32
+      linalg.yield %71 : f32
+    } -> tensor<64x4096xf32>
+    %50 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%49 : tensor<64x4096xf32>) outs(%46 : tensor<64x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %71 = arith.cmpf ugt, %in, %cst : f32
+      %72 = arith.select %71, %in, %cst : f32
+      linalg.yield %72 : f32
+    } -> tensor<64x4096xf32>
+    %global_seed_9 = util.global.load @global_seed : tensor<i64>
+    %extracted_10 = tensor.extract %global_seed_9[] : tensor<i64>
+    %51 = arith.muli %extracted_10, %c6364136223846793005_i64 : i64
+    %52 = arith.addi %51, %c1442695040888963407_i64 : i64
+    %inserted_11 = tensor.insert %52 into %global_seed_9[] : tensor<i64>
+    util.global.store %inserted_11, @global_seed : tensor<i64>
+    %53 = tensor.empty() : tensor<64x4096xf64>
+    %54 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} outs(%53 : tensor<64x4096xf64>) {
+    ^bb0(%out: f64):
+      %71 = linalg.index 0 : index
+      %72 = arith.index_cast %71 : index to i64
+      %73 = linalg.index 1 : index
+      %74 = arith.index_cast %73 : index to i64
+      %75 = arith.muli %72, %c4096_i64 : i64
+      %76 = arith.addi %75, %74 : i64
+      %77 = arith.muli %76, %52 : i64
+      %78 = arith.addi %77, %52 : i64
+      %79 = arith.muli %77, %77 : i64
+      %80 = arith.addi %79, %77 : i64
+      %81 = arith.shli %80, %c32_i64 : i64
+      %82 = arith.shrui %80, %c32_i64 : i64
+      %83 = arith.ori %81, %82 : i64
+      %84 = arith.muli %83, %83 : i64
+      %85 = arith.addi %84, %78 : i64
+      %86 = arith.shli %85, %c32_i64 : i64
+      %87 = arith.shrui %85, %c32_i64 : i64
+      %88 = arith.ori %86, %87 : i64
+      %89 = arith.muli %88, %88 : i64
+      %90 = arith.addi %89, %77 : i64
+      %91 = arith.shli %90, %c32_i64 : i64
+      %92 = arith.shrui %90, %c32_i64 : i64
+      %93 = arith.ori %91, %92 : i64
+      %94 = arith.muli %93, %93 : i64
+      %95 = arith.addi %94, %78 : i64
+      %96 = arith.shli %95, %c32_i64 : i64
+      %97 = arith.shrui %95, %c32_i64 : i64
+      %98 = arith.ori %96, %97 : i64
+      %99 = arith.muli %98, %98 : i64
+      %100 = arith.addi %99, %77 : i64
+      %101 = arith.shrui %100, %c32_i64 : i64
+      %102 = arith.xori %95, %101 : i64
+      %103 = arith.uitofp %102 : i64 to f64
+      %104 = arith.mulf %103, %cst_1 : f64
+      %105 = arith.addf %104, %cst_3 : f64
+      linalg.yield %105 : f64
+    } -> tensor<64x4096xf64>
+    %55 = tensor.empty() : tensor<64x4096xi1>
+    %56 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> ()>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%54, %33 : tensor<64x4096xf64>, tensor<f64>) outs(%55 : tensor<64x4096xi1>) {
+    ^bb0(%in: f64, %in_12: f64, %out: i1):
+      %71 = arith.cmpf ult, %in, %in_12 : f64
+      linalg.yield %71 : i1
+    } -> tensor<64x4096xi1>
+    %57 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%56 : tensor<64x4096xi1>) outs(%46 : tensor<64x4096xf32>) {
+    ^bb0(%in: i1, %out: f32):
+      %71 = arith.uitofp %in : i1 to f32
+      linalg.yield %71 : f32
+    } -> tensor<64x4096xf32>
+    %58 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%57, %50 : tensor<64x4096xf32>, tensor<64x4096xf32>) outs(%46 : tensor<64x4096xf32>) {
+    ^bb0(%in: f32, %in_12: f32, %out: f32):
+      %71 = arith.mulf %in, %in_12 : f32
+      linalg.yield %71 : f32
+    } -> tensor<64x4096xf32>
+    %59 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%58 : tensor<64x4096xf32>) outs(%46 : tensor<64x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %71 = arith.divf %in, %cst_4 : f32
+      linalg.yield %71 : f32
+    } -> tensor<64x4096xf32>
+    %_params.classifier.4.weight = util.global.load @_params.classifier.4.weight : tensor<4096x4096xf32>
+    %60 = tensor.empty() : tensor<4096x4096xf32>
+    %61 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%_params.classifier.4.weight : tensor<4096x4096xf32>) outs(%60 : tensor<4096x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<4096x4096xf32>
+    %62 = linalg.matmul ins(%59, %61 : tensor<64x4096xf32>, tensor<4096x4096xf32>) outs(%47 : tensor<64x4096xf32>) -> tensor<64x4096xf32>
+    %_params.classifier.4.bias = util.global.load @_params.classifier.4.bias : tensor<4096xf32>
+    %63 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%62, %_params.classifier.4.bias : tensor<64x4096xf32>, tensor<4096xf32>) outs(%46 : tensor<64x4096xf32>) {
+    ^bb0(%in: f32, %in_12: f32, %out: f32):
+      %71 = arith.addf %in, %in_12 : f32
+      linalg.yield %71 : f32
+    } -> tensor<64x4096xf32>
+    %64 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%63 : tensor<64x4096xf32>) outs(%46 : tensor<64x4096xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      %71 = arith.cmpf ugt, %in, %cst : f32
+      %72 = arith.select %71, %in, %cst : f32
+      linalg.yield %72 : f32
+    } -> tensor<64x4096xf32>
+    %_params.classifier.6.weight = util.global.load @_params.classifier.6.weight : tensor<1000x4096xf32>
+    %65 = tensor.empty() : tensor<4096x1000xf32>
+    %66 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%_params.classifier.6.weight : tensor<1000x4096xf32>) outs(%65 : tensor<4096x1000xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    } -> tensor<4096x1000xf32>
+    %67 = tensor.empty() : tensor<64x1000xf32>
+    %68 = linalg.fill ins(%cst : f32) outs(%67 : tensor<64x1000xf32>) -> tensor<64x1000xf32>
+    %69 = linalg.matmul ins(%64, %66 : tensor<64x4096xf32>, tensor<4096x1000xf32>) outs(%68 : tensor<64x1000xf32>) -> tensor<64x1000xf32>
+    %_params.classifier.6.bias = util.global.load @_params.classifier.6.bias : tensor<1000xf32>
+    %70 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%69, %_params.classifier.6.bias : tensor<64x1000xf32>, tensor<1000xf32>) outs(%67 : tensor<64x1000xf32>) {
+    ^bb0(%in: f32, %in_12: f32, %out: f32):
+      %71 = arith.addf %in, %in_12 : f32
+      linalg.yield %71 : f32
+    } -> tensor<64x1000xf32>
+    return %70 : tensor<64x1000xf32>
+  }
+}
\ No newline at end of file
diff --git a/alexnet/requirements.txt b/alexnet/requirements.txt
new file mode 100644
index 00000000..cc6880ea
--- /dev/null
+++ b/alexnet/requirements.txt
@@ -0,0 +1,2 @@
+shark-turbine
+torchvision
diff --git a/alexnet/run.sh b/alexnet/run.sh
new file mode 100644
index 00000000..43103553
--- /dev/null
+++ b/alexnet/run.sh
@@ -0,0 +1,4 @@
+# Script that generated the mlir files in this folder (iree.mlir, then linalg.mlir).
+# The steps are chained with && so a failed export never overwrites linalg.mlir.
+python test.py > iree.mlir &&
+    iree-compile --iree-input-type=torch --compile-to=input iree.mlir > linalg.mlir
diff --git a/alexnet/test.py b/alexnet/test.py
new file mode 100644
index 00000000..f4e87762
--- /dev/null
+++ b/alexnet/test.py
@@ -0,0 +1,12 @@
+import torch
+import torch.nn as nn
+
+import shark_turbine.aot as aot
+from iree.compiler import compile_str
+
+from torchvision.models import AlexNet
+# Export AlexNet for a fixed 64x3x224x224 input and print the exported module to stdout.
+INPUT_SHAPE = (64, 3, 224, 224)
+net = AlexNet()  # NOTE(review): appears to be exported in training mode (dropout ops in the output) - confirm intended
+module = aot.export(net, torch.empty(*INPUT_SHAPE))
+module.print_readable()