From bfdca7bef1f67b59b683f4cd669a0f2a5e2b2bb7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20M=C3=BChlhaus?=
Date: Fri, 10 May 2024 13:01:31 +0200
Subject: [PATCH] Import from DiffSharp

---
 TensorMath.sln                                |   51 +-
 bundles/TensorMath-cpu/Empty.fs               |    4 +
 bundles/TensorMath-cpu/TensorMath-cpu.fsproj  |   13 +
 bundles/TensorMath-lite/Empty.fs              |    4 +
 .../TensorMath-lite/TensorMath-lite.fsproj    |   13 +
 examples/what.fsx                             |   26 +
 .../Reference.RawTensor.fs                    | 2367 +++++++
 .../TensorMath.Backends.Reference.fsproj      |   16 +
 .../TensorMath.Backends.Torch.fsproj          |   21 +
 .../TensorMath.Torch.fs                       |   33 +
 .../Torch.RawTensor.fs                        | 1594 +++++
 src/TensorMath/Backend.fs                     |   79 +
 src/TensorMath/Device.fs                      |   62 +
 src/TensorMath/Dtype.fs                       |  129 +
 src/TensorMath/Extensions.fs                  |  319 +
 src/TensorMath/Library.fs                     |    5 -
 src/TensorMath/Op.AvgPool.fs                  |  101 +
 src/TensorMath/Op.BMM.fs                      |   22 +
 src/TensorMath/Op.Det.fs                      |   17 +
 src/TensorMath/Op.Inv.fs                      |   17 +
 src/TensorMath/Op.Norm.fs                     |   30 +
 src/TensorMath/Op.Outer.fs                    |   24 +
 src/TensorMath/Op.Solve.fs                    |   17 +
 src/TensorMath/Printer.fs                     |   39 +
 src/TensorMath/RawTensor.fs                   |  918 +++
 src/TensorMath/Scalar.fs                      |   77 +
 src/TensorMath/Shape.fs                       |  883 +++
 src/TensorMath/Tensor.Slicing.fs              | 2695 ++++++++
 src/TensorMath/Tensor.fs                      | 2431 ++++++++
 src/TensorMath/TensorMath.Compose.fs          |  287 +
 src/TensorMath/TensorMath.fs                  | 1468 +++++
 src/TensorMath/TensorMath.fsproj              |   23 +-
 src/TensorMath/Util.fs                        |  495 ++
 .../Reference.RawTensor.fs                    | 2367 +++++++
 .../TensorMath.Backends.TestDuplicate.fsproj  |   16 +
 tests/TensorMath.Tests/Program.fs             |    7 +-
 .../TensorMath.Tests/TensorMath.Tests.fsproj  |   39 +-
 tests/TensorMath.Tests/TestCombo.fs           |  132 +
 tests/TensorMath.Tests/TestCombos.fs          |   76 +
 tests/TensorMath.Tests/TestOp.AvgPool.fs      |  358 ++
 tests/TensorMath.Tests/TestOp.BMM.fs          |   66 +
 tests/TensorMath.Tests/TestOp.Det.fs          |   40 +
 tests/TensorMath.Tests/TestOp.Inv.fs          |   52 +
 tests/TensorMath.Tests/TestOp.Norm.fs         |  127 +
 tests/TensorMath.Tests/TestOp.Outer.fs        |   47 +
 tests/TensorMath.Tests/TestOp.Solve.fs        |  101 +
 tests/TensorMath.Tests/TestRandom.fs          |   34 +
 tests/TensorMath.Tests/TestTensor.Conv.fs     | 2279 +++++++
 tests/TensorMath.Tests/TestTensor.MaxPool.fs  | 1208 ++++
 tests/TensorMath.Tests/TestTensor.fs          | 5431 +++++++++++++++++
 tests/TensorMath.Tests/TestTensorMath.fs      |  309 +
 tests/TensorMath.Tests/TestUtils.fs           |   41 +
 tests/TensorMath.Tests/Tests.fs               |   15 -
 53 files changed, 26989 insertions(+), 36 deletions(-)
 create mode 100644 bundles/TensorMath-cpu/Empty.fs
 create mode 100644 bundles/TensorMath-cpu/TensorMath-cpu.fsproj
 create mode 100644 bundles/TensorMath-lite/Empty.fs
 create mode 100644 bundles/TensorMath-lite/TensorMath-lite.fsproj
 create mode 100644 examples/what.fsx
 create mode 100644 src/TensorMath.Backends.Reference/Reference.RawTensor.fs
 create mode 100644 src/TensorMath.Backends.Reference/TensorMath.Backends.Reference.fsproj
 create mode 100644 src/TensorMath.Backends.Torch/TensorMath.Backends.Torch.fsproj
 create mode 100644 src/TensorMath.Backends.Torch/TensorMath.Torch.fs
 create mode 100644 src/TensorMath.Backends.Torch/Torch.RawTensor.fs
 create mode 100644 src/TensorMath/Backend.fs
 create mode 100644 src/TensorMath/Device.fs
 create mode 100644 src/TensorMath/Dtype.fs
 create mode 100644 src/TensorMath/Extensions.fs
 delete mode 100644 src/TensorMath/Library.fs
 create mode 100644 src/TensorMath/Op.AvgPool.fs
 create mode 100644 src/TensorMath/Op.BMM.fs
 create mode 100644 src/TensorMath/Op.Det.fs
 create mode 100644 src/TensorMath/Op.Inv.fs
 create mode 100644 src/TensorMath/Op.Norm.fs
 create mode 100644 src/TensorMath/Op.Outer.fs
 create mode 100644 src/TensorMath/Op.Solve.fs
 create mode 100644 src/TensorMath/Printer.fs
 create mode 100644 src/TensorMath/RawTensor.fs
 create mode 100644 src/TensorMath/Scalar.fs
 create mode 100644 src/TensorMath/Shape.fs
 create mode 100644 src/TensorMath/Tensor.Slicing.fs
 create mode 100644 src/TensorMath/Tensor.fs
 create mode 100644 src/TensorMath/TensorMath.Compose.fs
 create mode 100644 src/TensorMath/TensorMath.fs
 create mode 100644 src/TensorMath/Util.fs
 create mode 100644 tests/TensorMath.Backends.TestDuplicate/Reference.RawTensor.fs
 create mode 100644 tests/TensorMath.Backends.TestDuplicate/TensorMath.Backends.TestDuplicate.fsproj
 create mode 100644 tests/TensorMath.Tests/TestCombo.fs
 create mode 100644 tests/TensorMath.Tests/TestCombos.fs
 create mode 100644 tests/TensorMath.Tests/TestOp.AvgPool.fs
 create mode 100644 tests/TensorMath.Tests/TestOp.BMM.fs
 create mode 100644 tests/TensorMath.Tests/TestOp.Det.fs
 create mode 100644 tests/TensorMath.Tests/TestOp.Inv.fs
 create mode 100644 tests/TensorMath.Tests/TestOp.Norm.fs
 create mode 100644 tests/TensorMath.Tests/TestOp.Outer.fs
 create mode 100644 tests/TensorMath.Tests/TestOp.Solve.fs
 create mode 100644 tests/TensorMath.Tests/TestRandom.fs
 create mode 100644 tests/TensorMath.Tests/TestTensor.Conv.fs
 create mode 100644 tests/TensorMath.Tests/TestTensor.MaxPool.fs
 create mode 100644 tests/TensorMath.Tests/TestTensor.fs
 create mode 100644 tests/TensorMath.Tests/TestTensorMath.fs
 create mode 100644 tests/TensorMath.Tests/TestUtils.fs
 delete mode 100644 tests/TensorMath.Tests/Tests.fs

diff --git a/TensorMath.sln b/TensorMath.sln
index e3f3a86..04b4c9d 100644
--- a/TensorMath.sln
+++ b/TensorMath.sln
@@ -9,8 +9,6 @@ Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "TensorMath", "src\TensorMat
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{BAA394CB-3D6F-4CE9-BAE8-56603DBE7793}"
 EndProject
-Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "TensorMath.Tests", "tests\TensorMath.Tests\TensorMath.Tests.fsproj", "{6D6C1F8A-1AFE-4BEE-A073-24515FCC6460}"
-EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{7007FA68-0E95-42A0-B25C-A9BBA6071B34}"
 EndProject
 Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "build", "build\build.fsproj", "{D305E2AA-681F-47ED-87C8-7A9F6EA2F1A6}"
@@ -21,6 +19,20 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".ci", ".ci", "{CD408BBB-CFD
 	.github\workflows\deploy-gh-pages.yml = .github\workflows\deploy-gh-pages.yml
 EndProjectSection
 EndProject
+Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "TensorMath.Backends.Reference", "src\TensorMath.Backends.Reference\TensorMath.Backends.Reference.fsproj", "{F973F65A-3E9F-4780-84A8-E10C5EEA86F1}"
+EndProject
+Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "TensorMath.Tests", "tests\TensorMath.Tests\TensorMath.Tests.fsproj", "{6A7CBDA7-5E2B-4818-A152-DE64031ACCEA}"
+EndProject
+Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "TensorMath.Backends.TestDuplicate", "tests\TensorMath.Backends.TestDuplicate\TensorMath.Backends.TestDuplicate.fsproj", "{03D729EA-CE26-4AF4-887E-4339E38DBF11}"
+EndProject
+Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "TensorMath.Backends.Torch", "src\TensorMath.Backends.Torch\TensorMath.Backends.Torch.fsproj", "{D03FFF26-A7AA-4C9F-B226-D0F07FD08A5F}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "bundles", "bundles", "{26AD5F3B-A910-4128-971C-FE9780005B1E}"
+EndProject
+Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "TensorMath-cpu", "bundles\TensorMath-cpu\TensorMath-cpu.fsproj", "{13198191-E9B3-44B7-8F25-8013F2020900}" +EndProject +Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "TensorMath-lite", "bundles\TensorMath-lite\TensorMath-lite.fsproj", "{3E208A02-EFBC-4450-A5EF-BEC5139F1E55}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -31,22 +43,47 @@ Global {03276E48-6B47-463F-A5BC-59580A623ADB}.Debug|Any CPU.Build.0 = Debug|Any CPU {03276E48-6B47-463F-A5BC-59580A623ADB}.Release|Any CPU.ActiveCfg = Release|Any CPU {03276E48-6B47-463F-A5BC-59580A623ADB}.Release|Any CPU.Build.0 = Release|Any CPU - {6D6C1F8A-1AFE-4BEE-A073-24515FCC6460}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {6D6C1F8A-1AFE-4BEE-A073-24515FCC6460}.Debug|Any CPU.Build.0 = Debug|Any CPU - {6D6C1F8A-1AFE-4BEE-A073-24515FCC6460}.Release|Any CPU.ActiveCfg = Release|Any CPU - {6D6C1F8A-1AFE-4BEE-A073-24515FCC6460}.Release|Any CPU.Build.0 = Release|Any CPU {D305E2AA-681F-47ED-87C8-7A9F6EA2F1A6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {D305E2AA-681F-47ED-87C8-7A9F6EA2F1A6}.Debug|Any CPU.Build.0 = Debug|Any CPU {D305E2AA-681F-47ED-87C8-7A9F6EA2F1A6}.Release|Any CPU.ActiveCfg = Release|Any CPU {D305E2AA-681F-47ED-87C8-7A9F6EA2F1A6}.Release|Any CPU.Build.0 = Release|Any CPU + {F973F65A-3E9F-4780-84A8-E10C5EEA86F1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {F973F65A-3E9F-4780-84A8-E10C5EEA86F1}.Debug|Any CPU.Build.0 = Debug|Any CPU + {F973F65A-3E9F-4780-84A8-E10C5EEA86F1}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F973F65A-3E9F-4780-84A8-E10C5EEA86F1}.Release|Any CPU.Build.0 = Release|Any CPU + {6A7CBDA7-5E2B-4818-A152-DE64031ACCEA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {6A7CBDA7-5E2B-4818-A152-DE64031ACCEA}.Debug|Any CPU.Build.0 = Debug|Any CPU + {6A7CBDA7-5E2B-4818-A152-DE64031ACCEA}.Release|Any CPU.ActiveCfg = Release|Any CPU + {6A7CBDA7-5E2B-4818-A152-DE64031ACCEA}.Release|Any CPU.Build.0 = Release|Any CPU + {03D729EA-CE26-4AF4-887E-4339E38DBF11}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {03D729EA-CE26-4AF4-887E-4339E38DBF11}.Debug|Any CPU.Build.0 = Debug|Any CPU + {03D729EA-CE26-4AF4-887E-4339E38DBF11}.Release|Any CPU.ActiveCfg = Release|Any CPU + {03D729EA-CE26-4AF4-887E-4339E38DBF11}.Release|Any CPU.Build.0 = Release|Any CPU + {D03FFF26-A7AA-4C9F-B226-D0F07FD08A5F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D03FFF26-A7AA-4C9F-B226-D0F07FD08A5F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D03FFF26-A7AA-4C9F-B226-D0F07FD08A5F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D03FFF26-A7AA-4C9F-B226-D0F07FD08A5F}.Release|Any CPU.Build.0 = Release|Any CPU + {13198191-E9B3-44B7-8F25-8013F2020900}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {13198191-E9B3-44B7-8F25-8013F2020900}.Debug|Any CPU.Build.0 = Debug|Any CPU + {13198191-E9B3-44B7-8F25-8013F2020900}.Release|Any CPU.ActiveCfg = Release|Any CPU + {13198191-E9B3-44B7-8F25-8013F2020900}.Release|Any CPU.Build.0 = Release|Any CPU + {3E208A02-EFBC-4450-A5EF-BEC5139F1E55}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3E208A02-EFBC-4450-A5EF-BEC5139F1E55}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3E208A02-EFBC-4450-A5EF-BEC5139F1E55}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3E208A02-EFBC-4450-A5EF-BEC5139F1E55}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(NestedProjects) = preSolution {03276E48-6B47-463F-A5BC-59580A623ADB} = 
{5317FD43-7D2F-4F4F-8444-B6AA1285C4BD} - {6D6C1F8A-1AFE-4BEE-A073-24515FCC6460} = {BAA394CB-3D6F-4CE9-BAE8-56603DBE7793} {D305E2AA-681F-47ED-87C8-7A9F6EA2F1A6} = {7007FA68-0E95-42A0-B25C-A9BBA6071B34} + {F973F65A-3E9F-4780-84A8-E10C5EEA86F1} = {5317FD43-7D2F-4F4F-8444-B6AA1285C4BD} + {6A7CBDA7-5E2B-4818-A152-DE64031ACCEA} = {BAA394CB-3D6F-4CE9-BAE8-56603DBE7793} + {03D729EA-CE26-4AF4-887E-4339E38DBF11} = {BAA394CB-3D6F-4CE9-BAE8-56603DBE7793} + {D03FFF26-A7AA-4C9F-B226-D0F07FD08A5F} = {5317FD43-7D2F-4F4F-8444-B6AA1285C4BD} + {13198191-E9B3-44B7-8F25-8013F2020900} = {26AD5F3B-A910-4128-971C-FE9780005B1E} + {3E208A02-EFBC-4450-A5EF-BEC5139F1E55} = {26AD5F3B-A910-4128-971C-FE9780005B1E} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {7A89F710-DE41-4B47-B450-17CCB1B3A0DC} diff --git a/bundles/TensorMath-cpu/Empty.fs b/bundles/TensorMath-cpu/Empty.fs new file mode 100644 index 0000000..aef8dbf --- /dev/null +++ b/bundles/TensorMath-cpu/Empty.fs @@ -0,0 +1,4 @@ +namespace TensorMath + +// This project is to bundle TensorMath and some default backends into a single project +// See TensorMath for main TensorMath code \ No newline at end of file diff --git a/bundles/TensorMath-cpu/TensorMath-cpu.fsproj b/bundles/TensorMath-cpu/TensorMath-cpu.fsproj new file mode 100644 index 0000000..9d4d548 --- /dev/null +++ b/bundles/TensorMath-cpu/TensorMath-cpu.fsproj @@ -0,0 +1,13 @@ + + + + net8.0 + TensorMath_cpu + true + + + + + + + diff --git a/bundles/TensorMath-lite/Empty.fs b/bundles/TensorMath-lite/Empty.fs new file mode 100644 index 0000000..aef8dbf --- /dev/null +++ b/bundles/TensorMath-lite/Empty.fs @@ -0,0 +1,4 @@ +namespace TensorMath + +// This project is to bundle TensorMath and some default backends into a single project +// See TensorMath for main TensorMath code \ No newline at end of file diff --git a/bundles/TensorMath-lite/TensorMath-lite.fsproj b/bundles/TensorMath-lite/TensorMath-lite.fsproj new file mode 100644 index 0000000..309595b --- /dev/null +++ b/bundles/TensorMath-lite/TensorMath-lite.fsproj @@ -0,0 +1,13 @@ + + + + net8.0 + TensorMath_lite + true + + + + + + + diff --git a/examples/what.fsx b/examples/what.fsx new file mode 100644 index 0000000..0cdee61 --- /dev/null +++ b/examples/what.fsx @@ -0,0 +1,26 @@ +#!/usr/bin/env -S dotnet fsi + +#I "../tests/TensorMath.Tests/bin/Debug/net8.0" +#r "TensorMath.dll" +#r "TensorMath.Backends.Reference.dll" +#r "TensorMath.Backends.Torch.dll" + +// Libtorch binaries +// Option A: you can use a platform-specific nuget package +#r "nuget: TorchSharp-cpu" +// #r "nuget: TorchSharp-cuda-linux, 0.96.5" +//#r "nuget: TorchSharp-cuda-windows" // #r "nuget: TorchSharp-cuda-windows, 0.96.5" +// Option B: you can use a local libtorch installation +// System.Runtime.InteropServices.NativeLibrary.Load("/home/gunes/anaconda3/lib/python3.8/site-packages/torch/lib/libtorch.so") + + +open TensorMath + + +dsharp.config(backend=Backend.Torch, device=Device.CPU) +dsharp.seed(1) + +let t1 = dsharp.tensor [|1.; 2.; 3.; 4.; |] + +t1 * t1 + diff --git a/src/TensorMath.Backends.Reference/Reference.RawTensor.fs b/src/TensorMath.Backends.Reference/Reference.RawTensor.fs new file mode 100644 index 0000000..6d112f2 --- /dev/null +++ b/src/TensorMath.Backends.Reference/Reference.RawTensor.fs @@ -0,0 +1,2367 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. 
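+
+// This file is also compiled, with TEST_DUPLICATE_BACKEND defined, into the
+// test-only backend TensorMath.Backends.TestDuplicate (see the #if below and
+// tests/TensorMath.Backends.TestDuplicate in this patch), so the test suite
+// can exercise a second registered backend that shares this reference
+// implementation.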
+ +#if TEST_DUPLICATE_BACKEND +namespace rec TensorMath.Backends.TestDuplicate +#else +namespace rec TensorMath.Backends.Reference +#endif + +open System +open TensorMath +open TensorMath.Backends +open TensorMath.Util + +#nowarn "77" // use of op_Explicit + +[] +module internal Utils = + type RawTensor with + member x.GetTypedValues() : 'T[] = (x :?> RawTensorCPU<'T>).Values + +/// This is the base class for all RawTensorXyz types. +/// All type-independent operations are implemented directly on this class. +[] +type RawTensorCPU<'T when 'T : equality and 'T :> scalar>(values: 'T[], shape: Shape, dtype: Dtype, device: Device) = + inherit RawTensor() + do if device.DeviceType = DeviceType.CUDA then failwithf "CUDA is not supported by the reference backend." + + let mutable values = values + let mutable isMutable = false + let checkMutable() = if not isMutable then failwith "The tensor cannot be mutated." + override _.Shape = shape + override _.Dim = shape.Length + override _.Nelement = shapeLength shape + override _.Dtype = dtype + override _.Device = device + override _.DeviceType = device.DeviceType + override _.Handle = box values + override _.Backend = +#if TEST_DUPLICATE_BACKEND + Backend.Register "TestDuplicate" +#else + Backend.Reference +#endif + + member _.Values : 'T[] = values + + member internal t.IndexToFlatIndex(index:int[]) = + indexToFlatIndex t.Shape index + + member internal t.FlatIndexToIndex(flatIndex:int) = + flatIndexToIndex t.Shape flatIndex + + member t.Item + with get ([] index:int[]) = + // printfn "rawtensor shape %A item index %A" t.Shape index + if index.Length <> t.Dim then failwithf "Expecting a %id index" t.Dim + let vvv = t.Values[t.IndexToFlatIndex(index)] + vvv + + and set ([] index:int[]) v = + if index.Length <> t.Dim then failwithf "Expecting a %id index" t.Dim + t.Values[t.IndexToFlatIndex(index)] <- v + + override t.GetItem(indexes:int[]) = + t[indexes] :> scalar + + override t.GetSlice(fullBounds:int[,]) = + let fullBounds = Shape.completeSliceBounds t.Shape fullBounds + let shape = Shape.checkCanGetSlice t.Shape fullBounds + let array = Array.zeroCreate (shapeLength shape) + let mutable arrayi = 0 + let rec slice (fullBounds:int[,]) externalCoords = + if fullBounds.GetLength(0) = 1 then + for i=fullBounds[0,0] to fullBounds[0,1] do + // printfn "inner %A" i + let globalCoords = Array.append externalCoords [|i|] + array[arrayi] <- t[globalCoords] + arrayi <- arrayi + 1 + else + for i=fullBounds[0,0] to fullBounds[0,1] do + // printfn "outer %A" i + slice fullBounds[1..,*] (Array.append externalCoords [|i|]) + slice fullBounds [||] + t.MakeLike(array, shape) + + override t.Clone() = t.MakeLike(Array.copy t.Values, Array.copy t.Shape) + + abstract member MakeLike: values: 'T[] * shape: Shape * ?device: Device -> RawTensor + + override x.ComputeHash() = hash shape + hash values + + override t.Expand(newShape) = + if newShape.Length = 1 && newShape[0] = 0 then t.MakeLike([||], newShape) else // Return zero-sized tensor if expanding to zero-sized tensor + if shape = newShape then t :> _ else + Shape.checkCanExpand shape newShape + let trim = newShape.Length - shape.Length + let exp = shapeLength newShape[0..trim-1] + let jshape = newShape[trim..] 
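+        // Example: expanding shape [|3; 1|] into newShape [|2; 3; 4|] gives
+        // trim = 1, exp = 2 (the leading dimensions are pure replication) and
+        // jshape = [|3; 4|]; in the loop below strideD is 0 for broadcast
+        // dimensions (source size 1), so a single source element fans out
+        // across that dimension.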
+ let n = shapeLength newShape + let result = Array.zeroCreate n + if jshape.Length = 0 then + // The expansion is everything + for jP = 0 to exp-1 do + result[jP] <- values[0] + else + for jP = 0 to exp-1 do + let rec loop ibase jbase d = + let strideD = if (shape[d] = jshape[d]) then 1 else 0 + if d < jshape.Length-1 then + let mutable iD = 0 + for jD = 0 to jshape[d]-1 do + let ibaseD = (ibase+iD)*shape[d+1] + let jbaseD = (jbase+jD)*jshape[d+1] + loop ibaseD jbaseD (d+1) + iD <- iD + strideD + else + let mutable iD = 0 + // last loop does the actual copy fragments + for jD = 0 to jshape[d]-1 do + result[jbase+jD] <- values[ibase+iD] + iD <- iD + strideD + loop 0 (jP*jshape[0]) 0 + t.MakeLike(result, newShape) + + override t.ToValues() = + let shape = t.Shape + match t.Dim with + | 0 -> box values[0] + | 1 -> upcast Array.init shape[0] (fun i -> t[i]) + | 2 -> upcast Array2D.init shape[0] shape[1] (fun i j -> t[i, j]) + | 3 -> upcast Array3D.init shape[0] shape[1] shape[2] (fun i j k -> t[i, j, k]) + | 4 -> upcast Array4D.init shape[0] shape[1] shape[2] shape[3] (fun i j k l -> t[i, j, k, l]) + | 5 -> upcast Array5D.init shape[0] shape[1] shape[2] shape[3] shape[4] (fun i j k l m -> t[i, j, k, l, m]) + | 6 -> upcast Array6D.init shape[0] shape[1] shape[2] shape[3] shape[4] shape[5] (fun i j k l m n -> t[i, j, k, l, m, n]) + | _ -> ArrayND.init shape (fun idxs -> t[idxs]) + + override _.StackTs(tensors, dim) = + let values, shapes = tensors |> Array.map (fun t -> t.GetTypedValues(), t.Shape) |> Array.unzip + let n, shape1, shape2, newShape = Shape.checkCanStack shapes dim + let m1 = shapeLength shape1 + let m2 = shapeLength shape2 + let m = m1 * m2 + let result = Array.zeroCreate (n * m) + for i=0 to (n*m)-1 do + let chunk = i/m2 + let i2 = chunk%n + let j2 = (chunk/n)*m2+i%m2 + result[i] <-values[i2][j2] + + (tensors[0] :?> RawTensorCPU<'T>).MakeLike(result, newShape) + + override t.UnstackT(dim) = + let shape = t.Shape + let shape1, shape2, unstackedShape = Shape.checkCanUnstack shape dim + let n = shape[dim] + let m1 = shapeLength shape1 + let m2 = shapeLength shape2 + let m = m1 * m2 + let values = t.Values + let results = Array.init n (fun _ -> Array.zeroCreate m) + for i=0 to (n*m)-1 do + let chunk = i/m2 + let i2 = chunk%n + let j2 = (chunk/n)*m2+i%m2 + results[i2][j2] <- values[i] + results |> Array.map (fun rvalues -> t.MakeLike(rvalues, unstackedShape)) + + override t.CatTs(tensors, dim) = + let values, shapes = tensors |> Array.map (fun t -> t.GetTypedValues(), t.Shape) |> Array.unzip + let n, shape1, m2, shape3, outShape = Shape.checkCanCat shapes dim + let m1 = shapeLength shape1 + let m3 = shapeLength shape3 + let m = m1 * m2 * m3 + let result = Array.zeroCreate m + let mutable i = 0 + for j1 = 0 to m1-1 do + for k = 0 to n-1 do + let d = shapes[k][dim] + let b = j1*m3*d + for j2 = 0 to d*m3-1 do + result[i+j2] <-values[k][b+j2] + i <- i + d*m3 + + t.MakeLike(result, outShape) + + override t.SplitT(sizes, dim) = + let shape = t.Shape + let outShapes = Shape.checkCanSplit shape sizes dim + let n = sizes.Length + let shape1 = shape[0..dim-1] + let shape2 = shape[dim+1..] 
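+        // Below, the tensor is treated as an m1 x (sum of sizes) x m3 block,
+        // with m1 elements before dim and m3 after it; for each outer index j1,
+        // split k receives one contiguous run of sizes[k]*m3 values.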
+ let m1 = shapeLength shape1 + let m3 = shapeLength shape2 + let values = t.Values + let results = Array.init n (fun k -> Array.zeroCreate (m1 * sizes[k] * m3)) + let mutable i = 0 + for j1 = 0 to m1-1 do + for k = 0 to n-1 do + let d = sizes[k] + let b = j1*m3*d + for j2 = 0 to d*m3-1 do + results[k][b+j2] <-values[i+j2] + i <- i + d*m3 + + (results, outShapes) ||> Array.map2 (fun rvalues outShape -> + t.MakeLike(rvalues, outShape)) + + override t.PermuteT(permutation) = + let inversePermutation, newShape = Shape.checkCanPermute t.Shape permutation + let result = t.ZerosLike(newShape) :?> RawTensorCPU<'T> + let rec transpose (shape:Shape) externalCoords = + if shape.Length = 1 then + for i=0 to shape[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + let transposedCoords = Array.permute (fun i -> inversePermutation[i]) globalCoords + result[transposedCoords] <- t[globalCoords] + else + for i=0 to shape[0]-1 do + transpose shape[1..] (Array.append externalCoords [|i|]) + transpose t.Shape [||] + upcast result + + override t.TransposeT(dim0, dim1) = + let permutation = [| 0 .. t.Shape.Length - 1 |] + permutation[dim0] <- dim1 + permutation[dim1] <- dim0 + t.PermuteT(permutation) + + override t.TransposeT2() = + Shape.checkCanTranspose2d t.Dim + let tcols = t.Shape[1] + let result = Array2D.init t.Shape[1] t.Shape[0] (fun i j -> t.Values[j*tcols + i]) + t.CreateLike(result) + + override t.SqueezeT(dim) = + let result = Array.copy t.Values + t.MakeLike(result, Shape.squeeze dim t.Shape) + + override t.UnsqueezeT(dim) = + let outputShape = Shape.checkCanUnsqueeze dim t.Shape + let result = Array.copy t.Values + t.MakeLike(result, outputShape) + + override t.FlipT(dims:int[]) = + Shape.checkCanFlip t.Dim dims + match t.Dim with + | 0 -> t.Clone() + | _ -> + let result = t.ZerosLike(t.Shape) :?> RawTensorCPU<'T> + let rec flip (shape:Shape) externalCoords = + if shape.Length = 1 then + for i=0 to shape[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + result[mirrorCoordinates globalCoords t.Shape dims] <- t[globalCoords] + else + for i=0 to shape[0]-1 do + flip shape[1..] (Array.append externalCoords [|i|]) + flip t.Shape [||] + upcast result + + override t.DilateT(dilations:int[]) = + Shape.checkCanDilate t.Dim dilations + match t.Dim with + | 0 -> t.Clone() + | _ -> + let result = t.ZerosLike(Shape.dilated t.Shape dilations) :?> RawTensorCPU<'T> + let rec dilate (shape:Shape) externalCoords = + if shape.Length = 1 then + for i=0 to shape[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + result[dilatedCoordinates globalCoords dilations] <- t[globalCoords] + else + for i=0 to shape[0]-1 do + dilate shape[1..] (Array.append externalCoords [|i|]) + dilate t.Shape [||] + upcast result + + override t.UndilateT(dilations:int[]) = + match t.Dim with + | 0 -> t.Clone() + | _ -> + let result = t.ZerosLike(Shape.undilatedShape t.Shape dilations) :?> RawTensorCPU<'T> + let rec dilate (shape:Shape) externalCoords = + if shape.Length = 1 then + for i=0 to shape[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + result[globalCoords] <- t[dilatedCoordinates globalCoords dilations] + else + for i=0 to shape[0]-1 do + dilate shape[1..] 
(Array.append externalCoords [|i|]) + dilate result.Shape [||] + upcast result + + override t.GatherT(dim:int, indices) = + Shape.checkCanGather t.Shape dim indices.Shape indices.Dtype + let indices = indices :?> RawTensorCPU + let result = t.ZerosLike(indices.Shape) :?> RawTensorCPU<'T> + let rec gather (shape:Shape) externalCoords = + if shape.Length = 1 then + for i=0 to shape[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + let globalCoordsIndices = Array.copy globalCoords + globalCoordsIndices[dim] <- indices[globalCoords] + result[globalCoords] <- t[globalCoordsIndices] + else + for i=0 to shape[0]-1 do + gather shape[1..] (Array.append externalCoords [|i|]) + gather result.Shape [||] + upcast result + + override t.ScatterT(dim:int, indices, destinationShape:Shape) = + Shape.checkCanScatter t.Shape dim indices.Shape indices.Dtype destinationShape + let indices = indices :?> RawTensorCPU + let result = t.ZerosLike(destinationShape) :?> RawTensorCPU<'T> + let rec scatter (shape:Shape) externalCoords = + if shape.Length = 1 then + for i=0 to shape[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + let globalCoordsIndices = Array.copy globalCoords + globalCoordsIndices[dim] <- indices[globalCoords] + result[globalCoordsIndices] <- t[globalCoords] + else + for i=0 to shape[0]-1 do + scatter shape[1..] (Array.append externalCoords [|i|]) + scatter t.Shape [||] + upcast result + + override t.ViewT(shape:Shape) = + Shape.checkCanView t.Shape shape + let result = Array.copy t.Values + t.MakeLike(result, shape) + + override t.Cast(dtype: Dtype) = + if dtype = t.Dtype then + upcast t + else + let tflat = t.ViewT([|t.Nelement|]) // We flatten, cast, and return with the correct shape because .ToValues() in the next line does not support tensors with dimension > 4. 
+ let values = + match t.Dtype with + // These special cases for byte and int8 are to ensure that values don't get truncated because RawTensor.Create cannot distinguish between byte and int8 + | Dtype.Byte -> tflat.ToValues():?>byte[] |> Array.map int |> box + | Dtype.Int8 -> tflat.ToValues():?>int8[] |> Array.map int |> box + | _ -> tflat.ToValues() + + RawTensor.Create(values, dtype=dtype, backend=t.Backend, device=t.Device).ViewT(t.Shape) + + override t.MoveTo(device: Device) = t.MakeLike(values, shape, device=device) + + override t.SetMutable() = isMutable <- true + override t.IsMutable = isMutable + member t.SetValues(tmp: RawTensor) = checkMutable(); values <- (tmp :?> RawTensorCPU<'T>).Values + override t.ClampInPlace(low, high) = t.SetValues <| t.ClampT(low, high) + override t.LtInPlace(t2) = t.SetValues <| t.LtTT(t2) + override t.GtInPlace(t2) = t.SetValues <| t.GtTT(t2) + override t.LeInPlace(t2) = t.SetValues <| t.LeTT(t2) + override t.GeInPlace(t2) = t.SetValues <| t.GeTT(t2) + override t.EqInPlace(t2) = t.SetValues <| t.EqTT(t2) + override t.NeqInPlace(t2) = t.SetValues <| t.NeqTT(t2) + override t.AddInPlace(t2, alpha) = t.SetValues <| t.AddTT(t2, ?alpha=alpha) + override t.AddScalarInPlace(t2) = t.SetValues <| t.AddTT0(t2) + override t.AddSliceInPlace(location, t2) = t.SetValues <| t.AddTTSlice(location, t2) + override t.SubInPlace(t2) = t.SetValues <| t.SubTT(t2) + override t.SubScalarInPlace(t2) = t.SetValues <| t.SubTT0(t2) + override t.MulInPlace(t2) = t.SetValues <| t.MulTT(t2) + override t.MulScalarInPlace(t2) = t.SetValues <| t.MulTT0(t2) + override t.DivInPlace(t2) = t.SetValues <| t.DivTT(t2) + override t.DivScalarInPlace(t2) = t.SetValues <| t.DivTT0(t2) + override t.PowInPlace(t2) = t.SetValues <| t.PowTT(t2) + override t.PowScalarInPlace(t2) = t.SetValues <| t.PowTT0(t2) + override t.MatMulInPlace(t2) = t.SetValues <| t.MatMulTT(t2) + override t.NegInPlace() = t.SetValues <| t.NegT() + override t.SignInPlace() = t.SetValues <| t.SignT() + override t.FloorInPlace() = t.SetValues <| t.FloorT() + override t.CeilInPlace() = t.SetValues <| t.CeilT() + override t.RoundInPlace() = t.SetValues <| t.RoundT() + override t.AbsInPlace() = t.SetValues <| t.AbsT() + override t.ReluInPlace() = t.SetValues <| t.ReluT() + override t.SoftplusInPlace() = t.SetValues <| t.SoftplusT() + override t.SigmoidInPlace() = t.SetValues <| t.SigmoidT() + override t.ExpInPlace() = t.SetValues <| t.ExpT() + override t.LogInPlace() = t.SetValues <| t.LogT() + override t.Log10InPlace() = t.SetValues <| t.Log10T() + override t.SqrtInPlace() = t.SetValues <| t.SqrtT() + override t.SinInPlace() = t.SetValues <| t.SinT() + override t.CosInPlace() = t.SetValues <| t.CosT() + override t.TanInPlace() = t.SetValues <| t.TanT() + override t.SinhInPlace() = t.SetValues <| t.SinhT() + override t.CoshInPlace() = t.SetValues <| t.CoshT() + override t.TanhInPlace() = t.SetValues <| t.TanhT() + override t.AsinInPlace() = t.SetValues <| t.AsinT() + override t.AcosInPlace() = t.SetValues <| t.AcosT() + override t.AtanInPlace() = t.SetValues <| t.AtanT() + override t.OnesInPlace() = t.SetValues <| t.OnesLike(t.Shape) + override t.RandomInPlace() = t.SetValues <| t.RandomLike(t.Shape) + override t.RandomNormalInPlace() = t.SetValues <| t.RandomNormalLike(t.Shape) + override t.RandomIntInPlace(low, high) = t.SetValues <| t.RandomIntLike(t.Shape, low, high) + override t.ZerosInPlace() = t.SetValues <| t.ZerosLike(t.Shape) + +// Defines the math-dependent operations for `RawTensorCPU` types +// using generic inline 
code. Each implementing type (e.g. RawTensorFloat32) instantiates +// inlines these at concrete types. +// +// Most of the functions produce (value, shape) pairs for use in constructing an instance +// of the final implementing type. +[] +module internal RawTensorCPU = + + /// Access the natural "0" value for the element of a CPU tensor type + let inline zero< ^T when ^T : (static member Zero : ^T) > = LanguagePrimitives.GenericZero< ^T > + + /// Access the natural "1" value for the element of a CPU tensor type + let inline one< ^T when ^T : (static member One : ^T) > = LanguagePrimitives.GenericOne< ^T > + + /// Get the scalar "0" tensor for a CPU tensor type + let inline Zero () : (^T[] * Shape) = + let values = [|zero< ^T > |] + (values, Shape.scalar) + + /// Get the scalar "1" tensor for a CPU tensor type + let inline One() : (^T[] * Shape) = + let values = [| one< ^T > |] + (values, Shape.scalar) + + /// Get the "0" tensor for a CPU tensor type of the given shape + let inline Zeros(shape:Shape) : (^T[] * Shape) = + let values = Array.zeroCreate (shapeLength shape) + (values, shape) + + /// Get the "0" tensor for a CPU tensor type of the given shape + let inline Empty(shape:Shape) : (^T[] * Shape) = Zeros shape + + let inline Ones(shape:Shape) = + let values = Array.create (shapeLength shape) one< ^T > + (values, shape) + + let inline CreateFromFlatArray (values: System.Array, shape: Shape) : (^T[] * Shape) = + match values with + | :? ( ^T[]) as arr -> arr, shape + | _ -> invalidArg "value" (sprintf "Data unsuitable for RawTensorCPU of type %A" typeof< ^T >) + + let inline Equals(t1: RawTensorCPU< ^T >, t2: RawTensor) = + if t1.Dtype <> t2.Dtype then + opNotSupported2 "Equals" t1.Dtype t2.Dtype + match t2 with + | :? RawTensorCPU< ^T > as t2 -> t1.Shape = t2.Shape && t1.Values = t2.Values + | _ -> invalidOp <| sprintf "Cannot compare RawTensors t1 (Shape=%A, Dtype=%A, Device=%A, Backend=%A) and t2 (Shape=%A, Dtype=%A, Device=%A, Backend=%A)" t1.Shape t1.Dtype t1.Device t1.Backend t2.Shape t2.Dtype t2.Device t2.Backend + + let inline Full(shape:Shape, value: ^T) = + let result = Array.create (shapeLength shape) value + (result, shape) + + let inline AllClose(t1: RawTensorCPU< ^T >, t2:RawTensor, relativeTolerance: ^T, absoluteTolerance: ^T) = + match t2 with + | :? 
RawTensorCPU< ^T > as t2 -> t1.Shape = t2.Shape && Array.allClose relativeTolerance absoluteTolerance t1.Values t2.Values + | _ -> invalidOp <| sprintf "Cannot compare RawTensors t1 (Shape=%A, Dtype=%A, Device=%A, Backend=%A) and t2 (Shape=%A, Dtype=%A, Device=%A, Backend=%A)" t1.Shape t1.Dtype t1.Device t1.Backend t2.Shape t2.Dtype t2.Device t2.Backend + + let inline ClampT(t: RawTensorCPU< ^T>, low: RawTensor, high:RawTensor) : (^T[] * Shape) = + if low.Dim <> 0 || high.Dim <> 0 then failwithf "Expecting scalar low and high" + let tvalue = t.Values + let lowvalue = low.GetTypedValues()[0] + let highvalue = high.GetTypedValues()[0] + let result = Array.map (fun v -> (max (min v highvalue) lowvalue)) tvalue + (result, t.Shape) + + let inline LtTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (bool[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (<) t1value t2value + (result, t1.Shape) + + let inline GtTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (bool[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (>) t1value t2value + (result, t1.Shape) + + let inline LeTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (bool[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (<=) t1value t2value + (result, t1.Shape) + + let inline GeTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (bool[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (>=) t1value t2value + (result, t1.Shape) + + let inline EqTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (bool[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (=) t1value t2value + (result, t1.Shape) + + let inline NeqTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (bool[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (<>) t1value t2value + (result, t1.Shape) + + let inline MaxIndexT(t: RawTensorCPU< ^T >) = + t.FlatIndexToIndex(Seq.maxIndex t.Values) + + let inline MinMaxReduceT op (t: RawTensorCPU< ^T >, dim, keepDim) : RawTensor * RawTensor = + let newShape = Shape.checkCanMinMaxReduce dim keepDim t.Shape + let shape = t.Shape + let shape1 = shape[0..dim-1] + let n = shape[dim] + let shape2 = shape[dim+1..] 
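+        // Same m1 x n x m3 decomposition as the stack/split helpers: for every
+        // (j1, j2) position, scan the n entries along dim, recording both the
+        // extreme value and the index j3 at which it occurs.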
+ let m1 = shapeLength shape1 + let m3 = shapeLength shape2 + let values = t.Values + let results = Array.zeroCreate (m1 * m3) + let indexes = Array.zeroCreate (m1 * m3) + for j1 = 0 to m1-1 do + for j2 = 0 to m3-1 do + let b = j1*m3 + j2 + for j3 = 0 to n-1 do + let v = values[j1*n*m3+j3*m3+j2] + if op v results[b] || (j3 = 0) then + results[b] <- v + indexes[b] <- j3 + let resultsT = t.MakeLike(results, newShape) + let indexesT = t.CreateLike(indexes, dtype=Dtype.Int32).ViewT(newShape) + resultsT, indexesT + + let inline MinIndexT(t: RawTensorCPU< ^T >) = + t.FlatIndexToIndex(Seq.minIndex t.Values) + + let inline AddTT(t1: RawTensorCPU< ^T >, t2: RawTensor, alpha: ^T) : (^T[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (fun a b -> a + alpha * b) t1value t2value + (result, t1.Shape) + + let inline AddTT0(t1: RawTensorCPU< ^T >, b: ^T, alpha: ^T) : (^T[] * Shape) = + let t1value = t1.Values + let result = Array.map (fun a -> a + alpha * b) t1value + (result, t1.Shape) + + let inline internal AddTTSlice(plus, t1: RawTensorCPU< ^T >, location:int[], t2: RawTensor) : (^T[] * Shape) = + Shape.checkCanAddSlice t1.Shape location t2.Shape + let t1value = t1.Values + let t2 = t2 :?> RawTensorCPU< ^T > + let result = Array.copy t1value + let shape2 = Shape.unsqueezeAs t2.Shape t1.Shape + let rec add (shape2:Shape) externalCoords = + if shape2.Length = 1 then + for i=0 to shape2[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + let t1Coords = Array.map2 (+) globalCoords location + let t1FlatIndex = t1.IndexToFlatIndex(t1Coords) + result[t1FlatIndex] <- plus result[t1FlatIndex] t2[globalCoords] + else + for i=0 to shape2[0]-1 do + add (shape2[1..]) (Array.append externalCoords [|i|]) + add shape2 [||] + (result, t1.Shape) + + let inline SubTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (^T[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (-) t1value t2value + (result, t1.Shape) + + let inline SubT0T(a: ^T, t2: RawTensor) : (^T[] * Shape) = + let t2value = t2.GetTypedValues() + let result = Array.map (fun b -> a - b) t2value + (result, t2.Shape) + + let inline SubTT0(t1: RawTensorCPU< ^T >, b: ^T) : (^T[] * Shape) = + let t1value = t1.Values + let result = Array.map (fun t -> t - b) t1value + (result, t1.Shape) + + let inline MulTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (^T[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (*) t1value t2value + (result, t1.Shape) + + let inline MulTT0(t1: RawTensorCPU< ^T >, b: ^T) : (^T[] * Shape) = + let t1value = t1.Values + let result = Array.map (fun a -> a * b) t1value + (result, t1.Shape) + + let inline DivTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (^T[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (/) t1value t2value + (result, t1.Shape) + + let inline DivT0T(a: ^T, t2: RawTensor) : (^T[] * Shape) = + let t2value = t2.GetTypedValues() + let result = Array.map (fun b -> a / b) t2value + (result, t2.Shape) + + let inline DivTT0(t1: RawTensorCPU< ^T >, b: ^T) : (^T[] * Shape) = + let t1value = t1.Values + let result = Array.map (fun a -> a / b) t1value + (result, t1.Shape) + + let inline PowTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (^T[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 ( ** ) t1value t2value + (result, t1.Shape) + + let inline PowT0T(a: ^T , t2: RawTensor) : (^T[] * Shape) 
= + let t2value = t2.GetTypedValues() + let result = Array.map (fun b -> a ** b) t2value + (result, t2.Shape) + + let inline PowTT0(t1: RawTensorCPU< ^T >, b: ^T) : (^T[] * Shape) = + let t1value = t1.Values + let result = Array.map (fun a -> a ** b) t1value + (result, t1.Shape) + + let inline MatMulTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (^T[] * Shape) = + let (t1BatchPart, t1MatrixPart), (t2BatchPart, t2MatrixPart) = Shape.checkCanMatmul t1.Shape t2.Shape + if t1BatchPart <> t2BatchPart then failwithf "Cannot matrix multiply raw tensors with shapes %A, %A - mismatch batching" t1.Shape t2.Shape + let t1rows, t1cols = t1MatrixPart[0], t1MatrixPart[1] + let t2rows, t2cols = t2MatrixPart[0], t2MatrixPart[1] + let t1value = t1.Values + let t2value = (t2 :?> RawTensorCPU< ^T >).Values + let newShape = Array.append t1BatchPart [| t1rows; t2cols |] + let nb = shapeLength t1BatchPart + let values = Array.initFlat3D nb t1rows t2cols (fun b i j -> Array.sumBy (fun k -> t1value[b*t1cols*t1rows + i*t1cols + k] * t2value[b*t2cols*t2rows + k*t2cols + j]) [|0..(t2rows-1)|] ) + (values, newShape) + + let inline BMMTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (^T[] * Shape) = + Shape.checkCanBMM t1.Shape t2.Shape |> ignore + MatMulTT(t1, t2) + + // Returns the LU decomposition of this matrix. The return values are the LU matrix, pivot indices, and a toggle value indicating the number of row exchanges during the decomposition, which is +1 if the number of exchanges were even, -1 if odd. Source: Atilim Gunes Baydin, FsAlg, 2015, https://github.com/gbaydin/FsAlg + let inline LUDecomposition (m: ^T[,]) = + let rows = m.GetLength(0) + let res = Array2D.copy m + let perm = Array.init rows (fun i -> i) + let mutable toggle = LanguagePrimitives.GenericOne<'T> + for j = 0 to rows - 2 do + let mutable colmax:'T = abs res[j, j] + let mutable prow = j + for i = j + 1 to rows - 1 do + let absresij = abs res[i, j] + if absresij > colmax then + colmax <- absresij + prow <- i + if prow <> j then + let tmprow = res[prow, 0..] + res[prow, 0..] <- res[j, 0..] + res[j, 0..] <- tmprow + let tmp = perm[prow] + perm[prow] <- perm[j] + perm[j] <- tmp + toggle <- -toggle + for i = j + 1 to rows - 1 do + res[i, j] <- res[i, j] / res[j, j] + for k = j + 1 to rows - 1 do + res[i, k] <- res[i, k] - res[i, j] * res[j, k] + res, perm, toggle + + // Finds an array that, when multiplied by a LU matrix `lu`, gives array `b`. Source: Atilim Gunes Baydin, FsAlg, 2015, https://github.com/gbaydin/FsAlg + let inline matrixSolveHelper (lu:^T[,]) (b:^T[]) = + let n = lu.GetLength 0 + let x = Array.copy b + for i = 1 to n - 1 do + let mutable sum = x[i] + for j = 0 to i - 1 do + sum <- sum - lu[i, j] * x[j] + x[i] <- sum + x[n - 1] <- x[n - 1] / lu[n - 1, n - 1] + for i in (n - 2) .. -1 .. 0 do + let mutable sum = x[i] + for j = i + 1 to n - 1 do + sum <- sum - lu[i, j] * x[j] + x[i] <- sum / lu[i, i] + x + + // Solves a system of linear equations ax = b, where the coefficients are given in matrix `a` and the result vector is vector `b`. The returned vector will correspond to x. Source: Atilim Gunes Baydin, FsAlg, 2015, https://github.com/gbaydin/FsAlg + let inline solve (a: ^T[,]) (b: ^T[]) = + let lu, perm, _ = LUDecomposition a + let bp = Array.init (a.GetLength(0)) (fun i -> b[perm[i]]) + matrixSolveHelper lu bp + + // Inverts matrix. 
Source: Atilim Gunes Baydin, FsAlg, 2015, https://github.com/gbaydin/FsAlg + let inline inverseMatrix (m: ^T[,]) = + let rows = m.GetLength(0) + let res = Array2D.copy m + let lu, perm, _ = LUDecomposition m + let b:'T[] = Array.zeroCreate rows + for i = 0 to rows - 1 do + for j = 0 to rows - 1 do + if i = perm[j] then + b[j] <- LanguagePrimitives.GenericOne<'T> + else + b[j] <- LanguagePrimitives.GenericZero<'T> + let x = matrixSolveHelper lu b + res[0.., i] <- x + res + + let inline InverseT(t: RawTensorCPU< ^T >) : RawTensorCPU< ^T > = + Shape.checkCanInvert t.Shape + let dim = t.Shape.Length + if dim = 2 then // One matrix + let tinv = inverseMatrix (t.ToArray() :?> ^T[,]) + let tinvflat = [| for i=0 to tinv.GetLength(0)-1 do for j=0 to tinv.GetLength(1)-1 do yield tinv[i, j] |] + t.MakeLike(tinvflat, t.Shape) :?> RawTensorCPU<'T> + else // Batch of matrices + let tinvs = + t.UnstackT(0) + |> Array.map (fun v -> inverseMatrix (v.ToArray() :?> ^T[,])) + |> Array.map (fun v -> [| for i=0 to v.GetLength(0)-1 do for j=0 to v.GetLength(1)-1 do yield v[i, j] |]) + |> Array.map (fun v -> t.MakeLike(v, [|t.Shape[1]; t.Shape[2]|])) + t.StackTs(tinvs, 0) :?> RawTensorCPU<'T> + + let inline diagonal(square: ^T[,]) = + let n = square.GetLength(0) + if n <> square.GetLength(1) then failwith "Expecting a square array" + Array.init n (fun i -> square[i, i]) + + let inline prod(t: ^T[]) = + Array.fold (fun s x -> s * x) LanguagePrimitives.GenericOne<'T> t + + let inline DetT(t: RawTensorCPU< ^T >) : RawTensorCPU< ^T > = + Shape.checkCanDet t.Shape + let dim = t.Shape.Length + if dim = 2 then + let lu, _, toggle = LUDecomposition(t.ToArray() :?> ^T[,]) + let d:^T = toggle * (prod (diagonal lu)) + t.MakeLike([|d|], [||]) :?> RawTensorCPU<'T> + else + let tdets = + t.UnstackT(0) + |> Array.map (fun v -> let lu, _, toggle = LUDecomposition(v.ToArray() :?> ^T[,]) in lu, toggle) + |> Array.map (fun (lu, toggle) -> toggle * (prod (diagonal lu))) + |> Array.map (fun v -> t.MakeLike([|v|], [||])) + t.StackTs(tdets, 0) :?> RawTensorCPU<'T> + + let inline SolveTT(a: RawTensorCPU< ^T >, b: RawTensor) : RawTensorCPU< ^T > = + let newShape = Shape.checkCanSolve a.Shape b.Shape + let dimA = a.Shape.Length + let dimB = b.Shape.Length + if dimA = 2 then + let n = a.Shape[0] + let amatrix = (a.ToArray() :?> ^T[,]) + if dimB = 1 then + let bvector = (b.ToArray() :?> ^T[]) + let s = solve amatrix bvector + a.MakeLike(s, newShape) :?> RawTensorCPU<'T> + else // dimB = 2 + let cols = + b.UnstackT(1) + |> Array.map (fun v -> v.ToArray() :?> ^T[]) + |> Array.map (fun v -> solve amatrix v) + |> Array.map (fun v -> a.MakeLike(v, [|n|])) + a.StackTs(cols, 1) :?> RawTensorCPU<'T> + else // dimA = 3 + let n = a.Shape[1] + if dimB = 2 then + let aa = a.UnstackT(0) + let bb = b.UnstackT(0) + let ss = + Array.zip aa bb + |> Array.map (fun (aaa, bbb) -> + let amatrix = (aaa.ToArray() :?> ^T[,]) + let bvector = (bbb.ToArray() :?> ^T[]) + let s = solve amatrix bvector + a.MakeLike(s, [|n|])) + a.StackTs(ss, 0) :?> RawTensorCPU<'T> + else // dimB = 3 + let aa = a.UnstackT(0) + let bb = b.UnstackT(0) + let ss = + Array.zip aa bb + |> Array.map (fun (aaa, bbb) -> + let amatrix = (aaa.ToArray() :?> ^T[,]) + let cols = + bbb.UnstackT(1) + |> Array.map (fun v -> v.ToArray() :?> ^T[]) + |> Array.map (fun v -> solve amatrix v) + |> Array.map (fun v -> a.MakeLike(v, [|n|])) + a.StackTs(cols, 1)) + a.StackTs(ss, 0) :?> RawTensorCPU<'T> + // failwithf "Unsupported shapes %A %A" a.Shape b.Shape + + let inline MaxPool1D(t1: RawTensorCPU< ^T >, 
kernelSize, stride, padding) : RawTensorCPU< ^T > * RawTensorCPU< int > = + let batchSize, channels, inputSize, outputSize, outputShape = + Shape.checkCanMaxpool1d t1.Dtype t1.Shape kernelSize stride padding + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + let indices = t1.ZerosLike(outputShape, dtype=Int32) :?> RawTensorCPU + let minValue = t1[t1.MinIndexT()] - one + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v=0 to outputSize-1 do + let mutable maxvalue = minValue + let mutable maxindex = -1 + for u=0 to kernelSize-1 do + let i = (v*stride) + u - padding + if i >= 0 && i < inputSize then + let value = t1[n, c, i] + if value > maxvalue then + maxvalue <- value + maxindex <- i + result[[|n; c; v|]] <- maxvalue + indices[[|n; c; v|]] <- maxindex + result, indices + + let inline MaxPool2D(t1: RawTensorCPU< ^T >, kernelSize, stride, padding) : RawTensorCPU< ^T > * RawTensorCPU< int > = + let batchSize, channels, (inputHeight, inputWidth), (kernelHeight, kernelWidth), (outputHeight, outputWidth), outputShape = + Shape.checkCanMaxpool2d t1.Dtype t1.Shape kernelSize stride padding + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + let indices = t1.ZerosLike(outputShape, dtype=Int32) :?> RawTensorCPU + let minValue = t1[t1.MinIndexT()] - one + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v0=0 to outputHeight-1 do + for v1=0 to outputWidth-1 do + let mutable maxvalue = minValue + let mutable maxindexi0 = -1 + let mutable maxindexi1 = -1 + for u0=0 to kernelHeight-1 do + for u1=0 to kernelWidth-1 do + let i0 = (v0*stride[0]) + u0 - padding[0] + let i1 = (v1*stride[1]) + u1 - padding[1] + if i0 >= 0 && i0 < inputHeight && i1 >= 0 && i1 < inputWidth then + let value = t1[n, c, i0, i1] + if value > maxvalue then + maxvalue <- value + maxindexi0 <- i0 + maxindexi1 <- i1 + result[[|n; c; v0; v1|]] <- maxvalue + indices[[|n; c; v0; v1|]] <- indexToFlatIndex [|inputHeight; inputWidth|] [|maxindexi0; maxindexi1|] + result, indices + + let inline MaxPool3D(t1: RawTensorCPU< ^T >, kernelSize, stride, padding) : RawTensorCPU< ^T > * RawTensorCPU< int > = + let (batchSize, channels, (inputDepth, inputHeight, inputWidth), (kernelDepth, kernelHeight, kernelWidth), (outputDepth, outputHeight, outputWidth), outputShape) = + Shape.checkCanMaxpool3d t1.Dtype t1.Shape kernelSize stride padding + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + let indices = t1.ZerosLike(outputShape, dtype=Int32) :?> RawTensorCPU + let minValue = t1[t1.MinIndexT()] - one + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v0=0 to outputDepth-1 do + for v1=0 to outputHeight-1 do + for v2=0 to outputWidth-1 do + let mutable maxvalue = minValue + let mutable maxindexi0 = -1 + let mutable maxindexi1 = -1 + let mutable maxindexi2 = -1 + for u0=0 to kernelDepth-1 do + for u1=0 to kernelHeight-1 do + for u2=0 to kernelWidth-1 do + let i0 = (v0*stride[0]) + u0 - padding[0] + let i1 = (v1*stride[1]) + u1 - padding[1] + let i2 = (v2*stride[2]) + u2 - padding[2] + if i0 >= 0 && i0 < inputDepth && i1 >= 0 && i1 < inputHeight && i2 >= 0 && i2 < inputWidth then + let value = t1[n, c, i0, i1, i2] + if value > maxvalue then + maxvalue <- value + maxindexi0 <- i0 + maxindexi1 <- i1 + maxindexi2 <- i2 + result[[|n; c; v0; v1; v2|]] <- maxvalue + indices[[|n; c; v0; v1; v2|]] <- indexToFlatIndex [|inputDepth; inputHeight; inputWidth|] [|maxindexi0; maxindexi1; maxindexi2|] + result, indices + + let inline MaxUnpool1D(t1: RawTensorCPU< ^T >, indices: RawTensorCPU, outputSize: 
int[]) : RawTensorCPU< ^T > = + let batchSize, channels, inputSize, outputShape = + Shape.checkCanMaxunpool1d t1.Dtype t1.Shape indices.Dtype indices.Shape outputSize + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for u=0 to inputSize-1 do + let i = indices[[|n; c; u|]] + result[[|n; c; i|]] <- t1[[|n; c; u|]] + result + + let inline MaxUnpool2D(t1: RawTensorCPU< ^T >, indices: RawTensorCPU, outputSize:int[]) : RawTensorCPU< ^T > = + let batchSize, channels, (inputHeight, inputWidth), outputShape = + Shape.checkCanMaxunpool2d t1.Dtype t1.Shape indices.Dtype indices.Shape outputSize + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for u0=0 to inputHeight-1 do + for u1=0 to inputWidth-1 do + let iflat = indices[[|n; c; u0; u1|]] + let i = flatIndexToIndex [|outputSize[2]; outputSize[3]|] iflat + result[[|n; c; i[0]; i[1]|]] <- t1[[|n; c; u0; u1|]] + result + + let inline MaxUnpool3D(t1: RawTensorCPU< ^T >, indices: RawTensorCPU, outputSize:int[]) : RawTensorCPU< ^T > = + let batchSize, channels, (inputDepth, inputHeight, inputWidth), outputShape = + Shape.checkCanMaxunpool3d t1.Dtype t1.Shape indices.Dtype indices.Shape outputSize + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for u0=0 to inputDepth-1 do + for u1=0 to inputHeight-1 do + for u2=0 to inputWidth-1 do + let iflat = indices[[|n; c; u0; u1; u2|]] + let i = flatIndexToIndex [|outputSize[2]; outputSize[3]; outputSize[4]|] iflat + result[[|n; c; i[0]; i[1]; i[2]|]] <- t1[[|n; c; u0; u1; u2|]] + result + + let inline Conv1D(t1: RawTensorCPU< ^T >, t2: RawTensor, stride, padding) : RawTensorCPU< ^T > = + // t1: input, NxCxI (batchSize x inputChannels x inputLength) + // t2: filters, KxCxF (outputChannels x inputChannels x kernelLength) + let batchSize, inputChannels, kernelSize, outputChannels, outputSize, outputShape = + Shape.checkCanConv1d t1.DeviceType t2.DeviceType t1.Dtype t2.Dtype t1.Shape t2.Shape stride padding 1 + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + let t1 = + if padding = 0 then + t1 + else + let tshape = Array.copy t1.Shape + tshape[2] <- t1.Shape[2] + padding * 2 + let t = t1.ZerosLike(tshape) + t.AddTTSlice([|0; 0; padding|], t1) :?> RawTensorCPU< ^T > + let t2 = t2 :?> RawTensorCPU< ^T > + for n=0 to batchSize-1 do + for k=0 to outputChannels-1 do + for v=0 to outputSize-1 do + let mutable value = zero + for c=0 to inputChannels-1 do + for u=0 to kernelSize-1 do + value <- value + t2[k, c, u] * t1[n, c, (v*stride) + u] + result[[|n; k; v|]] <- value + result + + let inline Conv2D(t1: RawTensorCPU< ^T >, t2: RawTensor, stride: int[], padding: int[]) : RawTensorCPU< ^T > = + // t1: input, NxCxHxW (batchSize x inputChannels x inputHeight x inputWidth) + // t2: filters, KxCxFxG (outputChannels x inputChannels x kernelHeight x kernelWidth) + let batchSize, inputChannels, (kernelHeight, kernelWidth), (outputChannels, outputHeight, outputWidth), outputShape = + Shape.checkCanConv2d t1.DeviceType t2.DeviceType t1.Dtype t2.Dtype t1.Shape t2.Shape stride padding [|1;1|] + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU< ^T> + let t1 = + if padding[0] = 0 && padding[1] = 0 then + t1 + else + let tshape = Array.copy t1.Shape + tshape[2] <- t1.Shape[2] + padding[0] * 2 + tshape[3] <- t1.Shape[3] + padding[1] * 2 + let t = t1.ZerosLike(tshape) + t.AddTTSlice([|0; 0; padding[0]; padding[1]|], 
t1) :?> RawTensorCPU< ^T > + let t2 = t2 :?> RawTensorCPU< ^T > + for n=0 to batchSize-1 do + for k=0 to outputChannels-1 do + for v0=0 to outputHeight-1 do + for v1=0 to outputWidth-1 do + let mutable value = zero + for c=0 to inputChannels-1 do + for u0=0 to kernelHeight-1 do + for u1=0 to kernelWidth-1 do + value <- value + t2[k, c, u0, u1] * t1[n, c, (v0*stride[0])+u0, (v1*stride[1])+u1] + result[[|n; k; v0; v1|]] <- value + result + + let inline Conv3D(t1: RawTensorCPU< ^T >, t2: RawTensor, stride: int[], padding: int[]) : RawTensorCPU< ^T > = + // t1: input, NxCxDxHxW (batchSize x inputChannels x inputDepth x inputHeight x inputWidth) + // t2: filters, KxCxExFxG (outputChannels x inputChannels x kernelDepth x kernelHeight x kernelWidth) + let batchSize, inputChannels, (kernelDepth, kernelHeight, kernelWidth), (outputChannels, outputDepth, outputHeight, outputWidth), outputShape = + Shape.checkCanConv3d t1.DeviceType t2.DeviceType t1.Dtype t2.Dtype t1.Shape t2.Shape stride padding [|1;1;1|] + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU< ^T> + let t1 = + if padding[0] = 0 && padding[1] = 0 && padding[2] = 0 then + t1 + else + let tshape = Array.copy t1.Shape + tshape[2] <- t1.Shape[2] + padding[0] * 2 + tshape[3] <- t1.Shape[3] + padding[1] * 2 + tshape[4] <- t1.Shape[4] + padding[2] * 2 + let t = t1.ZerosLike(tshape) + t.AddTTSlice([|0; 0; padding[0]; padding[1]; padding[2]|], t1) :?> RawTensorCPU< ^T > + let t2 = t2 :?> RawTensorCPU< ^T > + for n=0 to batchSize-1 do + for k=0 to outputChannels-1 do + for v0=0 to outputDepth-1 do + for v1=0 to outputHeight-1 do + for v2=0 to outputWidth-1 do + let mutable value = zero + for c=0 to inputChannels-1 do + for u0=0 to kernelDepth-1 do + for u1=0 to kernelHeight-1 do + for u2=0 to kernelWidth-1 do + // printfn "%A %A %A | %A %A %A" v0 v1 v2 u0 u1 u2 + value <- value + t2[k, c, u0, u1, u2] * t1[n, c, (v0*stride[0])+u0, (v1*stride[1])+u1, (v2*stride[2])+u2] + result[[|n; k; v0; v1; v2|]] <- value + result + + let inline AvgPool1D ofInt (t1: RawTensorCPU< ^T >, kernelSize, stride, padding) : RawTensorCPU< ^T >= + let batchSize, channels, inputSize, outputSize, outputShape = + Shape.checkCanAvgpool1d t1.Dtype t1.Shape kernelSize stride padding + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v=0 to outputSize-1 do + let mutable avg = zero + for u=0 to kernelSize-1 do + let i = (v*stride) + u - padding + if i >= 0 && i < inputSize then + let value = t1[n, c, i] + avg <- avg + value + result[[|n; c; v|]] <- avg / ofInt kernelSize + result + + let inline AvgPool2D ofInt (t1: RawTensorCPU< ^T >, kernelSize, stride, padding) : RawTensorCPU< ^T > = + let batchSize, channels, (inputHeight, inputWidth), (kernelHeight, kernelWidth), (outputHeight, outputWidth), outputShape = + Shape.checkCanAvgpool2d t1.Dtype t1.Shape kernelSize stride padding + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + let kernelSize = kernelHeight * kernelWidth + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v0=0 to outputHeight-1 do + for v1=0 to outputWidth-1 do + let mutable avg = zero + for u0=0 to kernelHeight-1 do + for u1=0 to kernelWidth-1 do + let i0 = (v0*stride[0]) + u0 - padding[0] + let i1 = (v1*stride[1]) + u1 - padding[1] + if i0 >= 0 && i0 < inputHeight && i1 >= 0 && i1 < inputWidth then + let value = t1[n, c, i0, i1] + avg <- avg + value + result[[|n; c; v0; v1|]] <- avg / ofInt kernelSize + result + + let inline AvgPool3D ofInt (t1: RawTensorCPU< ^T 
>, kernelSize, stride, padding) : RawTensorCPU< ^T > =
+        let (batchSize, channels, (inputDepth, inputHeight, inputWidth), (kernelDepth, kernelHeight, kernelWidth), (outputDepth, outputHeight, outputWidth), outputShape) =
+            Shape.checkCanAvgpool3d t1.Dtype t1.Shape kernelSize stride padding
+        let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T>
+        let kernelSize = kernelDepth * kernelHeight * kernelWidth
+        for n=0 to batchSize-1 do
+            for c=0 to channels-1 do
+                for v0=0 to outputDepth-1 do
+                    for v1=0 to outputHeight-1 do
+                        for v2=0 to outputWidth-1 do
+                            let mutable avg = zero
+                            for u0=0 to kernelDepth-1 do
+                                for u1=0 to kernelHeight-1 do
+                                    for u2=0 to kernelWidth-1 do
+                                        let i0 = (v0*stride[0]) + u0 - padding[0]
+                                        let i1 = (v1*stride[1]) + u1 - padding[1]
+                                        let i2 = (v2*stride[2]) + u2 - padding[2]
+                                        if i0 >= 0 && i0 < inputDepth && i1 >= 0 && i1 < inputHeight && i2 >= 0 && i2 < inputWidth then
+                                            let value = t1[n, c, i0, i1, i2]
+                                            avg <- avg + value
+                            result[[|n; c; v0; v1; v2|]] <- avg / ofInt kernelSize
+        result
+
+    let inline AvgPoolReverse1D ofInt (t1: RawTensorCPU< ^T >, originalInput: RawTensor, kernelSize, stride, padding) : RawTensorCPU< ^T > =
+        let batchSize, channels, inputSize, outputSize, _outputShape =
+            Shape.checkCanAvgpool1d t1.Dtype originalInput.Shape kernelSize stride padding
+        let result = t1.ZerosLike(originalInput.Shape) :?> RawTensorCPU<'T>
+        for n=0 to batchSize-1 do
+            for c=0 to channels-1 do
+                for v=0 to outputSize-1 do
+                    for u=0 to kernelSize-1 do
+                        let i = (v*stride) + u - padding
+                        if i >= 0 && i < inputSize then
+                            result[[|n; c; i|]] <- t1[[|n; c; v|]] / ofInt kernelSize
+        result
+
+    let inline AvgPoolReverse2D ofInt (t1: RawTensorCPU< ^T >, originalInput: RawTensor, kernelSize, stride, padding) : RawTensorCPU< ^T > =
+        let batchSize, channels, (inputHeight, inputWidth), (kernelHeight, kernelWidth), (outputHeight, outputWidth), _outputShape =
+            Shape.checkCanAvgpool2d t1.Dtype originalInput.Shape kernelSize stride padding
+        let kernelSize = kernelHeight * kernelWidth
+        let result = t1.ZerosLike(originalInput.Shape) :?> RawTensorCPU<'T>
+        for n=0 to batchSize-1 do
+            for c=0 to channels-1 do
+                for v0=0 to outputHeight-1 do
+                    for v1=0 to outputWidth-1 do
+                        for u0=0 to kernelHeight-1 do
+                            for u1=0 to kernelWidth-1 do
+                                let i0 = (v0*stride[0]) + u0 - padding[0]
+                                let i1 = (v1*stride[1]) + u1 - padding[1]
+                                if i0 >= 0 && i0 < inputHeight && i1 >= 0 && i1 < inputWidth then
+                                    result[[|n; c; i0; i1|]] <- t1[[|n; c; v0; v1|]] / ofInt kernelSize
+        result
+
+    let inline AvgPoolReverse3D ofInt (t1: RawTensorCPU< ^T >, originalInput: RawTensor, kernelSize, stride, padding) : RawTensorCPU< ^T > =
+        let batchSize, channels, (inputDepth, inputHeight, inputWidth), (kernelDepth, kernelHeight, kernelWidth), (outputDepth, outputHeight, outputWidth), _outputShape =
+            Shape.checkCanAvgpool3d t1.Dtype originalInput.Shape kernelSize stride padding
+        let kernelSize = kernelDepth * kernelHeight * kernelWidth
+        let result = t1.ZerosLike(originalInput.Shape) :?> RawTensorCPU<'T>
+        for n=0 to batchSize-1 do
+            for c=0 to channels-1 do
+                for v0=0 to outputDepth-1 do
+                    for v1=0 to outputHeight-1 do
+                        for v2=0 to outputWidth-1 do
+                            for u0=0 to kernelDepth-1 do
+                                for u1=0 to kernelHeight-1 do
+                                    for u2=0 to kernelWidth-1 do
+                                        let i0 = (v0*stride[0]) + u0 - padding[0]
+                                        let i1 = (v1*stride[1]) + u1 - padding[1]
+                                        let i2 = (v2*stride[2]) + u2 - padding[2]
+                                        if i0 >= 0 && i0 < inputDepth && i1 >= 0 && i1 < inputHeight && i2 >= 0 && i2 < inputWidth then
+                                            result[[|n; c; i0; i1; i2|]] <- t1[[|n; c; v0; v1; v2|]] / ofInt kernelSize
+        result
+
+    let inline NegT op (t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = Array.map op t.Values
+        (result, t.Shape)
+
+    let inline SumT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        if Array.isEmpty t.Values then ([|zero< ^T >|], Shape.scalar) else // Return a zero-valued scalar tensor if summing a zero-sized tensor (not holding any value). This is mirroring the behavior in PyTorch 1.5.1.
+        let result = Array.reduce (+) t.Values
+        ([|result|], [||])
+
+    let inline SumTDim(t: RawTensorCPU< ^T >, dim: int) : RawTensorCPU< ^T > =
+        let sBounds = Array2D.init t.Dim 3 (fun i j -> if j=0 then 0 elif j=1 then t.Shape[i]-1 else 0)
+        sBounds[dim, 1] <- 0
+        sBounds[dim, 2] <- 1
+        let s = t.ZerosLike(shape=t.Shape, dtype=t.Dtype.SummationType).GetSlice(sBounds) :?> RawTensorCPU<'T>
+        s.SetMutable()
+        for i=0 to t.Shape[dim]-1 do
+            sBounds[dim,0] <- i
+            sBounds[dim,1] <- i
+            sBounds[dim,2] <- 1
+            s.AddInPlace(t.GetSlice(sBounds).Cast(t.Dtype.SummationType))
+        s
+
+    let inline SignT op (t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map op
+        (result, t.Shape)
+
+    let inline FloorT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map floor
+        (result, t.Shape)
+
+    let inline CeilT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map ceil
+        (result, t.Shape)
+
+    let inline RoundT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map round
+        (result, t.Shape)
+
+    let inline AbsT op (t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map op
+        (result, t.Shape)
+
+    let inline ReluT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map (max zero< ^T >)
+        (result, t.Shape)
+
+    let inline SoftplusT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map (fun x -> (max zero< ^T > x) + log(one< ^T > + exp(-abs(x))))
+        (result, t.Shape)
+
+    let inline SigmoidT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map (fun v -> one / (one + exp -v))
+        (result, t.Shape)
+
+    let inline ExpT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map exp
+        (result, t.Shape)
+
+    let inline LogT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map log
+        (result, t.Shape)
+
+    let inline Log10T(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map log10
+        (result, t.Shape)
+
+    let inline SqrtT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map sqrt
+        (result, t.Shape)
+
+    let inline SinT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map sin
+        (result, t.Shape)
+
+    let inline CosT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map cos
+        (result, t.Shape)
+
+    let inline TanT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map tan
+        (result, t.Shape)
+
+    let inline SinhT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map sinh
+        (result, t.Shape)
+
+    let inline CoshT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map cosh
+        (result, t.Shape)
+
+    let inline TanhT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map tanh
+        (result, t.Shape)
+
+    let inline AsinT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+        let result = t.Values |> Array.map asin
+        (result, t.Shape)
+
+    let inline AcosT(t: RawTensorCPU< ^T >) : (^T[] * Shape) =
+ let result = t.Values |> Array.map acos + (result, t.Shape) + + let inline AtanT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map atan + (result, t.Shape) + + let inline Random ofDouble (shape:Shape) : (^T[] * Shape) = + let values = Array.init (shapeLength shape) (fun _ -> ofDouble (TensorMath.Util.Random.Uniform())) + (values, shape) + + let inline RandomNormal ofDouble (shape:Shape) : (^T[] * Shape) = + let values = Array.init (shapeLength shape) (fun _ -> ofDouble (TensorMath.Util.Random.Normal())) + (values, shape) + + let inline RandomInt ofInt (shape:Shape) (low:int) (high:int) : (^T[] * Shape) = + let values = Array.init (shapeLength shape) (fun _ -> ofInt (TensorMath.Util.Random.Integer(low, high))) + (values, shape) + +/// The concrete implementation of RawTensor for Float32 data. +type RawTensorFloat32(values: float32[], shape:Shape, device) = + inherit RawTensorCPU(values, shape, Dtype.Float32, device) + let create(values, shape) : RawTensor = upcast RawTensorFloat32(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorFloat32(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorFloat32(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, relativeTolerance, absoluteTolerance) = RawTensorCPU.AllClose(t1, t2, float32 relativeTolerance, float32 absoluteTolerance) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t.SoftplusT() = RawTensorCPU.SoftplusT(t) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toSingle() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toSingle() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toSingle(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toSingle(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toSingle()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toSingle()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toSingle(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toSingle()) |> create + override t1.PowTT(t2) = RawTensorCPU.PowTT(t1, t2) |> create + override t2.PowFromT0T(t1) = RawTensorCPU.PowT0T(t1.toSingle(), t2) 
|> create + override t1.PowTT0(t2) = RawTensorCPU.PowTT0(t1, t2.toSingle()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D (t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = + let res = RawTensorCPU.SumT(t) |> create + match resultType with + | None -> res + | Some dtype -> res.Cast(dtype) + override t.SumTDim(dim, resultType) = + let res = RawTensorCPU.SumTDim(t, dim) + match resultType with + | None -> res :> _ + | Some dtype -> res.Cast(dtype) + override t.SignT() = RawTensorCPU.SignT (sign >> float32) t |> create + override t.FloorT() = RawTensorCPU.FloorT(t) |> create + override t.CeilT() = RawTensorCPU.CeilT(t) |> create + override t.RoundT() = RawTensorCPU.RoundT(t) |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + override t.SigmoidT() = RawTensorCPU.SigmoidT(t) |> create + override t.ExpT() = RawTensorCPU.ExpT(t) |> create + override t.LogT() = RawTensorCPU.LogT(t) |> create + override t.Log10T() = RawTensorCPU.Log10T(t) |> create + override t.SqrtT() = RawTensorCPU.SqrtT(t) |> create + override t.SinT() = RawTensorCPU.SinT(t) |> create + override t.CosT() = RawTensorCPU.CosT(t) |> create + override t.TanT() = RawTensorCPU.TanT(t) |> create + override t.SinhT() = RawTensorCPU.SinhT(t) |> create + override t.CoshT() = RawTensorCPU.CoshT(t) |> create + override t.TanhT() = RawTensorCPU.TanhT(t) |> 
create + override t.AsinT() = RawTensorCPU.AsinT(t) |> create + override t.AcosT() = RawTensorCPU.AcosT(t) |> create + override t.AtanT() = RawTensorCPU.AtanT(t) |> create + override t.InverseT() = RawTensorCPU.InverseT(t) :> _ + override t.DetT() = RawTensorCPU.DetT(t) :> _ + override a.SolveTT(b) = RawTensorCPU.SolveTT(a, b) :> _ + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toSingle()) |> createOn device + static member Random(shape:Shape, device) = RawTensorCPU.Random float32 shape |> createOn device + static member RandomNormal(shape:Shape, device) = RawTensorCPU.RandomNormal float32 shape |> createOn device + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt float32 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorFloat64(values: double[], shape:Shape, device) = + inherit RawTensorCPU(values, shape, Dtype.Float64, device) + + let create(values, shape) : RawTensor = upcast RawTensorFloat64(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorFloat64(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorFloat64(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, relativeTolerance, absoluteTolerance) = RawTensorCPU.AllClose(t1, t2, relativeTolerance, absoluteTolerance) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t.SoftplusT() = RawTensorCPU.SoftplusT(t) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toDouble() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toDouble() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toDouble(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toDouble(), t2) |> create + override t1.SubTT0(t2) = 
RawTensorCPU.SubTT0(t1, t2.toDouble()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toDouble()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toDouble(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toDouble()) |> create + override t1.PowTT(t2) = RawTensorCPU.PowTT(t1, t2) |> create + override t2.PowFromT0T(t1) = RawTensorCPU.PowT0T(t1.toDouble(), t2) |> create + override t1.PowTT0(t2) = RawTensorCPU.PowTT0(t1, t2.toDouble()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D double (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D double (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D double (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D double (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D double (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D double (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D (t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = + let res = RawTensorCPU.SumT(t) |> create + match resultType with + | None -> res + | Some dtype -> res.Cast(dtype) + override t.SumTDim(dim, resultType) = + let res = RawTensorCPU.SumTDim(t, dim) + match resultType with + | None -> res :> _ + | Some dtype -> res.Cast(dtype) + override t.SignT() = RawTensorCPU.SignT (sign >> double) t |> create + override t.FloorT() = RawTensorCPU.FloorT(t) |> create + override t.CeilT() = RawTensorCPU.CeilT(t) |> create + override t.RoundT() = RawTensorCPU.RoundT(t) |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + override t.SigmoidT() = RawTensorCPU.SigmoidT(t) |> create + 
override t.ExpT() = RawTensorCPU.ExpT(t) |> create + override t.LogT() = RawTensorCPU.LogT(t) |> create + override t.Log10T() = RawTensorCPU.Log10T(t) |> create + override t.SqrtT() = RawTensorCPU.SqrtT(t) |> create + override t.SinT() = RawTensorCPU.SinT(t) |> create + override t.CosT() = RawTensorCPU.CosT(t) |> create + override t.TanT() = RawTensorCPU.TanT(t) |> create + override t.SinhT() = RawTensorCPU.SinhT(t) |> create + override t.CoshT() = RawTensorCPU.CoshT(t) |> create + override t.TanhT() = RawTensorCPU.TanhT(t) |> create + override t.AsinT() = RawTensorCPU.AsinT(t) |> create + override t.AcosT() = RawTensorCPU.AcosT(t) |> create + override t.AtanT() = RawTensorCPU.AtanT(t) |> create + override t.InverseT() = RawTensorCPU.InverseT(t) :> _ + override t.DetT() = RawTensorCPU.DetT(t) :> _ + override a.SolveTT(b) = RawTensorCPU.SolveTT(a, b) :> _ + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toDouble()) |> createOn device + static member Random(shape:Shape, device) = RawTensorCPU.Random double shape |> createOn device + static member RandomNormal(shape:Shape, device) = RawTensorCPU.RandomNormal double shape |> createOn device + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt double shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorInt8(values: int8[], shape:Shape, device) = + inherit RawTensorCPU(values, shape, Dtype.Int8, device) + + let create(values, shape) : RawTensor = upcast RawTensorInt8(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorInt8(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorInt8(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, _relativeTolerance, _absoluteTolerance) = RawTensorCPU.Equals(t1, t2) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toSByte() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + 
let alpha = match alpha with Some v -> v.toSByte() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toSByte(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toSByte(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toSByte()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toSByte()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toSByte(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toSByte()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D int8 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D int8 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D int8 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D int8 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D int8 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D int8 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D(t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = t.Cast(Dtype.Int64).SumT(?resultType=resultType) + override t.SumTDim(dim, resultType) = t.Cast(Dtype.Int64).SumTDim(dim, ?resultType=resultType) + override t.SignT() = RawTensorCPU.SignT (sign >> int8) t |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + + override t.SoftplusT() = opNotSupported "SoftplusT" t.Dtype + override t1.PowTT(t2) = opNotSupported2 "PowTT" t1.Dtype t2.Dtype + override t2.PowFromT0T(_t1) = opNotSupported "PowT0T" t2.Dtype 
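+    // Note on the integer SumT/SumTDim overrides above: reductions delegate through
+    // Cast(Dtype.Int64) before summing, which keeps narrow integer accumulation from
+    // wrapping (matching PyTorch's promotion of integer sums to int64). Illustrative
+    // sketch in plain F#, not part of the imported code:
+    //
+    //     Array.create 200 100y |> Array.sum   // sbyte arithmetic wraps to 32y
+    //     Array.create 200 100L |> Array.sum   // 20000L, the intended result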
+ override t1.PowTT0(_t2) = opNotSupported "PowTT0" t1.Dtype + override t.FloorT() = opNotSupported "FloorT" t.Dtype + override t.CeilT() = opNotSupported "CeilT" t.Dtype + override t.RoundT() = opNotSupported "RoundT" t.Dtype + override t.SigmoidT() = opNotSupported "SigmoidT" t.Dtype + override t.ExpT() = opNotSupported "ExpT" t.Dtype + override t.LogT() = opNotSupported "LogT" t.Dtype + override t.Log10T() = opNotSupported "Log10T" t.Dtype + override t.SqrtT() = opNotSupported "SqrtT" t.Dtype + override t.SinT() = opNotSupported "SinT" t.Dtype + override t.CosT() = opNotSupported "CosT" t.Dtype + override t.TanT() = opNotSupported "TanT" t.Dtype + override t.SinhT() = opNotSupported "SinhT" t.Dtype + override t.CoshT() = opNotSupported "CoshT" t.Dtype + override t.TanhT() = opNotSupported "TanhT" t.Dtype + override t.AsinT() = opNotSupported "AsinT" t.Dtype + override t.AcosT() = opNotSupported "AcosT" t.Dtype + override t.AtanT() = opNotSupported "AtanT" t.Dtype + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toSByte()) |> createOn device + static member Random(_shape:Shape, _device) = opNotSupported "Random" Dtype.Int8 + static member RandomNormal(_shape:Shape, _device) = opNotSupported "RandomNormal" Dtype.Int8 + static member RandomInt(shape, low, high, device) = RawTensorCPU.RandomInt int8 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorByte(values: byte[], shape:Shape, device) = + inherit RawTensorCPU(values, shape, Dtype.Byte, device) + + let create(values, shape) : RawTensor = upcast RawTensorByte(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorByte(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorByte(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, _relativeTolerance, _absoluteTolerance) = RawTensorCPU.Equals(t1, t2) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + 
override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toByte() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toByte() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toByte(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toByte(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toByte()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toByte()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toByte(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toByte()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D byte (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D byte (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D byte (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D byte (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D byte (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D byte (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D(t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (sbyte >> (~-) >> byte ) (t) |> create + override t.SumT(resultType) = t.Cast(Dtype.Int64).SumT(?resultType=resultType) + override t.SumTDim(dim, resultType) = t.Cast(Dtype.Int64).SumTDim(dim, ?resultType=resultType) + override t.SignT() = RawTensorCPU.SignT (min 1uy) t |> create + override t.AbsT() = RawTensorCPU.AbsT id t |> create 
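+    // Byte is unsigned, so the overrides above differ from the signed integer
+    // tensors: NegT round-trips through sbyte to get two's-complement negation,
+    // SignT is `min 1uy` (0uy stays 0uy, anything nonzero becomes 1uy), and AbsT
+    // is the identity. Sketch of the NegT mapping, for illustration only:
+    //
+    //     (sbyte >> (~-) >> byte) 254uy   // 2uy, i.e. 256 - 254
+    //     (sbyte >> (~-) >> byte) 0uy     // 0uy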
+ override t.ReluT() = RawTensorCPU.ReluT(t) |> create + + override t.SoftplusT() = opNotSupported "SoftplusT" t.Dtype + override t1.PowTT(t2) = opNotSupported2 "PowTT" t1.Dtype t2.Dtype + override t2.PowFromT0T(_t1) = opNotSupported "PowT0T" t2.Dtype + override t1.PowTT0(_t2) = opNotSupported "PowTT0" t1.Dtype + override t.FloorT() = opNotSupported "FloorT" t.Dtype + override t.CeilT() = opNotSupported "CeilT" t.Dtype + override t.RoundT() = opNotSupported "RoundT" t.Dtype + override t.SigmoidT() = opNotSupported "SigmoidT" t.Dtype + override t.ExpT() = opNotSupported "ExpT" t.Dtype + override t.LogT() = opNotSupported "LogT" t.Dtype + override t.Log10T() = opNotSupported "Log10T" t.Dtype + override t.SqrtT() = opNotSupported "SqrtT" t.Dtype + override t.SinT() = opNotSupported "SinT" t.Dtype + override t.CosT() = opNotSupported "CosT" t.Dtype + override t.TanT() = opNotSupported "TanT" t.Dtype + override t.SinhT() = opNotSupported "SinhT" t.Dtype + override t.CoshT() = opNotSupported "CoshT" t.Dtype + override t.TanhT() = opNotSupported "TanhT" t.Dtype + override t.AsinT() = opNotSupported "AsinT" t.Dtype + override t.AcosT() = opNotSupported "AcosT" t.Dtype + override t.AtanT() = opNotSupported "AtanT" t.Dtype + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toByte()) |> createOn device + static member Random(_shape:Shape, _device) = opNotSupported "Random" Dtype.Byte + static member RandomNormal(_shape:Shape, _device) = opNotSupported "RandomNormal" Dtype.Byte + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt byte shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorInt16(values: int16[], shape:Shape, device) = + inherit RawTensorCPU(values, shape, Dtype.Int16, device) + + let create(values, shape) : RawTensor = upcast RawTensorInt16(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorInt16(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorInt16(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, _relativeTolerance, _absoluteTolerance) = RawTensorCPU.Equals(t1, t2) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = 
RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toInt16() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toInt16() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toInt16(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toInt16(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toInt16()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toInt16()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toInt16(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toInt16()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D int16 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D int16 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D int16 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D int16 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D int16 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D int16 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D(t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) 
= t.Cast(Dtype.Int64).SumT(?resultType=resultType) + override t.SumTDim(dim, resultType) = t.Cast(Dtype.Int64).SumTDim(dim, ?resultType=resultType) + override t.SignT() = RawTensorCPU.SignT (sign >> int16) t |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + + override t.SoftplusT() = opNotSupported "SoftplusT" t.Dtype + override t1.PowTT(t2) = opNotSupported2 "PowTT" t1.Dtype t2.Dtype + override t2.PowFromT0T(_t1) = opNotSupported "PowT0T" t2.Dtype + override t1.PowTT0(_t2) = opNotSupported "PowTT0" t1.Dtype + override t.FloorT() = opNotSupported "FloorT" t.Dtype + override t.CeilT() = opNotSupported "CeilT" t.Dtype + override t.RoundT() = opNotSupported "RoundT" t.Dtype + override t.SigmoidT() = opNotSupported "SigmoidT" t.Dtype + override t.ExpT() = opNotSupported "ExpT" t.Dtype + override t.LogT() = opNotSupported "LogT" t.Dtype + override t.Log10T() = opNotSupported "Log10T" t.Dtype + override t.SqrtT() = opNotSupported "SqrtT" t.Dtype + override t.SinT() = opNotSupported "SinT" t.Dtype + override t.CosT() = opNotSupported "CosT" t.Dtype + override t.TanT() = opNotSupported "TanT" t.Dtype + override t.SinhT() = opNotSupported "SinhT" t.Dtype + override t.CoshT() = opNotSupported "CoshT" t.Dtype + override t.TanhT() = opNotSupported "TanhT" t.Dtype + override t.AsinT() = opNotSupported "AsinT" t.Dtype + override t.AcosT() = opNotSupported "AcosT" t.Dtype + override t.AtanT() = opNotSupported "AtanT" t.Dtype + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toInt16()) |> createOn device + static member Random(_shape:Shape, _device) = opNotSupported "Random" Dtype.Int16 + static member RandomNormal(_shape:Shape, _device) = opNotSupported "RandomNormal" Dtype.Int16 + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt int16 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorInt32(values: int32[], shape:Shape, device) = + inherit RawTensorCPU(values, shape, Dtype.Int32, device) + + let create(values, shape) : RawTensor = upcast RawTensorInt32(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorInt32(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorInt32(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, _relativeTolerance, _absoluteTolerance) = RawTensorCPU.Equals(t1, t2) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override 
t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toInt32() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toInt32() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toInt32(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toInt32(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toInt32()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toInt32()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toInt32(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toInt32()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D int32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D int32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D int32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D int32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D int32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D int32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D(t1, t2, stride, padding) :> _ + 
override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = t.Cast(Dtype.Int64).SumT(?resultType=resultType) + override t.SumTDim(dim, resultType) = t.Cast(Dtype.Int64).SumTDim(dim, ?resultType=resultType) + override t.SignT() = RawTensorCPU.SignT (sign >> int32) t |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + + override t.SoftplusT() = opNotSupported "SoftplusT" t.Dtype + override t1.PowTT(t2) = opNotSupported2 "PowTT" t1.Dtype t2.Dtype + override t2.PowFromT0T(_t1) = opNotSupported "PowT0T" t2.Dtype + override t1.PowTT0(_t2) = opNotSupported "PowTT0" t1.Dtype + override t.FloorT() = opNotSupported "FloorT" t.Dtype + override t.CeilT() = opNotSupported "CeilT" t.Dtype + override t.RoundT() = opNotSupported "RoundT" t.Dtype + override t.SigmoidT() = opNotSupported "SigmoidT" t.Dtype + override t.ExpT() = opNotSupported "ExpT" t.Dtype + override t.LogT() = opNotSupported "LogT" t.Dtype + override t.Log10T() = opNotSupported "Log10T" t.Dtype + override t.SqrtT() = opNotSupported "SqrtT" t.Dtype + override t.SinT() = opNotSupported "SinT" t.Dtype + override t.CosT() = opNotSupported "CosT" t.Dtype + override t.TanT() = opNotSupported "TanT" t.Dtype + override t.SinhT() = opNotSupported "SinhT" t.Dtype + override t.CoshT() = opNotSupported "CoshT" t.Dtype + override t.TanhT() = opNotSupported "TanhT" t.Dtype + override t.AsinT() = opNotSupported "AsinT" t.Dtype + override t.AcosT() = opNotSupported "AcosT" t.Dtype + override t.AtanT() = opNotSupported "AtanT" t.Dtype + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toInt32()) |> createOn device + static member Random(_shape:Shape, _device) = opNotSupported "Random" Dtype.Int32 + static member RandomNormal(_shape:Shape, _device) = opNotSupported "RandomNormal" Dtype.Int32 + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt int32 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorInt64(values: int64[], shape:Shape, device) = + inherit RawTensorCPU(values, shape, Dtype.Int64, device) + + let create(values, shape) : RawTensor = upcast RawTensorInt64(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorInt64(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorInt64(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = 
RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, _relativeTolerance, _absoluteTolerance) = RawTensorCPU.Equals(t1, t2) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toInt64() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toInt64() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toInt64(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toInt64(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toInt64()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toInt64()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toInt64(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toInt64()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D int64 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D int64 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D int64 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D int64 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D int64 (t1, originalInput, 
kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D int64 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D(t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = + let res = RawTensorCPU.SumT(t) |> create + match resultType with + | None -> res + | Some dtype -> res.Cast(dtype) + override t.SumTDim(dim, resultType) = + let res = RawTensorCPU.SumTDim(t, dim) + match resultType with + | None -> res :> _ + | Some dtype -> res.Cast(dtype) + override t.SignT() = RawTensorCPU.SignT (sign >> int64) t |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + + override t.SoftplusT() = opNotSupported "SoftplusT" t.Dtype + override t1.PowTT(t2) = opNotSupported2 "PowTT" t1.Dtype t2.Dtype + override t2.PowFromT0T(_t1) = opNotSupported "PowT0T" t2.Dtype + override t1.PowTT0(_t2) = opNotSupported "PowTT0" t1.Dtype + override t.FloorT() = opNotSupported "FloorT" t.Dtype + override t.CeilT() = opNotSupported "CeilT" t.Dtype + override t.RoundT() = opNotSupported "RoundT" t.Dtype + override t.SigmoidT() = opNotSupported "SigmoidT" t.Dtype + override t.ExpT() = opNotSupported "ExpT" t.Dtype + override t.LogT() = opNotSupported "LogT" t.Dtype + override t.Log10T() = opNotSupported "Log10T" t.Dtype + override t.SqrtT() = opNotSupported "SqrtT" t.Dtype + override t.SinT() = opNotSupported "SinT" t.Dtype + override t.CosT() = opNotSupported "CosT" t.Dtype + override t.TanT() = opNotSupported "TanT" t.Dtype + override t.SinhT() = opNotSupported "SinhT" t.Dtype + override t.CoshT() = opNotSupported "CoshT" t.Dtype + override t.TanhT() = opNotSupported "TanhT" t.Dtype + override t.AsinT() = opNotSupported "AsinT" t.Dtype + override t.AcosT() = opNotSupported "AcosT" t.Dtype + override t.AtanT() = opNotSupported "AtanT" t.Dtype + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toInt64()) |> createOn device + static member Random(_shape:Shape, _device) = opNotSupported "Random" Dtype.Int64 + static member RandomNormal(_shape:Shape, _device) = opNotSupported "RandomNormal" Dtype.Int64 + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt int64 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorBool(values: bool[], shape:Shape, device) = + inherit RawTensorCPU(values, shape, Dtype.Bool, device) + + let create(values, shape) : 
RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorBool(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, _relativeTolerance, _absoluteTolerance) = RawTensorCPU.Equals(t1, t2) + override t1.LtTT(t2) = t1.MakeLike(Array.map2 (<) t1.Values (t2.GetTypedValues()), t1.Shape) + override t1.GtTT(t2) = t1.MakeLike(Array.map2 (>) t1.Values (t2.GetTypedValues()), t1.Shape) + override t1.LeTT(t2) = t1.MakeLike(Array.map2 (<=) t1.Values (t2.GetTypedValues()), t1.Shape) + override t1.GeTT(t2) = t1.MakeLike(Array.map2 (>=) t1.Values (t2.GetTypedValues()), t1.Shape) + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> create + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> create + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toBool() | None -> true + t1.MakeLike(Array.map2 (||) t1.Values (Array.map (fun x -> alpha && x) (t2.GetTypedValues())), t1.Shape) + override t1.AddTT0(t2, alpha) = + let t2 = t2.toBool() + let alpha = match alpha with Some v -> v.toBool() | None -> true + let values = Array.map (fun a -> a || (alpha && t2)) t1.Values + t1.MakeLike(values, t1.Shape) + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((||), t1, location, t2) |> create + override t1.MulTT(t2) = t1.MakeLike(Array.map2 (&&) t1.Values (t2.GetTypedValues()), t1.Shape) + override t1.MulTT0(t2) = + let t2 = t2.toBool() + t1.MakeLike(Array.map (fun a -> a && t2) t1.Values, t1.Shape) + override t.SumT(resultType) = t.Cast(Int64).SumT(?resultType=resultType) + override t.SumTDim(dim, resultType) = t.Cast(Dtype.Int64).SumTDim(dim, ?resultType=resultType) + override t.SignT() = t :> _ + + override t.ClampT(_low, _high) = opNotSupported "Clamp" t.Dtype + override t1.SubTT(t2) = opNotSupported2 "SubTT" t1.Dtype t2.Dtype + override t2.SubFromT0T(_t1) = opNotSupported "SubT0T" t2.Dtype + override t1.SubTT0(_t2) = opNotSupported "SubTT0" t1.Dtype + override t1.DivTT(t2) = opNotSupported2 "DivTT" t1.Dtype t2.Dtype + override t2.DivFromT0T(_t1) = opNotSupported "DivT0T" t2.Dtype + override t1.DivTT0(_t2) = opNotSupported "DivTT0" t1.Dtype + override t1.MatMulTT(t2) = opNotSupported2 "MatMulTT" t1.Dtype t2.Dtype + override t1.BMMTT(t2) = opNotSupported2 "BMMTT" t1.Dtype t2.Dtype + override t1.MaxPool1D(_kernelSize, _stride, _padding) = opNotSupported "MaxPool1D" t1.Dtype + override t1.MaxPool2D(_kernelSize, _stride, _padding) = opNotSupported "MaxPool2D" t1.Dtype + override t1.MaxPool3D(_kernelSize, _stride, _padding) = opNotSupported "MaxPool3D" t1.Dtype + override t1.MaxUnpool1D(_indices, _outputSize) = opNotSupported "MaxUnpool1D" t1.Dtype + override t1.MaxUnpool2D(_indices, _outputSize) = opNotSupported "MaxUnpool2D" t1.Dtype + override t1.MaxUnpool3D(_indices, _outputSize) = opNotSupported "MaxUnpool3D" t1.Dtype + override t1.Conv1D(t2, _stride, _padding) = opNotSupported2 "Conv1D" t1.Dtype t2.Dtype + override t1.Conv2D(t2, _stride, _padding) = opNotSupported2 "Conv2D" t1.Dtype t2.Dtype + override t1.Conv3D(t2, _stride, _padding) = 
opNotSupported2 "Conv3D" t1.Dtype t2.Dtype + override t1.AvgPool1D(_kernelSize, _stride, _padding) = opNotSupported "AvgPool1D" t1.Dtype + override t1.AvgPool2D(_kernelSize, _stride, _padding) = opNotSupported "AvgPool2D" t1.Dtype + override t1.AvgPool3D(_kernelSize, _stride, _padding) = opNotSupported "AvgPool3D" t1.Dtype + override t1.AvgPoolReverse1D(_originalInput, _kernelSize, _stride, _padding) = opNotSupported "AvgPoolReverse1D" t1.Dtype + override t1.AvgPoolReverse2D(_originalInput, _kernelSize, _stride, _padding) = opNotSupported "AvgPoolReverse2D" t1.Dtype + override t1.AvgPoolReverse3D(_originalInput, _kernelSize, _stride, _padding) = opNotSupported "AvgPoolReverse3D" t1.Dtype + override t.NegT() = opNotSupported "NegT" t.Dtype + override t.AbsT() = opNotSupported "AbsT" t.Dtype + override t.ReluT() = opNotSupported "ReluT" t.Dtype + override t.SoftplusT() = opNotSupported "SoftplusT" t.Dtype + override t1.PowTT(t2) = opNotSupported2 "PowTT" t1.Dtype t2.Dtype + override t2.PowFromT0T(_t1) = opNotSupported "PowT0T" t2.Dtype + override t1.PowTT0(_t2) = opNotSupported "PowTT0" t1.Dtype + override t.FloorT() = opNotSupported "FloorT" t.Dtype + override t.CeilT() = opNotSupported "CeilT" t.Dtype + override t.RoundT() = opNotSupported "RoundT" t.Dtype + override t.SigmoidT() = opNotSupported "SigmoidT" t.Dtype + override t.ExpT() = opNotSupported "ExpT" t.Dtype + override t.LogT() = opNotSupported "LogT" t.Dtype + override t.Log10T() = opNotSupported "Log10T" t.Dtype + override t.SqrtT() = opNotSupported "SqrtT" t.Dtype + override t.SinT() = opNotSupported "SinT" t.Dtype + override t.CosT() = opNotSupported "CosT" t.Dtype + override t.TanT() = opNotSupported "TanT" t.Dtype + override t.SinhT() = opNotSupported "SinhT" t.Dtype + override t.CoshT() = opNotSupported "CoshT" t.Dtype + override t.TanhT() = opNotSupported "TanhT" t.Dtype + override t.AsinT() = opNotSupported "AsinT" t.Dtype + override t.AcosT() = opNotSupported "AcosT" t.Dtype + override t.AtanT() = opNotSupported "AtanT" t.Dtype + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = ([| false |], Shape.scalar) |> createOn device + static member One(device) = ([| true |], Shape.scalar) |> createOn device + static member Zeros(shape:Shape, device) = (Array.zeroCreate (shapeLength shape), shape) |> createOn device + static member Empty(shape:Shape, device) = (Array.zeroCreate (shapeLength shape), shape) |> createOn device + static member Ones(shape:Shape, device) = (Array.create (shapeLength shape) true, shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toBool()) |> createOn device + static member Random(_shape:Shape, _device) = opNotSupported "Random" Dtype.Bool + static member RandomNormal(_shape:Shape, _device) = opNotSupported "RandomNormal" Dtype.Bool + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt System.Convert.ToBoolean shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +/// The concrete implementation of RawTensor for Float16 data. 
+type RawTensorFloat16(values: float32[], shape:Shape, device) = + inherit RawTensorCPU(values, shape, Dtype.Float16, device) + let create(values, shape) : RawTensor = upcast RawTensorFloat16(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorFloat16(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorFloat16(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, relativeTolerance, absoluteTolerance) = RawTensorCPU.AllClose(t1, t2, float32 relativeTolerance, float32 absoluteTolerance) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t.SoftplusT() = RawTensorCPU.SoftplusT(t) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toSingle() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toSingle() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toSingle(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toSingle(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toSingle()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toSingle()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toSingle(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toSingle()) |> create + override t1.PowTT(t2) = RawTensorCPU.PowTT(t1, t2) |> create + override t2.PowFromT0T(t1) = RawTensorCPU.PowT0T(t1.toSingle(), t2) |> create + override t1.PowTT0(t2) = RawTensorCPU.PowTT0(t1, t2.toSingle()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU, outputSize) :> _ + 
override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D (t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = + let res = RawTensorCPU.SumT(t) |> create + match resultType with + | None -> res + | Some dtype -> res.Cast(dtype) + override t.SumTDim(dim, resultType) = + let res = RawTensorCPU.SumTDim(t, dim) + match resultType with + | None -> res :> _ + | Some dtype -> res.Cast(dtype) + override t.SignT() = RawTensorCPU.SignT (sign >> float32) t |> create + override t.FloorT() = RawTensorCPU.FloorT(t) |> create + override t.CeilT() = RawTensorCPU.CeilT(t) |> create + override t.RoundT() = RawTensorCPU.RoundT(t) |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + override t.SigmoidT() = RawTensorCPU.SigmoidT(t) |> create + override t.ExpT() = RawTensorCPU.ExpT(t) |> create + override t.LogT() = RawTensorCPU.LogT(t) |> create + override t.Log10T() = RawTensorCPU.Log10T(t) |> create + override t.SqrtT() = RawTensorCPU.SqrtT(t) |> create + override t.SinT() = RawTensorCPU.SinT(t) |> create + override t.CosT() = RawTensorCPU.CosT(t) |> create + override t.TanT() = RawTensorCPU.TanT(t) |> create + override t.SinhT() = RawTensorCPU.SinhT(t) |> create + override t.CoshT() = RawTensorCPU.CoshT(t) |> create + override t.TanhT() = RawTensorCPU.TanhT(t) |> create + override t.AsinT() = RawTensorCPU.AsinT(t) |> create + override t.AcosT() = RawTensorCPU.AcosT(t) |> create + override t.AtanT() = RawTensorCPU.AtanT(t) |> create + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, 
value:scalar, device) = RawTensorCPU.Full (shape, value.toSingle()) |> createOn device + static member Random(shape:Shape, device) = RawTensorCPU.Random float32 shape |> createOn device + static member RandomNormal(shape:Shape, device) = RawTensorCPU.RandomNormal float32 shape |> createOn device + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt float32 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +/// The concrete implementation of RawTensor for BFloat16 data. +type RawTensorBFloat16(values: float32[], shape:Shape, device) = + inherit RawTensorCPU(values, shape, Dtype.BFloat16, device) + let create(values, shape) : RawTensor = upcast RawTensorBFloat16(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorBFloat16(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorBFloat16(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, relativeTolerance, absoluteTolerance) = RawTensorCPU.AllClose(t1, t2, float32 relativeTolerance, float32 absoluteTolerance) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t.SoftplusT() = RawTensorCPU.SoftplusT(t) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toSingle() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toSingle() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toSingle(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toSingle(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toSingle()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toSingle()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toSingle(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toSingle()) |> create + override t1.PowTT(t2) = RawTensorCPU.PowTT(t1, t2) |> create + override t2.PowFromT0T(t1) = RawTensorCPU.PowT0T(t1.toSingle(), t2) |> create + override t1.PowTT0(t2) = RawTensorCPU.PowTT0(t1, t2.toSingle()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) =
RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D (t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = + let res = RawTensorCPU.SumT(t) |> create + match resultType with + | None -> res + | Some dtype -> res.Cast(dtype) + override t.SumTDim(dim, resultType) = + let res = RawTensorCPU.SumTDim(t, dim) + match resultType with + | None -> res :> _ + | Some dtype -> res.Cast(dtype) + override t.SignT() = RawTensorCPU.SignT (sign >> float32) t |> create + override t.FloorT() = RawTensorCPU.FloorT(t) |> create + override t.CeilT() = RawTensorCPU.CeilT(t) |> create + override t.RoundT() = RawTensorCPU.RoundT(t) |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + override t.SigmoidT() = RawTensorCPU.SigmoidT(t) |> create + override t.ExpT() = RawTensorCPU.ExpT(t) |> create + override t.LogT() = RawTensorCPU.LogT(t) |> create + override t.Log10T() = RawTensorCPU.Log10T(t) |> create + override t.SqrtT() = RawTensorCPU.SqrtT(t) |> create + override t.SinT() = RawTensorCPU.SinT(t) |> create + override t.CosT() = RawTensorCPU.CosT(t) |> create + override t.TanT() = RawTensorCPU.TanT(t) |> create + override t.SinhT() = RawTensorCPU.SinhT(t) |> create + override t.CoshT() = RawTensorCPU.CoshT(t) |> create + override t.TanhT() = RawTensorCPU.TanhT(t) |> create + override t.AsinT() = RawTensorCPU.AsinT(t) |> create + override t.AcosT() = RawTensorCPU.AcosT(t) |> create + override t.AtanT() = RawTensorCPU.AtanT(t) |> create + override 
t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toSingle()) |> createOn device + static member Random(shape:Shape, device) = RawTensorCPU.Random float32 shape |> createOn device + static member RandomNormal(shape:Shape, device) = RawTensorCPU.RandomNormal float32 shape |> createOn device + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt float32 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +#if TEST_DUPLICATE_BACKEND +type TestDuplicateBackendTensorStatics() = +#else +type ReferenceBackendTensorStatics() = +#endif + + inherit BackendTensorStatics() + + override _.GetDevices(deviceType) = + match deviceType with + | None -> [ Device.CPU (* ; Device.GPU *) ] + | Some DeviceType.CPU -> [ Device.CPU] + //| Some DeviceType.CUDA -> [ Device.GPU ] + | Some _ -> [] + + override _.IsDeviceTypeAvailable (deviceType) = (match deviceType with DeviceType.CPU -> true | _ -> false) + override _.Seed(seed) = Random.Seed(seed) + override _.Zero(dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.Zero(device) + | BFloat16 -> RawTensorBFloat16.Zero(device) + | Float32 -> RawTensorFloat32.Zero(device) + | Float64 -> RawTensorFloat64.Zero(device) + | Int8 -> RawTensorInt8.Zero(device) + | Byte -> RawTensorByte.Zero(device) + | Int16 -> RawTensorInt16.Zero(device) + | Int32 -> RawTensorInt32.Zero(device) + | Int64 -> RawTensorInt64.Zero(device) + | Bool -> RawTensorBool.Zero(device) + override _.One(dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.One(device) + | BFloat16 -> RawTensorBFloat16.One(device) + | Float32 -> RawTensorFloat32.One(device) + | Float64 -> RawTensorFloat64.One(device) + | Int8 -> RawTensorInt8.One(device) + | Byte -> RawTensorByte.One(device) + | Int16 -> RawTensorInt16.One(device) + | Int32 -> RawTensorInt32.One(device) + | Int64 -> RawTensorInt64.One(device) + | Bool -> RawTensorBool.One(device) + override _.Zeros(shape:Shape, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.Zeros(shape, device) + | BFloat16 -> RawTensorBFloat16.Zeros(shape, device) + | Float32 -> RawTensorFloat32.Zeros(shape, device) + | Float64 -> RawTensorFloat64.Zeros(shape, device) + | Int8 -> RawTensorInt8.Zeros(shape, device) + | Byte -> RawTensorByte.Zeros(shape, device) + | Int16 -> RawTensorInt16.Zeros(shape, device) + | Int32 -> RawTensorInt32.Zeros(shape, device) + | Int64 -> RawTensorInt64.Zeros(shape, device) + | Bool -> RawTensorBool.Zeros(shape, device) + override _.Empty(shape:Shape, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.Empty(shape, device) + | BFloat16 -> RawTensorBFloat16.Empty(shape, device) + | Float32 -> RawTensorFloat32.Empty(shape, device) + | Float64 -> RawTensorFloat64.Empty(shape, device) + | Int8 -> 
RawTensorInt8.Empty(shape, device) + | Byte -> RawTensorByte.Empty(shape, device) + | Int16 -> RawTensorInt16.Empty(shape, device) + | Int32 -> RawTensorInt32.Empty(shape, device) + | Int64 -> RawTensorInt64.Empty(shape, device) + | Bool -> RawTensorBool.Empty(shape, device) + override _.Ones(shape:Shape, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.Ones(shape, device) + | BFloat16 -> RawTensorBFloat16.Ones(shape, device) + | Float32 -> RawTensorFloat32.Ones(shape, device) + | Float64 -> RawTensorFloat64.Ones(shape, device) + | Int8 -> RawTensorInt8.Ones(shape, device) + | Byte -> RawTensorByte.Ones(shape, device) + | Int16 -> RawTensorInt16.Ones(shape, device) + | Int32 -> RawTensorInt32.Ones(shape, device) + | Int64 -> RawTensorInt64.Ones(shape, device) + | Bool -> RawTensorBool.Ones(shape, device) + override _.Full(shape:Shape, value:scalar, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.Full(shape, value, device) + | BFloat16 -> RawTensorBFloat16.Full(shape, value, device) + | Float32 -> RawTensorFloat32.Full(shape, value, device) + | Float64 -> RawTensorFloat64.Full(shape, value, device) + | Int8 -> RawTensorInt8.Full(shape, value, device) + | Byte -> RawTensorByte.Full(shape, value, device) + | Int16 -> RawTensorInt16.Full(shape, value, device) + | Int32 -> RawTensorInt32.Full(shape, value, device) + | Int64 -> RawTensorInt64.Full(shape, value, device) + | Bool -> RawTensorBool.Full(shape, value, device) + override _.Random(shape:Shape, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.Random(shape, device) + | BFloat16 -> RawTensorBFloat16.Random(shape, device) + | Float32 -> RawTensorFloat32.Random(shape, device) + | Float64 -> RawTensorFloat64.Random(shape, device) + | Int8 -> RawTensorInt8.Random(shape, device) + | Byte -> RawTensorByte.Random(shape, device) + | Int16 -> RawTensorInt16.Random(shape, device) + | Int32 -> RawTensorInt32.Random(shape, device) + | Int64 -> RawTensorInt64.Random(shape, device) + | Bool -> RawTensorBool.Random(shape, device) + override _.RandomNormal(shape:Shape, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.RandomNormal(shape, device) + | BFloat16 -> RawTensorBFloat16.RandomNormal(shape, device) + | Float32 -> RawTensorFloat32.RandomNormal(shape, device) + | Float64 -> RawTensorFloat64.RandomNormal(shape, device) + | Int8 -> RawTensorInt8.RandomNormal(shape, device) + | Byte -> RawTensorByte.RandomNormal(shape, device) + | Int16 -> RawTensorInt16.RandomNormal(shape, device) + | Int32 -> RawTensorInt32.RandomNormal(shape, device) + | Int64 -> RawTensorInt64.RandomNormal(shape, device) + | Bool -> RawTensorBool.RandomNormal(shape, device) + override _.RandomInt(shape:Shape, low:int, high:int, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.RandomInt(shape, low, high, device) + | BFloat16 -> RawTensorBFloat16.RandomInt(shape, low, high, device) + | Float32 -> RawTensorFloat32.RandomInt(shape, low, high, device) + | Float64 -> RawTensorFloat64.RandomInt(shape, low, high, device) + | Int8 -> RawTensorInt8.RandomInt(shape, low, high, device) + | Byte -> RawTensorByte.RandomInt(shape, low, high, device) + | Int16 -> RawTensorInt16.RandomInt(shape, low, high, device) + | Int32 -> RawTensorInt32.RandomInt(shape, low, high, device) + | Int64 -> RawTensorInt64.RandomInt(shape, low, high, device) + | Bool -> RawTensorBool.RandomInt(shape, low, high, device) + override _.CreateFromFlatArray(values:Array, shape, dtype, device) = + match dtype with + | Float16 -> 
RawTensorFloat16.CreateFromFlatArray(values, shape, device) + | BFloat16 -> RawTensorBFloat16.CreateFromFlatArray(values, shape, device) + | Float32 -> RawTensorFloat32.CreateFromFlatArray(values, shape, device) + | Float64 -> RawTensorFloat64.CreateFromFlatArray(values, shape, device) + | Int8 -> RawTensorInt8.CreateFromFlatArray(values, shape, device) + | Byte -> RawTensorByte.CreateFromFlatArray(values, shape, device) + | Int16 -> RawTensorInt16.CreateFromFlatArray(values, shape, device) + | Int32 -> RawTensorInt32.CreateFromFlatArray(values, shape, device) + | Int64 -> RawTensorInt64.CreateFromFlatArray(values, shape, device) + | Bool -> RawTensorBool.CreateFromFlatArray(values, shape, device) + diff --git a/src/TensorMath.Backends.Reference/TensorMath.Backends.Reference.fsproj b/src/TensorMath.Backends.Reference/TensorMath.Backends.Reference.fsproj new file mode 100644 index 0000000..ab19083 --- /dev/null +++ b/src/TensorMath.Backends.Reference/TensorMath.Backends.Reference.fsproj @@ -0,0 +1,16 @@ + + + + netstandard2.1 + true + + + + + + + + + + + diff --git a/src/TensorMath.Backends.Torch/TensorMath.Backends.Torch.fsproj b/src/TensorMath.Backends.Torch/TensorMath.Backends.Torch.fsproj new file mode 100644 index 0000000..565f37c --- /dev/null +++ b/src/TensorMath.Backends.Torch/TensorMath.Backends.Torch.fsproj @@ -0,0 +1,21 @@ + + + + net8.0 + true + + + + + + + + + + + + + + + + diff --git a/src/TensorMath.Backends.Torch/TensorMath.Torch.fs b/src/TensorMath.Backends.Torch/TensorMath.Torch.fs new file mode 100644 index 0000000..266a36c --- /dev/null +++ b/src/TensorMath.Backends.Torch/TensorMath.Torch.fs @@ -0,0 +1,33 @@ +namespace TensorMath + +open TensorMath +open TensorMath.Backends.Torch +open TorchSharp + +[<AutoOpen>] +module TorchExtensions = + + type dsharp with + + /// <summary> + /// Creates a new TensorMath tensor from the torch tensor. + /// </summary> + static member fromTorch(tt: torch.Tensor) = + Tensor.ofRawTensor(TorchRawTensor(tt)) + + type Tensor with + /// <summary> + /// Converts the primal of a tensor to a torch tensor. + /// </summary> + /// <remarks> + /// If the tensor does not use the Torch backend an exception is raised. + /// + /// Note that this operation takes the primal of the tensor. This means + /// code that converts to Torch tensors will not be differentiable using + /// TensorMath differentiation capabilities. + /// </remarks> + member t.toTorch() = + match t.primalRaw with + | :? TorchRawTensor as trt -> trt.TorchTensor + | _ -> failwith $"toTorch: the input is not a TensorMath.Backends.Torch tensor, its backend is {t.backend}" + diff --git a/src/TensorMath.Backends.Torch/Torch.RawTensor.fs b/src/TensorMath.Backends.Torch/Torch.RawTensor.fs new file mode 100644 index 0000000..41ee780 --- /dev/null +++ b/src/TensorMath.Backends.Torch/Torch.RawTensor.fs @@ -0,0 +1,1594 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository.
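+//
+// Illustrative round-trip through the dsharp.fromTorch / Tensor.toTorch extensions
+// defined in TensorMath.Torch.fs above. A minimal sketch, assuming the Torch backend
+// is loaded; the tensor values here are hypothetical:
+//
+//   let tt = torch.ones(2L, 3L)    // a native TorchSharp tensor
+//   let t = dsharp.fromTorch(tt)   // wrap it as a TensorMath tensor
+//   let tt2 = t.toTorch()          // unwrap the primal back to a torch.Tensor
+//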
+ +namespace rec TensorMath.Backends.Torch + +open System +open TensorMath +open TensorMath.Backends +open TensorMath.Util +open TorchSharp + +type torch_cuda = torch.cuda +type TorchShape = int64[] +type TorchDevice = Torch.Device +type Device = TensorMath.Device +[<AutoOpen>] +module internal Utils = + + let int64s (b: int[]) = Array.map int64 b + + let toTorchType dtype = + match dtype with + | Dtype.Bool -> torch.ScalarType.Bool + | Dtype.Int8 -> torch.ScalarType.Int8 + | Dtype.Byte -> torch.ScalarType.Byte + | Dtype.Int16 -> torch.ScalarType.Int16 + | Dtype.Int32 -> torch.ScalarType.Int32 + | Dtype.Int64 -> torch.ScalarType.Int64 + | Dtype.Float16 -> torch.ScalarType.Float16 + | Dtype.BFloat16 -> torch.ScalarType.BFloat16 + | Dtype.Float32 -> torch.ScalarType.Float32 + | Dtype.Float64 -> torch.ScalarType.Float64 + + /// WARNING: TorchSharp Scalar creation is buggy and doesn't preserve types: https://github.com/xamarin/TorchSharp/issues/331 + let toTorchScalar (x: scalar) = + match x.GetTypeCode() with + | TypeCode.Single -> Scalar.op_Implicit (x.toSingle()) + | TypeCode.Double -> Scalar.op_Implicit (x.toDouble()) + | TypeCode.Int32 -> Scalar.op_Implicit (x.toInt32()) + | TypeCode.Int64 -> Scalar.op_Implicit (x.toInt64()) + | TypeCode.Byte -> Scalar.op_Implicit (x.toByte()) + | TypeCode.SByte -> Scalar.op_Implicit (x.toSByte()) + | TypeCode.Int16 -> Scalar.op_Implicit (x.toInt16()) + | TypeCode.Boolean -> Scalar.op_Implicit (x.toBool()) + | t -> failwithf "unknown scalar type '%A'" t + + let fromTorchType ttype = + match ttype with + | torch.ScalarType.Bool -> Dtype.Bool + | torch.ScalarType.Int8 -> Dtype.Int8 + | torch.ScalarType.Byte -> Dtype.Byte + | torch.ScalarType.Int16 -> Dtype.Int16 + | torch.ScalarType.Int32 -> Dtype.Int32 + | torch.ScalarType.Int64 -> Dtype.Int64 + | torch.ScalarType.Float32 -> Dtype.Float32 + | torch.ScalarType.Float64 -> Dtype.Float64 + | _ -> failwithf "fromTorchType: unsupported Torch type %A" ttype + + let toTorchShape (shape: Shape) : TorchShape = int64s shape + + let fromTorchShape (shape: int64[]) = shape |> Array.map int + + type TensorMath.DeviceType with + member x.ToTorch : TorchSharp.DeviceType = enum (int x) + + type TensorMath.Device with + member x.ToTorch = torch.Device(x.DeviceType.ToTorch, x.DeviceIndex) + + let fromTorchDeviceType (x: TorchSharp.DeviceType) : TensorMath.DeviceType = enum (int x) + + let fromTorchDevice (x: torch.Device) = TensorMath.Device(fromTorchDeviceType x.``type``, x.index) + + let inline combineHashes (h1 : int) (h2 : int) = ((h1 <<< 5) + h1) ^^^ h2 + + let torchMoveTo (tt: torch.Tensor) (device: Device) = + tt.``to``(device.ToTorch) + + type RawTensor with + member x.TorchTensor = (x :?> TorchRawTensor).TorchTensor + +/// This is the base class for all RawTensorXyz types. +/// All type-independent operations are implemented directly on this class. +type TorchRawTensor(tt: torch.Tensor, shape: Shape, dtype: Dtype, device: Device) = + + inherit RawTensor() + + // Note, shape and dtype are stored as fields. These duplicate information in TorchTensor, but + // it is a little too costly to repeatedly re-extract this information.
+ // + // 'device' is not stored as a field, it is rarely accessed and can be fetched from TorchTensor + +#if DEBUG + // Check the invariants associated with the tensors + do + if tt.dtype <> toTorchType dtype then + failwithf "mismatched Torch tensor type, expected %A, got %A" (toTorchType dtype) tt.dtype + + if int tt.device_type <> int device.DeviceType then + failwithf "mismatched Torch tensor device, expected %A, got %A" tt.device_type device.DeviceType + + if int tt.device_index <> int device.DeviceIndex then + failwithf "mismatched Torch tensor index, expected %A, got %A" tt.device_index device.DeviceIndex + + if toTorchShape shape <> tt.shape then + failwithf "mismatched Torch tensor shape, expected %A, got %A" (toTorchShape shape) tt.shape + + let device = () // make sure 'device' isn't accessed in a member and stored as a field +#endif + let mutable tt = tt + let mutable isMutable = false + let checkMutable() = if not isMutable then failwith "the tensor can't be mutated" + do ignore device + + override _.Shape = shape + override _.Dim = shape.Length + override _.Nelement = shapeLength shape + override _.Dtype = dtype + override _.DeviceType : TensorMath.DeviceType = enum (int tt.device_type) + override t.Device = TensorMath.Device(t.DeviceType, tt.device_index) + override _.Backend = Backend.Torch + override _.Handle = box tt + + new (tt: torch.Tensor) = + TorchRawTensor(tt, fromTorchShape tt.shape, fromTorchType tt.dtype, fromTorchDevice tt.device) + + member t.MakeLike(tt, ?shape, ?dtype, ?device) : RawTensor = + upcast TorchRawTensor(tt, defaultArg shape t.Shape, defaultArg dtype t.Dtype, defaultArg device t.Device) + + member _.TorchTensor = tt + + override t.GetItem(indexes:int[]) = + Shape.checkCanIndex t.Shape indexes + if t.Shape.Length = 0 then t.ToScalar() + else t.MakeLike(tt=tt[indexes |> Array.map (fun v -> torch.TensorIndex.Single(int64 v))], shape=[||]).ToScalar() + + override t.GetSlice(fullBounds:int[,]) = + let n = fullBounds.GetLength(0) + let newShape = Shape.checkCanGetSlice t.Shape fullBounds + + let indices = + Array.init n (fun i -> + let start = fullBounds[i,0] + let stop = fullBounds[i,1] + 1 + let len = stop - start + if fullBounds[i,2] = 1 && len = 1 then + torch.TensorIndex.Single(int64 start) + else + torch.TensorIndex.Slice(start=int64 start, stop=int64 stop)) + let res = tt.index(indices) + t.MakeLike(tt=res, shape=newShape) + + override t.Clone() = + t.MakeLike(tt.clone()) + + override t.ComputeHash() = + // Torch Tensors must be CPU before Data can be accessed + let tt = torchMoveTo tt Device.CPU + + let shape = t.Shape + let mutable res = hash shape + let n = shapeLength shape + match dtype with + | Dtype.Int8 -> + let data = tt.data() + for i in 0 .. n-1 do + res <- combineHashes res (int32 data[int64 i]) + | Dtype.Byte -> + let data = tt.data() + for i in 0 .. n-1 do + res <- combineHashes res (int32 data[int64 i]) + | Dtype.Bool -> + let data = tt.data() + for i in 0 .. n-1 do + res <- combineHashes res (int32 data[int64 i]) + | Dtype.Int16 -> + let data = tt.data() + for i in 0 .. n-1 do + res <- combineHashes res (int32 data[int64 i] ) + | Dtype.Int32 -> + let data = tt.data() + for i in 0 .. n-1 do + res <- combineHashes res (int32 data[int64 i]) + | Dtype.Int64 -> + let data = tt.data() + for i in 0 .. n-1 do + res <- combineHashes res (int32 data[int64 i]) + | Dtype.Float16 -> + for i in 0 .. n-1 do + res <- combineHashes res (hash (tt.ReadCpuFloat16(int64 i))) + | Dtype.BFloat16 -> + for i in 0 .. 
n-1 do + res <- combineHashes res (hash (tt.ReadCpuBFloat16(int64 i))) + | Dtype.Float32 -> + let data = tt.data() + for i in 0 .. n-1 do + res <- combineHashes res (hash data[int64 i]) + | Dtype.Float64 -> + let data = tt.data() + for i in 0 .. n-1 do + res <- combineHashes res (hash data[int64 i]) + res + + override t.Expand(newShape) = + t.MakeLike(tt.expand(toTorchShape newShape), shape=newShape) + + override _.ToScalar() : scalar = + match dtype with + | Dtype.Bool -> tt.ToBoolean() :> scalar + | Dtype.Byte -> tt.ToByte() :> scalar + | Dtype.Int8 -> tt.ToSByte() :> scalar + | Dtype.Int16 -> tt.ToInt16() :> scalar + | Dtype.Int32 -> tt.ToInt32() :> scalar + | Dtype.Int64 -> tt.ToInt64() :> scalar + | Dtype.Float16 -> tt.ToSingle() :> scalar + | Dtype.BFloat16 -> tt.ToSingle() :> scalar + | Dtype.Float32 -> tt.ToSingle() :> scalar + | Dtype.Float64 -> tt.ToDouble() :> scalar + + member t.ToValuesTyped<'T>(conv: torch.Tensor -> 'T) : obj = + // Move the tensors to CPU for efficiency since we're accessing all the data anyway + let tt = torchMoveTo tt Device.CPU + match t.Shape with + | [| |] -> tt.ToScalar() |> box + | [| d0 |] -> upcast Array.init<'T> d0 (fun i -> tt[int64 i] |> conv) + | [| d0; d1 |] -> upcast Array2D.init<'T> d0 d1 (fun i j -> tt[int64 i, int64 j] |> conv) + | [| d0; d1; d2 |] -> upcast Array3D.init<'T> d0 d1 d2 (fun i j k -> tt[int64 i, int64 j, int64 k] |> conv) + | [| d0; d1; d2; d3 |] -> upcast Array4D.init<'T> d0 d1 d2 d3 (fun i j k l -> tt[int64 i, int64 j, int64 k, int64 l] |> conv) + | [| d0; d1; d2; d3; d4 |] -> upcast Array5D.init<'T> d0 d1 d2 d3 d4 (fun i j k l m -> tt[int64 i, int64 j, int64 k, int64 l, int64 m] |> conv) + | [| d0; d1; d2; d3; d4; d5 |] -> upcast Array6D.init<'T> d0 d1 d2 d3 d4 d5 (fun i j k l m n -> tt[int64 i, int64 j, int64 k, int64 l, int64 m, int64 n] |> conv) + | _ -> failwithf "Cannot get array for Tensor dimensions > 6. Consider slicing the Tensor. Shape: %A" t.Shape + + override t.ToValues() = + match dtype with + | Dtype.Bool -> t.ToValuesTyped(fun s -> s.ToBoolean()) + | Dtype.Byte -> t.ToValuesTyped(fun s -> s.ToByte()) + | Dtype.Int8 -> t.ToValuesTyped(fun s -> s.ToSByte()) + | Dtype.Int16 -> t.ToValuesTyped(fun s -> s.ToInt16()) + | Dtype.Int32 -> t.ToValuesTyped(fun s -> s.ToInt32()) + | Dtype.Int64 -> t.ToValuesTyped(fun s -> s.ToInt64()) + | Dtype.Float16 -> t.ToValuesTyped(fun s -> s.ToSingle()) + | Dtype.BFloat16 -> t.ToValuesTyped(fun s -> s.ToSingle()) + | Dtype.Float32 -> t.ToValuesTyped(fun s -> s.ToSingle()) + | Dtype.Float64 -> t.ToValuesTyped(fun s -> s.ToDouble()) + + member private _.ToRawDataViaDirectAccess< 'T when 'T: struct and 'T :> ValueType and 'T : (new : unit -> 'T) >() : 'T[] = + // Torch Tensors must be CPU before raw data can be accessed + let tt2 = torchMoveTo tt Device.CPU + + let data = tt2.data<'T>() + let res = Array.zeroCreate<'T> (int32 tt2.NumberOfElements) + for i in 0 .. 
int32 tt2.NumberOfElements - 1 do + res[i] <- data[int64 i] + res + + member t.ToRawData() : Array = + match dtype with + | Dtype.Bool -> t.ToRawDataViaDirectAccess() :> _ + | Dtype.Byte -> t.ToRawDataViaDirectAccess() :> _ + | Dtype.Int8 -> t.ToRawDataViaDirectAccess() :> _ + | Dtype.Int16 -> t.ToRawDataViaDirectAccess() :> _ + | Dtype.Int32 -> t.ToRawDataViaDirectAccess() :> _ + | Dtype.Int64 -> t.ToRawDataViaDirectAccess() :> _ + | Dtype.Float32 -> t.ToRawDataViaDirectAccess() :> _ + | Dtype.Float64 -> t.ToRawDataViaDirectAccess() :> _ + | Dtype.Float16 -> + // Move the tensors to CPU for efficiency since we're accessing all the data anyway + let tt2 = torchMoveTo tt Device.CPU + Array.init (int32 tt2.NumberOfElements) (int64 >> tt2.ReadCpuFloat16) :> _ + | Dtype.BFloat16 -> + // Move the tensors to CPU for efficiency since we're accessing all the data anyway + let tt2 = torchMoveTo tt Device.CPU + Array.init (int32 tt2.NumberOfElements) (int64 >> tt2.ReadCpuBFloat16) :> _ + + override _.StackTs(tensors, dim) = + let tts, shapes = tensors |> Array.map (fun t -> (t :?> TorchRawTensor).TorchTensor, t.Shape) |> Array.unzip + let _n, _shape1, _shape2, newShape = Shape.checkCanStack shapes dim + let result = torch.stack(tts, int64 dim) + (tensors[0] :?> TorchRawTensor).MakeLike(result, newShape) + + override t.UnstackT(dim) = + let shape = t.Shape + let _shape1, _shape2, unstackedShape = Shape.checkCanUnstack shape dim + let results = tt.unbind(dim) + results |> Array.map (fun rvalues -> t.MakeLike(rvalues, shape=unstackedShape)) + + override t.CatTs(tensors, dim) = + let values, shapes = tensors |> Array.map (fun t -> t.TorchTensor, t.Shape) |> Array.unzip + let _n, _shape1, _m2, _shape3, outShape = Shape.checkCanCat shapes dim + let result = torch.cat(values, int64 dim) + t.MakeLike(result, outShape) + + override t.SplitT(sizes, dim) = + let shape = t.Shape + let outShapes = Shape.checkCanSplit shape sizes dim + let results = tt.split(int64s sizes, int64 dim) + (results, outShapes) ||> Array.map2 (fun rvalues outShape -> + t.MakeLike(rvalues, shape=outShape)) + + override t.PermuteT(permutation) = + let _, newShape = Shape.checkCanPermute t.Shape permutation + let result = tt.permute(int64s permutation) + t.MakeLike(result, shape=newShape) + + override t.TransposeT(dim0, dim1) = + Shape.checkCanTranspose t.Shape dim0 dim1 + let result = tt.transpose(int64 dim0, int64 dim1) + let shape = result.shape |> Array.map int32 + t.MakeLike(result, shape=shape) + + override t.TransposeT2() = + Shape.checkCanTranspose2d t.Dim + let newShape = Shape.computeTranspose2d t.Shape + let result = tt.t() + t.MakeLike(result, shape=newShape) + + override t.InverseT() = + Shape.checkCanInvert t.Shape + let result = tt.inverse() + t.MakeLike(result, shape=t.Shape) + + override t.DetT() = + Shape.checkCanDet t.Shape + let result = torch.linalg.det(tt) + let shape = result.shape |> Array.map int32 + t.MakeLike(result, shape=shape) + + override t1.SolveTT(t2) = + let newShape = Shape.checkCanSolve t1.Shape t2.Shape + let result = torch.linalg.solve(tt, t2.TorchTensor) + t1.MakeLike(result, shape=newShape) + + override t.SqueezeT(dim) = + let shape = t.Shape + let newShape = Shape.squeeze dim shape + let mutable res = tt + let mutable c = 0 + for i in 0 .. 
t.Dim - 1 do + if shape[i] = 1 && (dim = -1 || i = dim) then + res <- res.squeeze(int64 c) + else + c <- c + 1 + t.MakeLike(res, shape=newShape) + + override t.UnsqueezeT(dim) = + let outputShape = Shape.checkCanUnsqueeze dim t.Shape + t.MakeLike(tt.unsqueeze(int64 dim), shape=outputShape) + + override t.FlipT(dims:int[]) = + // "flip_cuda" not implemented for 'Bool'" + let result = + if dtype = Dtype.Bool then + tt.to_type(torch.ScalarType.Byte).flip(int64s dims).to_type(torch.ScalarType.Bool) + elif dtype = Dtype.Float16 || dtype = Dtype.BFloat16 then + tt.to_type(torch.ScalarType.Float32).flip(int64s dims).to_type(toTorchType dtype) + else + tt.flip(int64s dims) + t.MakeLike(result) + + override t.DilateT(dilations:int[]) = + Shape.checkCanDilate t.Dim dilations + let outputShape = Shape.dilated t.Shape dilations + let dims = dilations.Length + let mutable res = tt + for i=0 to dims-1 do + let s = res.shape + s[i] <- int64 outputShape[i] + let resnew = t.ZerosLike(fromTorchShape s) + let indices = Array.init t.Shape[i] id |> Array.map ((*) dilations[i] >> int64) + let mutable d = TorchInt64TensorOps().CreateFromFlatArray(indices, shape=[|t.Shape[i]|], device=t.Device) + for _=0 to i-1 do + d <- d.UnsqueezeT(0) + for _=i+1 to dims-1 do + d <- d.UnsqueezeT(d.Dim) + d <- d.Expand(fromTorchShape res.shape) + res <- resnew.TorchTensor.scatter(int64 i, d.TorchTensor, res) + t.MakeLike(res, outputShape) + + override t.UndilateT(dilations:int[]) = + let shape = t.Shape + let outputShape = Shape.undilatedShape shape dilations + let mutable res = tt + for d in 0 .. dilations.Length - 1 do + res <- res.slice(int64 d, 0L, int64 shape[d], int64 dilations[d]) + t.MakeLike(res, outputShape) + + override t.GatherT(dim:int, indices) = + Shape.checkCanGather t.Shape dim indices.Shape indices.Dtype + + // NOTE: TensorMath currently expects indices as an Int32 tensor, Torch wants Int64 + let indices = indices.Cast(Dtype.Int64) + let res = + // LibTorch Gather on float16/bfloat16 gives : method_name not implemented for 'BFloat16' + if dtype = Dtype.Float16 || dtype = Dtype.BFloat16 then + tt.to_type(torch.ScalarType.Float32).gather(int64 dim, indices.TorchTensor).to_type(toTorchType dtype) + else + t.TorchTensor.gather(int64 dim, indices.TorchTensor) + t.MakeLike(res, indices.Shape) + + override t.ScatterT(dim:int, indices, destinationShape:Shape) = + Shape.checkCanScatter t.Shape dim indices.Shape indices.Dtype destinationShape + // NOTE: TensorMath currently expects indices as an Int32 tensor, Torch wants Int64 + let indices = indices.Cast(Dtype.Int64) + let res = t.ZerosLike(destinationShape) + // LibTorch Scatter on float16/bfloat16 gives : method_name not implemented for 'BFloat16' + if dtype = Dtype.Float16 || dtype = Dtype.BFloat16 then + let res2 = res.TorchTensor.to_type(torch.ScalarType.Float32) + res2.scatter_(int64 dim, indices.TorchTensor, t.TorchTensor.to_type(torch.ScalarType.Float32)) |> ignore + t.MakeLike(res2.to_type(toTorchType dtype), destinationShape) + else + res.TorchTensor.scatter_(int64 dim, indices.TorchTensor, t.TorchTensor) |> ignore + res + + override t.ViewT(shape:Shape) = + Shape.checkCanView t.Shape shape + t.MakeLike(tt.reshape(toTorchShape shape), shape=shape) // Use Reshape instead of View to ensure underlying non-contiguous libtorch tensors can be viewed. Internally Reshape uses View if possible, otherwise it copies data to a contiguous tensor and then views. 
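+ // Illustrative note (an addition, not from the original source): reshape also covers
+ // non-contiguous inputs where an in-place view is impossible, e.g. a transposed tensor:
+ //   let a = torch.rand(2L, 3L)
+ //   let b = a.t()          // transposed, typically non-contiguous
+ //   let c = b.reshape(6L)  // succeeds, copying to contiguous storage if needed; b.view(6L) would raise an error here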
+ + override t.Cast(newDtype: Dtype) = + if newDtype = dtype then + upcast t + else + let result = tt.to_type(toTorchType newDtype) + t.MakeLike(result, dtype=newDtype) + + override t.MoveTo(device) = + if t.Device = device then (t :> _) else + let tt2 = torchMoveTo tt device + t.MakeLike(tt2, device=device) + + override t.Equals(t2:RawTensor) : bool = + if dtype = t2.Dtype then + let r1 = (t.Shape = t2.Shape) + if not r1 then false else + let tt2 = t2.TorchTensor + let r2 = tt.Equals(tt2) + r2 + else + opNotSupported2 "Equals" dtype t2.Dtype + + override t.AllClose(t2:RawTensor, relativeTolerance, absoluteTolerance) = + if dtype = t2.Dtype then + match dtype with + | Dtype.IntegralOrBool -> t.Equals(t2) + | Dtype.Float16 | Dtype.BFloat16 -> + // Needed because LibTorch 1.7.0 says "isfinite" not implemented for 'BFloat16' + tt.to_type(torch.ScalarType.Float32).allclose(t2.TorchTensor.to_type(torch.ScalarType.Float32), relativeTolerance, absoluteTolerance) + | _ -> tt.allclose(t2.TorchTensor, relativeTolerance, absoluteTolerance) + else + opNotSupported2 "AllClose" dtype t2.Dtype + + override t.ClampT(low, high) = + let result = tt.clamp(low.TorchTensor.ToScalar(), high.TorchTensor.ToScalar()) + t.MakeLike(result) + + override t1.LtTT(t2) = + let result = tt.lt(t2.TorchTensor) + t1.MakeLike(result, dtype=Dtype.Bool) + + override t1.GtTT(t2) = + let result = tt.gt(t2.TorchTensor) + t1.MakeLike(result, dtype=Dtype.Bool) + + override t1.LeTT(t2) = + let result = tt.le(t2.TorchTensor) + t1.MakeLike(result, dtype=Dtype.Bool) + + override t1.GeTT(t2) = + let result = tt.ge(t2.TorchTensor) + t1.MakeLike(result, dtype=Dtype.Bool) + + override t1.EqTT(t2) = + let result = tt.eq(t2.TorchTensor) + t1.MakeLike(result, dtype=Dtype.Bool) + + override t1.NeqTT(t2) = + let result = tt.ne(t2.TorchTensor) + t1.MakeLike(result, dtype=Dtype.Bool) + + override t.MaxReduceT(dim, keepDim) = + let (struct (maxValues, indexes)) = tt.max(int64 dim, keepdim=keepDim) + let newShape = Shape.checkCanMinMaxReduce dim keepDim t.Shape + let maxValuesResult = t.MakeLike(maxValues, shape=newShape) + let indexesResult = t.MakeLike(indexes, shape=newShape, dtype=Dtype.Int64).Cast(Dtype.Int32) + maxValuesResult, indexesResult + + override t.MaxIndexT() = + // LibTorch 1.7.0: Max on float16/bfloat16 causes grief + let tt = + if dtype = Dtype.Float16 || dtype = Dtype.BFloat16 then + tt.to_type(torch.ScalarType.Float32) + else + tt + let res = Array.zeroCreate t.Dim + let idxs = Array.zeroCreate t.Dim + let mutable values = tt + // repeatedly reduce, tracking the recorded index for the final maximum eventually selected + for i = t.Dim - 1 downto 0 do + let (struct (values2, indexes)) = values.max(int64 i) + values <- values2 + idxs[i] <- indexes + + for i = 0 to t.Dim - 1 do + let idx = idxs[i] + + res[i] <- + match i with + | 0 -> idx.ToInt64() + | 1 -> idx[res[0]].ToInt64() + | 2 -> idx[res[0], res[1]].ToInt64() + | 3 -> idx[res[0], res[1], res[2]].ToInt64() + | 4 -> idx[res[0], res[1], res[2], res[3]].ToInt64() + | 5 -> idx[res[0], res[1], res[2], res[3], res[4]].ToInt64() + | 6 -> idx[res[0], res[1], res[2], res[3], res[4], res[5]].ToInt64() + | _ -> failwith "MaxIndexT > 6d nyi for torch" + res |> Array.map int32 + + override t.MinReduceT(dim, keepDim) = + let (struct (minValues, indexes)) = tt.min(int64 dim, keepdim=keepDim) + let newShape = Shape.checkCanMinMaxReduce dim keepDim t.Shape + let minValuesResult = t.MakeLike(minValues, shape=newShape) + let indexesResult = t.MakeLike(indexes, shape=newShape,
dtype=Dtype.Int64).Cast(Dtype.Int32) + minValuesResult, indexesResult + + override t.MinIndexT() = + // LibTorch 1.7.0: Min on float16/bfloat16 causes grief + let tt = + if dtype = Dtype.Float16 || dtype = Dtype.BFloat16 then + tt.to_type(torch.ScalarType.Float32) + else + tt + let res = Array.zeroCreate t.Dim + let idxs = Array.zeroCreate t.Dim + let mutable values = tt + // repeatedly reduce, tracking the recorded index for the final minimum eventually selected + for i = t.Dim - 1 downto 0 do + let (struct (values2, indexes)) = values.min(int64 i) + values <- values2 + idxs[i] <- indexes + + for i = 0 to t.Dim - 1 do + let idx = idxs[i] + + res[i] <- + match i with + | 0 -> idx.ToInt64() + | 1 -> idx[res[0]].ToInt64() + | 2 -> idx[res[0], res[1]].ToInt64() + | 3 -> idx[res[0], res[1], res[2]].ToInt64() + | 4 -> idx[res[0], res[1], res[2], res[3]].ToInt64() + | 5 -> idx[res[0], res[1], res[2], res[3], res[4]].ToInt64() + | 6 -> idx[res[0], res[1], res[2], res[3], res[4], res[5]].ToInt64() + | _ -> failwith "MinIndexT > 6d nyi for torch" + res |> Array.map int32 + + override t1.AddTT(t2, alpha) = + let result = + match alpha with + | Some v -> tt.add(t2.TorchTensor, toTorchScalar v) + | None -> tt.add(t2.TorchTensor) + t1.MakeLike(result) + + override t1.AddTT0(t2: scalar, ?alpha: scalar) = + let result = + match alpha with + | Some v -> tt.add(toTorchScalar t2, toTorchScalar v) + | None -> tt.add(toTorchScalar t2) + t1.MakeLike(result) + + override t1.AddTTSlice(location:int[], t2) = + Shape.checkCanAddSlice t1.Shape location t2.Shape + let shape1 = t1.Shape + let shape2 = t2.Shape + let expandedShape2 = Shape.unsqueezeAs shape2 shape1 + let t2Expanded = t2.TorchTensor.expand(toTorchShape expandedShape2) + let res = tt.clone() + let mutable t1Slice = res // will share memory with res + for d in 0 .. 
location.Length - 1 do + let len2 = expandedShape2[d] + if location[d] <> 0 || len2 <> shape1[d] then + t1Slice <- t1Slice.narrow(int64 d, int64 location[d], int64 len2) + t1Slice.add_(t2Expanded) |> ignore + t1.MakeLike(res) + + override t1.SubTT(t2) = + match dtype with + | Dtype.Bool -> opNotSupported2 "SubT" dtype t2.Dtype + | _ -> + let result = tt.sub(t2.TorchTensor) + t1.MakeLike(result) + + override t2.SubFromT0T(t1:scalar) = t2.SubTT0(t1).NegT() + + override t1.SubTT0(t2: scalar) = + //let t2v = t2.TorchTensor.ToScalar() + let result = tt.sub(toTorchScalar t2) + t1.MakeLike(result) + + override t1.MulTT(t2) = + let result = tt.mul(t2.TorchTensor) + t1.MakeLike(result) + + override t1.MulTT0(t2) = + match dtype with + | Dtype.Bool -> opNotSupported "MulTT0" dtype + | _ -> + let result = tt.mul(toTorchScalar t2) + t1.MakeLike(result) + + override t1.DivTT(t2) = + match dtype with + | Dtype.Bool -> opNotSupported2 "DivTT" dtype t2.Dtype + | _ -> + let result = tt.div(t2.TorchTensor) + // Torch uses "true division" mirroring Python 3 + // https://www.python.org/dev/peps/pep-0238/ + // https://pytorch.org/docs/stable/generated/torch.div.html + // also see https://github.com/DiffSharp/DiffSharp/issues/239 + let outtype = Dtype.divisionType t1.Dtype t2.Dtype + t1.MakeLike(result.to_type(toTorchType outtype), dtype=outtype) + + override t2.DivFromT0T(t1: scalar) = + match dtype with + | Dtype.Bool -> opNotSupported "DivT0T" dtype + | _ -> + let t1 = t2.FullLike(Shape.scalar, t1, dtype=t1.dtype) + let result = t1.TorchTensor.div(t2.TorchTensor) + // Torch uses "true division" mirroring Python 3 + // https://www.python.org/dev/peps/pep-0238/ + // https://pytorch.org/docs/stable/generated/torch.div.html + // also see https://github.com/DiffSharp/DiffSharp/issues/239 + let outtype = widenScalarForDivision t2.Dtype t1.Dtype + t2.MakeLike(result.to_type(toTorchType outtype), dtype=outtype) + + override t1.DivTT0(t2) = + match dtype with + | Dtype.Bool -> opNotSupported "DivTT0" dtype + | _ -> + let t2 = toTorchScalar t2 + // let t2 = t1.FullLike(Shape.scalar, t2, dtype=t1.Dtype) + let result = tt.div(t2) + // Torch uses "true division" mirroring Python 3 + // https://www.python.org/dev/peps/pep-0238/ + // https://pytorch.org/docs/stable/generated/torch.div.html + // also see https://github.com/DiffSharp/DiffSharp/issues/239 + let outtype = widenScalarForDivision t1.Dtype (fromTorchType t2.Type) + t1.MakeLike(result.to_type(toTorchType outtype), dtype=outtype) + + override t1.PowTT(t2) = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "PowTT" dtype + | _ -> + let result = tt.pow(t2.TorchTensor) + t1.MakeLike(result) + + override t2.PowFromT0T(t1:scalar) = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "PowT0T" dtype + | _ -> + let t1 = t2.FullLike(Shape.scalar, t1) + let result = t1.Expand(t2.Shape).TorchTensor.pow(t2.TorchTensor) + t2.MakeLike(result) + + override t1.PowTT0(t2:scalar) = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "PowTT0" dtype + | _ -> + let t2v = toTorchScalar t2 + let result = tt.pow(t2v) + t1.MakeLike(result) + + override t1.MatMulTT(t2) = + match dtype with + | Dtype.Bool -> opNotSupported2 "MatMulTT" dtype t2.Dtype + | _ -> + let (t1BatchPart, t1MatrixPart), (t2BatchPart, t2MatrixPart) = Shape.checkCanMatmul t1.Shape t2.Shape + if t1BatchPart <> t2BatchPart then failwithf "Cannot matrix multiply raw tensors with shapes %A, %A - mismatch batching" t1.Shape t2.Shape + let t1rows = t1MatrixPart[0] + let t2cols = t2MatrixPart[1] 
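+ // Worked example with hypothetical shapes: t1.Shape = [| 2; 3; 4 |] and t2.Shape = [| 2; 4; 5 |]
+ // give batch parts [| 2 |], t1rows = 3, t2cols = 5, so newShape below is [| 2; 3; 5 |]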
+ let newShape = Array.append t1BatchPart [| t1rows; t2cols |] + let result = + // "addmm for CUDA tensors only supports floating-point types. Try converting the tensors with .float()" | const char * + match t1.DeviceType, dtype with + | TensorMath.DeviceType.CUDA, (Dtype.Integral as dtype) -> + let tt1 = tt.to_type(torch.ScalarType.Float64) + let tt2 = t2.TorchTensor.to_type(torch.ScalarType.Float64) + tt1.matmul(tt2).round().to_type(toTorchType dtype) + | _ -> + tt.matmul(t2.TorchTensor) + t1.MakeLike(result, newShape) + + override t1.BMMTT(t2) = + match dtype with + | Dtype.Bool -> opNotSupported2 "BMMTT" dtype t2.Dtype + | _ -> + let resultShape = Shape.checkCanBMM t1.Shape t2.Shape + let result = + // "addmm for CUDA tensors only supports floating-point types. Try converting the tensors with .float()" | const char * + match t1.DeviceType, dtype with + | TensorMath.DeviceType.CUDA, (Dtype.Integral as dtype) -> + let tt1 = tt.to_type(torch.ScalarType.Float64) + let tt2 = t2.TorchTensor.to_type(torch.ScalarType.Float64) + tt1.bmm(tt2).round().to_type(toTorchType dtype) + | _ -> + tt.bmm(t2.TorchTensor) + t1.MakeLike(result, resultShape) + + override t1.Conv1D(t2, stride, padding) = // TODO: bias, dilation and groups + let _batchSize, _inputChannels, _kernelSize, _outputChannels, _outputSize, outputShape = + Shape.checkCanConv1d t1.DeviceType t2.DeviceType dtype t2.Dtype t1.Shape t2.Shape stride padding 1 + let resultt = + // "conv1d for CUDA tensors only supports floating-point types." + match t1.DeviceType, dtype with + | TensorMath.DeviceType.CUDA, (Dtype.Integral as dtype) -> + torch.nn.functional.conv1d(tt.to_type(torch.ScalarType.Float64), t2.TorchTensor.to_type(torch.ScalarType.Float64), stride=int64 stride, padding=int64 padding, dilation=1L).round().to_type(toTorchType dtype) + | _ -> + torch.nn.functional.conv1d(tt, t2.TorchTensor, stride=int64 stride, padding=int64 padding, dilation=1L) + t1.MakeLike(resultt, shape=outputShape) + + override t1.Conv2D(t2, strides, paddings) = // TODO: bias, dilation and groups + let _batchSize, _inputChannels, _kernelDimensions, _outputDimensions, outputShape = + Shape.checkCanConv2d t1.DeviceType t2.DeviceType dtype t2.Dtype t1.Shape t2.Shape strides paddings [| 1;1 |] + let resultt = + // "conv2d for CUDA tensors only supports floating-point types." + match t1.DeviceType, dtype with + | TensorMath.DeviceType.CUDA, (Dtype.Integral as dtype) -> + torch.nn.functional.conv2d(tt.to_type(torch.ScalarType.Float64), t2.TorchTensor.to_type(torch.ScalarType.Float64), strides=int64s strides, padding=int64s paddings).round().to_type(toTorchType dtype) + | _ -> + torch.nn.functional.conv2d(tt, t2.TorchTensor, strides=int64s strides, padding=int64s paddings) + t1.MakeLike(resultt, shape=outputShape) + + override t1.Conv3D(t2, strides, paddings) = // TODO: bias, dilation and groups + let _batchSize, _inputChannels, _kernelDimensions, _outputDimensions, outputShape = + Shape.checkCanConv3d t1.DeviceType t2.DeviceType dtype t2.Dtype t1.Shape t2.Shape strides paddings [| 1;1;1 |] + let resultt = + // "conv2d for CUDA tensors only supports floating-point types." 
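+ // As with MatMulTT and Conv1D/Conv2D above, integral CUDA inputs are routed through a
+ // Float64 convolution, then rounded and cast back to the original integral dtype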
+ match t1.DeviceType, dtype with + | TensorMath.DeviceType.CUDA, (Dtype.Integral as dtype) -> + torch.nn.functional.conv3d(tt.to_type(torch.ScalarType.Float64), t2.TorchTensor.to_type(torch.ScalarType.Float64), strides=int64s strides, padding=int64s paddings).round().to_type(toTorchType dtype) + | _ -> + torch.nn.functional.conv3d(tt, t2.TorchTensor, strides=int64s strides, padding=int64s paddings) + t1.MakeLike(resultt, shape=outputShape) + + override t1.MaxPool1D(kernelSize, stride, padding) = + let _batchSize, _channels, _inputSize, _outputSize, outputShape = + Shape.checkCanMaxpool1d dtype t1.Shape kernelSize stride padding + match dtype with + | Dtype.Bool | Dtype.Integral -> opNotSupported "MaxPool1D" dtype + | _ -> + let struct (resultt, indicest) = torch.nn.functional.max_pool1d_with_indices(tt, int64 kernelSize, stride=int64 stride, padding=int64 padding, dilation=1L) + // NOTE: TensorMath currently expects indices as an Int32 tensor + let indices = t1.MakeLike(indicest, shape=outputShape, dtype=Dtype.Int64).Cast(Dtype.Int32) + let result = t1.MakeLike(resultt, shape=outputShape) + result, indices + + override t1.MaxPool2D(kernelSize, strides, paddings) = + let _batchSize, _channels, _inputDimensions, _kernelDimensions, _outputDimensions, outputShape = + Shape.checkCanMaxpool2d dtype t1.Shape kernelSize strides paddings + let struct (resultt, indicest) = torch.nn.functional.max_pool2d_with_indices(tt, int64s kernelSize, strides=int64s strides, padding=int64s paddings) + // NOTE: TensorMath currently expects indices as an Int32 tensor, Torch wants Int64 + let indices = t1.MakeLike(indicest, shape=outputShape, dtype=Dtype.Int64).Cast(Dtype.Int32) + let result = t1.MakeLike(resultt, shape=outputShape) + result, indices + + override t1.MaxPool3D(kernelSize, strides, paddings) = + let _batchSize, _channels, _inputDimensions, _kernelDimensions, _outputDimensions, outputShape = + Shape.checkCanMaxpool3d dtype t1.Shape kernelSize strides paddings + let struct (resultt, indicest) = torch.nn.functional.max_pool3d_with_indices(tt, int64s kernelSize, strides=int64s strides, padding=int64s paddings) + + // NOTE: TensorMath currently expects indices as an Int32 tensor + let indices = t1.MakeLike(indicest, shape=outputShape, dtype=Dtype.Int64).Cast(Dtype.Int32) + let result = t1.MakeLike(resultt, shape=outputShape) + result, indices + + override t1.MaxUnpool1D(indices, outputSize) = + // NOTE: LibTorch has no torch::max_unpool1d and so TorchSharp has no Tensor.MaxUnpool1D + // So use MaxUnpool2D instead + //let batchSize, channels, _inputSize, _outputShape = Shape.computeMaxUnpool1d t1.Shape outputSize + let t1X = t1.UnsqueezeT(2) + let indicesX = indices.UnsqueezeT(2) + let resulttX = t1X.MaxUnpool2D(indicesX, [| outputSize[0]; outputSize[1]; 1; outputSize[2] |]) + let resultt = resulttX.SqueezeT(2) + resultt + + override t1.MaxUnpool2D(indices, outputSize) = + let _batchSize, _channels, _inputDimensions, outputShape = + Shape.checkCanMaxunpool2d dtype t1.Shape indices.Dtype indices.Shape outputSize + // NOTE: TensorMath currently expects indices as an Int32 tensor + let indices = indices.Cast(Dtype.Int64) + + // note, LibTorch only wants the last two elements of the output size passed in + // "There should be exactly two elements (height, width) in output_size (max_unpooling2d_shape_check at ...)" + let outputSize = outputSize[2..3] + + // TODO: consider switching to the torch::nn module for MaxUnpool2d + + let resultt = torch.nn.functional.max_unpool2d(tt, indices.TorchTensor, int64s
outputSize) + t1.MakeLike(resultt, shape=outputShape) + + override t1.MaxUnpool3D(indices, outputSize) = + let _batchSize, _channels, _inputDimensions, outputShape = + Shape.checkCanMaxunpool3d dtype t1.Shape indices.Dtype indices.Shape outputSize + // NOTE: TensorMath currently expects indices as an Int32 tensor + let indices = indices.Cast(Dtype.Int64) + + // note, LibTorch only wants the last three elements of the output size passsed in + // "There should be exactly three elements (depth, height, width) in output_size (max_unpooling3d_shape_check at ..\..\aten\src\ATen\native\MaxUnpooling.cpp:231)" + let outputSize = outputSize[2..4] + + // NOTE: strides and padding must always be specified for torch::max_unpool3d C++ entry + // TODO: consider switching to the torch::nn module for MaxUnpool + let strides = outputSize |> Array.map (fun _ -> 1L) + let padding = outputSize |> Array.map (fun _ -> 0L) + let resultt = torch.nn.functional.max_unpool3d(tt, indices.TorchTensor, int64s outputSize, strides, padding) + t1.MakeLike(resultt, shape=outputShape) + + override t1.AvgPool1D(kernelSize, stride, padding) = + let _batchSize, _channels, _inputSize, _outputSize, outputShape = Shape.checkCanAvgpool1d dtype t1.Shape kernelSize stride padding + match dtype with + | Dtype.Bool | Dtype.Integral -> opNotSupported "AvgPool1D" dtype + | _ -> + let resultt = torch.nn.functional.avg_pool1d(tt, int64 kernelSize, stride=int64 stride, padding=int64 padding) + let result = t1.MakeLike(resultt, shape=outputShape) + result + + override t1.AvgPool2D(kernelSize, stride, padding) = + let _batchSize, _channels, _inputSize, _kernelSize, _outputSize, outputShape = Shape.checkCanAvgpool2d dtype t1.Shape kernelSize stride padding + match dtype with + | Dtype.Bool | Dtype.Integral -> opNotSupported "AvgPool2D" dtype + | _ -> + let resultt = torch.nn.functional.avg_pool2d(tt, int64s kernelSize, strides=int64s stride, paddings=int64s padding) + let result = t1.MakeLike(resultt, shape=outputShape) + result + + override t1.AvgPool3D(kernelSize, stride, padding) = + let _batchSize, _channels, _inputSize, _kernelSize, _outputSize, outputShape = Shape.checkCanAvgpool3d dtype t1.Shape kernelSize stride padding + match dtype with + | Dtype.Bool | Dtype.Integral -> opNotSupported "AvgPool3D" dtype + | _ -> + let resultt = torch.nn.functional.avg_pool3d(tt, int64s kernelSize, strides=int64s stride, paddings=int64s padding) + let result = t1.MakeLike(resultt, shape=outputShape) + result + + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = + let t1X = t1.UnsqueezeT(2) + let originalInputX = originalInput.UnsqueezeT(2) + let resulttX = t1X.AvgPoolReverse2D(originalInputX, [| 1; kernelSize |], [| 1; stride |], [| 0; padding |]) + let resultt = resulttX.SqueezeT(2) + resultt + + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = + match dtype with + | Dtype.Bool | Dtype.Integral -> opNotSupported "AvgPoolReverse2D" dtype + | _ -> + let resultt = torch.nn.functional.avg_pool2d_backward(tt, originalInput.TorchTensor, int64s kernelSize, strides=int64s stride, paddings=int64s padding) + let result = t1.MakeLike(resultt, shape=originalInput.Shape) + result + + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = + match dtype with + | Dtype.Bool | Dtype.Integral -> opNotSupported "AvgPoolReverse3D" dtype + | _ -> + let resultt = torch.nn.functional.avg_pool3d_backward(tt, originalInput.TorchTensor, int64s kernelSize, strides=int64s stride, paddings=int64s padding) + 
let result = t1.MakeLike(resultt, shape=originalInput.Shape) + result + + override t.NegT() = + match dtype with + | Dtype.Bool -> opNotSupported "NegT" dtype + | _ -> t.MakeLike(-tt) + + override t.SumT(?resultType) = + let typeArg = match resultType with None -> Nullable() | Some dt -> Nullable(toTorchType dt) + let outType = match resultType with None -> dtype.SummationType | Some dt -> dt + t.MakeLike(tt.sum(typeArg), shape=Shape.scalar, dtype=outType) + + override t.SumTDim(dim, ?resultType) = + let typeArg = match resultType with None -> Nullable() | Some dt -> Nullable(toTorchType dt) + let outType = match resultType with None -> dtype.SummationType | Some dt -> dt + let ret = tt.sum(dim=(int64 dim), ``type``=typeArg, keepdim=false) // keepdim is fixed to false as it is handled at Tensor level, not at RawTensor level + t.MakeLike(ret, shape=fromTorchShape ret.shape, dtype=outType) + + override t.SignT() = + t.MakeLike(tt.sign()) + + override t.FloorT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "FloorT" dtype + | _ -> t.MakeLike(tt.floor()) + + override t.CeilT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "CeilT" dtype + | _ -> t.MakeLike(tt.ceil()) + + override t.RoundT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "RoundT" dtype + | _ -> t.MakeLike(tt.round()) + + override t.AbsT() = + match dtype with + | Dtype.Bool -> opNotSupported "AbsT" dtype + | Dtype.Int8 -> t.Cast(Dtype.Int32).AbsT().Cast(Dtype.Int8) // TODO: there is odd behaviour from torch for relu on int8, may have been fixed in later version? + | _ -> t.MakeLike(tt.abs ()) + + override t.SoftplusT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "SoftplusT" dtype + | _ -> t.MakeLike(tt.softplus()) + + override t.ReluT() = + match dtype with + | Dtype.Bool -> opNotSupported "ReluT" dtype + | Dtype.Int8 -> t.Cast(Dtype.Int32).ReluT().Cast(Dtype.Int8) // TODO: there is odd behaviour from torch for relu on int8, may have been fixed in later version? 
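The Int8 cases of AbsT and ReluT above detour through Int32 to work around odd LibTorch int8 behaviour; the match resumes below with the default case. A hypothetical helper (name assumed, not part of the patch) showing the detour in isolation:

    // Hypothetical: apply a RawTensor op to an Int8 tensor via an Int32 round trip.
    let throughInt32 (op: RawTensor -> RawTensor) (t: RawTensor) : RawTensor =
        (op (t.Cast(Dtype.Int32))).Cast(Dtype.Int8)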
+ | _ -> t.MakeLike(tt.relu()) + + override t.SigmoidT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "SigmoidT" dtype + | _ -> t.MakeLike(tt.sigmoid()) + + override t.ExpT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "ExpT" dtype + | _ -> t.MakeLike(tt.exp()) + + override t.LogT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "LogT" dtype + | _ -> t.MakeLike(tt.log()) + + override t.Log10T() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "Log10T" dtype + | _ -> t.MakeLike(tt.log10()) + + override t.SqrtT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "SqrtT" dtype + | _ -> t.MakeLike(tt.sqrt()) + + override t.SinT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "SinT" dtype + | _ -> t.MakeLike(tt.sin()) + + override t.CosT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "CosT" dtype + | _ -> t.MakeLike(tt.cos()) + + override t.TanT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "TanT" dtype + | _ -> t.MakeLike(tt.tan()) + + override t.SinhT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "SinhT" dtype + | _ -> t.MakeLike(tt.sinh()) + + override t.CoshT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "CoshT" dtype + | _ -> t.MakeLike(tt.cosh()) + + override t.TanhT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "TanhT" dtype + | _ -> t.MakeLike(tt.tanh()) + + override t.AsinT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "AsinT" dtype + | _ -> t.MakeLike(tt.asin()) + + override t.AcosT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "AcosT" dtype + | _ -> t.MakeLike(tt.acos()) + + override t.AtanT() = + match dtype with + | Dtype.IntegralOrBool -> opNotSupported "AtanT" dtype + | _ -> t.MakeLike(tt.atan()) +#if LATEST_TORCHSHARP + // Included to track new functionality available in TorchSharp + // + // These will be progressed to RawTensor and Tensor + member t.AdaptiveAvgPool1D(outputSize: int32) = + match dtype with + | Dtype.Bool -> opNotSupported "AdaptiveAvgPool1D" dtype + | _ -> t.MakeLike(tt.AdaptiveAvgPool1D(int64 outputSize)) + + member t.AdaptiveAvgPool2D(outputSizes: int32[]) = + match dtype with + | Dtype.Bool -> opNotSupported "AdaptiveAvgPool2D" dtype + | _ -> t.MakeLike(tt.AdaptiveAvgPool2D(int64s outputSizes)) + + member t.AdaptiveAvgPool3D(outputSizes: int32[]) = + match dtype with + | Dtype.Bool -> opNotSupported "AdaptiveAvgPool3D" dtype + | _ -> t.MakeLike(tt.AdaptiveAvgPool3D(int64s outputSizes)) + + member t.AdaptiveAvgPool3DBackward(originalInput: RawTensor) = + match dtype with + | Dtype.Bool -> opNotSupported "AdaptiveAvgPool3DBackward" dtype + | _ -> t.MakeLike(tt.AdaptiveAvgPool3Backward(originalInput.TorchTensor)) + + //member t.AvgPool1D(kernelSize: int32, stride: int32, padding: int32, ?ceil_mode: bool, ?count_include_pad: bool) = + // //let _batchSize, _channels, _inputSize, _outputSize, outputShape = Shape.checkCanAvgPool1d dtype t1.Shape kernelSize stride padding + // match dtype with + // | Dtype.Bool -> opNotSupported "AvgPool1D" dtype + // | _ -> + // let _resultt = tt.AvgPool1D(int64 kernelSize, stride=int64 stride, padding=int64 padding, ?ceil_mode=ceil_mode, ?count_include_pad=count_include_pad) + // failwith "tbd - outputShape" + // //t.MakeLike(resultt, shape=outputShape) + + //member t.AvgPool2D(kernelSizes: int32[], strides: int32[], paddings: int32[], ?ceil_mode: bool, ?count_include_pad: bool) = + // 
failwith "tbd - TorchSharp signture being updated" + ////let _batchSize, _channels, _inputSize, _outputSize, outputShape = Shape.checkCanAvgPool1d dtype t1.Shape kernelSize stride padding + //match dtype with + //| Dtype.Bool -> opNotSupported "AvgPool2D" dtype + //| _ -> + //let _resultt = tt.AvgPool2D(int64s kernelSizes, stride=int64 stride, padding=int64 padding, ?ceil_mode=ceil_mode, ?count_include_pad=count_include_pad) + //failwith "tbd - outputShape" + ////t.MakeLike(resultt, shape=outputShape) + + //member t.X(kernelSize: int32, stride: int32, padding: int32, ?ceil_mode: bool, ?count_include_pad: bool) = + // //let _batchSize, _channels, _inputSize, _outputSize, outputShape = Shape.checkCanAvgPool1d dtype t1.Shape kernelSize stride padding + // match dtype with + // | Dtype.Bool -> opNotSupported "AvgPool1D" dtype + // | _ -> + // let _resultt = tt.BitwiseAnd(int64 kernelSize, stride=int64 stride, padding=int64 padding, ?ceil_mode=ceil_mode, ?count_include_pad=count_include_pad) + // failwith "tbd - outputShape" + // //t.MakeLike(resultt, shape=outputShape) +#endif + + new (info: System.Runtime.Serialization.SerializationInfo, _context: System.Runtime.Serialization.StreamingContext) = + let dtype = info.GetValue("dtype", typeof) :?> Dtype + let shape = info.GetValue("shape", typeof) :?> Shape + let tt = + match dtype with + | Dtype.Bool -> + let data = info.GetValue("data", typeof) :?> bool[] + torch.tensor(data, dtype=toTorchType Dtype.Bool, dimensions=toTorchShape shape) + | Dtype.Byte -> + let data = info.GetValue("data", typeof) :?> byte[] + torch.tensor(data, dtype=toTorchType Dtype.Byte, dimensions=toTorchShape shape) + | Dtype.Int8 -> + let data = info.GetValue("data", typeof) :?> sbyte[] + torch.tensor(data, dtype=toTorchType Dtype.Int8, dimensions=toTorchShape shape) + | Dtype.Int16 -> + let data = info.GetValue("data", typeof) :?> int16[] + torch.tensor(data, dtype=toTorchType Dtype.Int16, dimensions=toTorchShape shape) + | Dtype.Int32 -> + let data = info.GetValue("data", typeof) :?> int32[] + torch.tensor(data, dtype=toTorchType Dtype.Int32, dimensions=toTorchShape shape) + | Dtype.Int64 -> + let data = info.GetValue("data", typeof) :?> int64[] + torch.tensor(data, dtype=toTorchType Dtype.Int64, dimensions=toTorchShape shape) + | Dtype.Float32 -> + let data = info.GetValue("data", typeof) :?> float32[] + torch.tensor(data, dtype=toTorchType Dtype.Float32, dimensions=toTorchShape shape) + | Dtype.Float64 -> + let data = info.GetValue("data", typeof) :?> double[] + torch.tensor(data, dtype=toTorchType Dtype.Float64, dimensions=toTorchShape shape) + | Dtype.Float16 -> + let data = info.GetValue("data", typeof) :?> float32[] + torch.tensor(data, dtype=toTorchType Dtype.Float16, dimensions=toTorchShape shape) + | Dtype.BFloat16 -> + let data = info.GetValue("data", typeof) :?> float32[] + torch.tensor(data, dtype=toTorchType Dtype.BFloat16, dimensions=toTorchShape shape) + + TorchRawTensor(tt, shape, dtype, Device.CPU) + + interface System.Runtime.Serialization.ISerializable with + + //[SecurityPermissionAttribute(SecurityAction.Demand, SerializationFormatter = true)] + member t.GetObjectData(info, _context) = + + // Torch Tensors must be CPU before they can access RawData + let tCpu = t.MoveTo(Device.CPU) :?> TorchRawTensor + + info.AddValue("dtype", t.Dtype) + info.AddValue("shape", t.Shape) + info.AddValue("data", tCpu.ToRawData()) + + + override _.ClampInPlace(low, high) = + // TODO - next version of TorchSharp will have in place version of this + checkMutable() + tt <- 
tt.clamp(low.TorchTensor.ToScalar(), high.TorchTensor.ToScalar()) + + override _.LtInPlace(t2) = checkMutable(); tt.lt_(t2.TorchTensor) |> ignore + + override _.GtInPlace(t2) = checkMutable(); tt.gt_(t2.TorchTensor) |> ignore + + override _.LeInPlace(t2) = checkMutable(); tt.le_(t2.TorchTensor) |> ignore + + override _.GeInPlace(t2) = checkMutable(); tt.ge_(t2.TorchTensor) |> ignore + + override _.EqInPlace(t2) = checkMutable(); tt.eq_(t2.TorchTensor) |> ignore + + override _.NeqInPlace(t2) = checkMutable(); tt.ne_(t2.TorchTensor) |> ignore + + override _.AddInPlace(t2, alpha) = + checkMutable() + match alpha with + | Some v -> tt.add_(t2.TorchTensor, toTorchScalar v) |> ignore + | None -> tt.add_(t2.TorchTensor) |> ignore + + override _.AddScalarInPlace(t2) = + checkMutable() + tt.add_(toTorchScalar t2) |> ignore + + // TODO - this should be faster + override t1.AddSliceInPlace(location, t2) = + checkMutable() + Shape.checkCanAddSlice t1.Shape location t2.Shape + let shape1 = t1.Shape + let shape2 = t2.Shape + let expandedShape2 = Shape.unsqueezeAs shape2 shape1 + let t2Expanded = t2.TorchTensor.expand(toTorchShape expandedShape2) + let mutable t1Slice = tt // will share memory with res + for d in 0 .. location.Length - 1 do + let len2 = expandedShape2[d] + if location[d] <> 0 || len2 <> shape1[d] then + t1Slice <- t1Slice.narrow(int64 d, int64 location[d], int64 len2) + t1Slice.add_(t2Expanded) |> ignore + + override _.SubInPlace(t2) = checkMutable(); tt.sub_(t2.TorchTensor) |> ignore + + override _.SubScalarInPlace(t2) = checkMutable(); tt.sub_(toTorchScalar t2) |> ignore + + override _.MulInPlace(t2) = checkMutable(); tt.mul_(t2.TorchTensor) |> ignore + + override _.MulScalarInPlace(t2) = checkMutable(); tt.mul_(toTorchScalar t2) |> ignore + + override _.DivInPlace(t2) = checkMutable(); tt.div_(t2.TorchTensor) |> ignore + + override _.DivScalarInPlace(t2) = checkMutable(); tt.div_(toTorchScalar t2) |> ignore + + override _.PowInPlace(t2) = checkMutable(); tt.pow_(t2.TorchTensor) |> ignore + + override _.PowScalarInPlace(t2) = checkMutable(); tt.pow_(toTorchScalar t2) |> ignore + + override _.MatMulInPlace(t2) = checkMutable(); tt <- tt.matmul(t2.TorchTensor) + + override _.NegInPlace() = checkMutable(); tt.neg_() |> ignore + + override _.SignInPlace() = checkMutable(); tt.sign_() |> ignore + + override _.FloorInPlace() = checkMutable(); tt.floor_() |> ignore + + override _.CeilInPlace() = checkMutable(); tt.ceil_() |> ignore + + override _.RoundInPlace() = checkMutable(); tt.round_() |> ignore + + override _.AbsInPlace() = checkMutable(); tt.abs_() |> ignore + + override _.ReluInPlace() = checkMutable(); tt.relu_() |> ignore + + override _.SoftplusInPlace() = checkMutable(); tt <- tt.softplus() + + override _.SigmoidInPlace() = checkMutable(); tt <- tt.sigmoid() + + override _.ExpInPlace() = checkMutable(); tt <- tt.exp() + + override _.LogInPlace() = checkMutable(); tt.log_() |> ignore + + override _.Log10InPlace() = checkMutable(); tt.log10_() |> ignore + + override _.SqrtInPlace() = checkMutable(); tt.sqrt_() |> ignore + + override _.SinInPlace() = checkMutable(); tt.sin_() |> ignore + + override _.CosInPlace() = checkMutable(); tt.cos_() |> ignore + + override _.TanInPlace() = checkMutable(); tt.tan_() |> ignore + + override _.SinhInPlace() = checkMutable(); tt.sinh_() |> ignore + + override _.CoshInPlace() = checkMutable(); tt.cosh_() |> ignore + + override _.TanhInPlace() = checkMutable(); tt.tanh_() |> ignore + + override _.AsinInPlace() = checkMutable(); tt.asin_() |> ignore + 
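A note on AddSliceInPlace above, before the remaining in-place overrides continue below: it depends on narrow returning views that share storage with the parent tensor, so the final add_ writes directly into the target region. A standalone TorchSharp sketch of the same idea (helper name assumed; no broadcasting, unlike the override):

    open TorchSharp

    // Narrow a view onto the region starting at `location`, then add in place.
    let addIntoSlice (dst: torch.Tensor) (location: int[]) (src: torch.Tensor) =
        let mutable view = dst
        for d in 0 .. location.Length - 1 do
            // Each narrow returns a view over dst's memory, not a copy.
            view <- view.narrow(int64 d, int64 location[d], src.shape[d])
        view.add_(src) |> ignore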
+ override _.AcosInPlace() = checkMutable(); tt.acos_() |> ignore + + override _.AtanInPlace() = checkMutable(); tt.atan_() |> ignore + + // TODO - next version of TorchSharp will have in place version of this + override t.OnesInPlace() = checkMutable(); tt <- (RawTensor.Ones(shape, dtype, t.Device, Backend.Torch) :?> TorchRawTensor).TorchTensor + + // TODO - next version of TorchSharp will have in place version of this + override t.ZerosInPlace() = checkMutable(); tt <- (RawTensor.Zeros(shape, dtype, t.Device, Backend.Torch) :?> TorchRawTensor).TorchTensor + + // TODO - next version of TorchSharp will have in place version of this + override t.RandomInPlace() = checkMutable(); tt <- (RawTensor.Random(shape, dtype, t.Device, Backend.Torch) :?> TorchRawTensor).TorchTensor + + // TODO - next version of TorchSharp will have in place version of this + override t.RandomNormalInPlace() = checkMutable(); tt <- (RawTensor.RandomNormal(shape, dtype, t.Device, Backend.Torch) :?> TorchRawTensor).TorchTensor + + // TODO - next version of TorchSharp will have in place version of this + override t.RandomIntInPlace(low, high) = checkMutable(); tt <- (RawTensor.RandomInt(shape, low, high, dtype, t.Device, Backend.Torch) :?> TorchRawTensor).TorchTensor + + override t.SetMutable() = isMutable <- true + + override t.IsMutable = isMutable + +/// The parameterized implementation of the static ops. Use a generic class to +/// make sure we get the correlation with .NET types correct and systematic +type TorchTensorOps<'T, 'T2> + (dtype: Dtype, conv: 'T -> 'T2, + fromScalar: 'T2 -> torch.Tensor, + from: 'T2[] * TorchShape -> torch.Tensor, + zero: 'T, + one: 'T, + empty: TorchShape * Device -> torch.Tensor, + zeros: TorchShape * Device -> torch.Tensor, + ones: TorchShape * Device -> torch.Tensor, + random: TorchShape * Device -> torch.Tensor, + randomN: TorchShape * Device -> torch.Tensor, + randomIntegers: TorchShape * int * int * Device -> torch.Tensor, + valueFromScalar: scalar -> 'T, + scalarFromConvValue: 'T2 -> TorchSharp.Scalar) = + + member _.Zero(device) = TorchRawTensor(torchMoveTo (fromScalar (conv zero)) device, Shape.scalar, dtype, device) :> RawTensor + member _.One(device) = TorchRawTensor(torchMoveTo (fromScalar (conv one)) device, Shape.scalar, dtype, device) :> RawTensor + member _.Empty(shape:Shape, device) = TorchRawTensor(empty(toTorchShape shape, device), shape, dtype, device) :> RawTensor + member _.Zeros(shape:Shape, device) = TorchRawTensor(zeros(toTorchShape shape, device), shape, dtype, device) :> RawTensor + member _.Ones(shape:Shape, device) = TorchRawTensor(ones(toTorchShape shape, device), shape, dtype, device) :> RawTensor + member _.Random(shape:Shape, device) = TorchRawTensor(random(toTorchShape shape, device), shape, dtype, device) :> RawTensor + member _.RandomNormal(shape:Shape, device) = TorchRawTensor(randomN(toTorchShape shape, device), shape, dtype, device) :> RawTensor + member _.RandomInt(shape, low, high, device) = TorchRawTensor(randomIntegers(toTorchShape shape, low, high, device), shape, dtype, device) :> RawTensor + + member _.Full(shape:Shape, value:scalar, device) = + let t = empty(toTorchShape shape, device) + t.fill_(scalarFromConvValue (conv (valueFromScalar value))) |> ignore + TorchRawTensor(t, shape, dtype, device) :> RawTensor + + member _.CreateFromFlatArray(values:Array, shape:Shape, device: Device) : RawTensor = + let values = values :?> 'T[] |> Array.map conv + // torch.InitializeDevice(device.ToTorch) |> ignore + let t = + match shape with + | [| |] -> 
fromScalar(values[0])
+            | _ -> from (values, toTorchShape shape)
+        let tt = torchMoveTo t device
+        TorchRawTensor(tt, shape, dtype, device) :> RawTensor
+
+type TorchFloat32TensorOps() =
+
+    inherit TorchTensorOps<single, single>(Dtype.Float32, id,
+        (fun v -> torch.tensor(float v, dtype=toTorchType Dtype.Float32)),
+        (fun (data, shape) -> torch.tensor(data, shape, dtype=toTorchType Dtype.Float32)),
+        0.0f, 1.0f,
+        (fun (shape, device) -> torch.empty(size=shape, dtype=toTorchType Dtype.Float32, device=device.ToTorch)),
+        (fun (shape, device) -> torch.zeros(size=shape, dtype=toTorchType Dtype.Float32, device=device.ToTorch)),
+        (fun (shape, device) -> torch.ones(size=shape, dtype=toTorchType Dtype.Float32, device=device.ToTorch)),
+        (fun (shape, device) -> torch.rand(size=shape, dtype=toTorchType Dtype.Float32, device=device.ToTorch)),
+        (fun (shape, device) -> torch.randn(size=shape, dtype=toTorchType Dtype.Float32, device=device.ToTorch)),
+        (fun (shape, low, high, device) -> torch.randint(int64 (high-low), size=shape, dtype=toTorchType Dtype.Float32, device=device.ToTorch).add_((float low).ToScalar())),
+        System.Convert.ToSingle,
+        TorchSharp.Scalar.op_Implicit)
+
+type TorchFloat64TensorOps() =
+
+    inherit TorchTensorOps<double, double>(Dtype.Float64, id,
+        (fun v -> torch.tensor(v, dtype=toTorchType Dtype.Float64)),
+        (fun (data, shape) -> torch.tensor(data, shape, dtype=toTorchType Dtype.Float64)),
+        0.0, 1.0,
+        (fun (shape, device) -> torch.empty(size=shape, dtype=toTorchType Dtype.Float64, device=device.ToTorch)),
+        (fun (shape, device) -> torch.zeros(size=shape, dtype=toTorchType Dtype.Float64, device=device.ToTorch)),
+        (fun (shape, device) -> torch.ones(size=shape, dtype=toTorchType Dtype.Float64, device=device.ToTorch)),
+        (fun (shape, device) -> torch.rand(size=shape, dtype=toTorchType Dtype.Float64, device=device.ToTorch)),
+        (fun (shape, device) -> torch.randn(size=shape, dtype=toTorchType Dtype.Float64, device=device.ToTorch)),
+        (fun (shape, low, high, device) -> torch.randint(int64 (high-low), size=shape, dtype=toTorchType Dtype.Float64, device=device.ToTorch).add_((double low).ToScalar())),
+        System.Convert.ToDouble,
+        TorchSharp.Scalar.op_Implicit)
+
+type TorchInt8TensorOps() =
+
+    inherit TorchTensorOps<sbyte, sbyte>(Dtype.Int8, sbyte,
+        (fun v -> torch.tensor(int64 v, dtype=toTorchType Dtype.Int8)),
+        (fun (data, shape) -> torch.tensor(data, shape, dtype=toTorchType Dtype.Int8)),
+        0y, 1y,
+        (fun (shape, device) -> torch.empty(size=shape, dtype=toTorchType Dtype.Int8, device=device.ToTorch)),
+        (fun (shape, device) -> torch.zeros(size=shape, dtype=toTorchType Dtype.Int8, device=device.ToTorch)),
+        (fun (shape, device) -> torch.ones(size=shape, dtype=toTorchType Dtype.Int8, device=device.ToTorch)),
+        (fun _ -> opNotSupported "Random" Dtype.Int8),
+        (fun _ -> opNotSupported "RandomNormal" Dtype.Int8),
+        (fun (shape, low, high, device) -> torch.randint(int64 (high-low), size=shape, dtype=toTorchType Dtype.Int8, device=device.ToTorch).add_((sbyte low).ToScalar())),
+        System.Convert.ToSByte,
+        TorchSharp.Scalar.op_Implicit)
+
+type TorchInt16TensorOps() =
+
+    inherit TorchTensorOps<int16, int16>(Dtype.Int16, int16,
+        (fun v -> torch.tensor(int64 v, dtype=toTorchType Dtype.Int16)),
+        (fun (data, shape) -> torch.tensor(data, shape, dtype=toTorchType Dtype.Int16)),
+        0s, 1s,
+        (fun (shape, device) -> torch.empty(size=shape, dtype=toTorchType Dtype.Int16, device=device.ToTorch)),
+        (fun (shape, device) -> torch.zeros(size=shape, dtype=toTorchType Dtype.Int16, device=device.ToTorch)),
+        (fun (shape, device) -> torch.ones(size=shape, dtype=toTorchType Dtype.Int16, device=device.ToTorch)),
+        (fun _ -> opNotSupported "Random" Dtype.Int16),
+        (fun _ -> opNotSupported "RandomNormal" Dtype.Int16),
+        (fun (shape, low, high, device) -> torch.randint(int64 (high-low), size=shape, dtype=toTorchType Dtype.Int16, device=device.ToTorch).add_((int16 low).ToScalar())),
+        System.Convert.ToInt16,
+        TorchSharp.Scalar.op_Implicit)
+
+type TorchInt32TensorOps() =
+
+    inherit TorchTensorOps<int32, int32>(Dtype.Int32, int32,
+        (fun v -> torch.tensor(int64 v, dtype=toTorchType Dtype.Int32)),
+        (fun (data, shape) -> torch.tensor(data, shape, dtype=toTorchType Dtype.Int32)),
+        0, 1,
+        (fun (shape, device) -> torch.empty(size=shape, dtype=toTorchType Dtype.Int32, device=device.ToTorch)),
+        (fun (shape, device) -> torch.zeros(size=shape, dtype=toTorchType Dtype.Int32, device=device.ToTorch)),
+        (fun (shape, device) -> torch.ones(size=shape, dtype=toTorchType Dtype.Int32, device=device.ToTorch)),
+        (fun _ -> opNotSupported "Random" Dtype.Int32),
+        (fun _ -> opNotSupported "RandomNormal" Dtype.Int32),
+        (fun (shape, low, high, device) -> torch.randint(int64 (high-low), size=shape, dtype=toTorchType Dtype.Int32, device=device.ToTorch).add_((int32 low).ToScalar())),
+        System.Convert.ToInt32,
+        TorchSharp.Scalar.op_Implicit)
+
+type TorchInt64TensorOps() =
+
+    inherit TorchTensorOps<int64, int64>(Dtype.Int64, int64,
+        (fun v -> torch.tensor(v, dtype=toTorchType Dtype.Int64)),
+        (fun (data, shape) -> torch.tensor(data, shape, dtype=toTorchType Dtype.Int64)),
+        0L, 1L,
+        (fun (shape, device) -> torch.empty(size=shape, dtype=toTorchType Dtype.Int64, device=device.ToTorch)),
+        (fun (shape, device) -> torch.zeros(size=shape, dtype=toTorchType Dtype.Int64, device=device.ToTorch)),
+        (fun (shape, device) -> torch.ones(size=shape, dtype=toTorchType Dtype.Int64, device=device.ToTorch)),
+        (fun _ -> opNotSupported "Random" Dtype.Int64),
+        (fun _ -> opNotSupported "RandomNormal" Dtype.Int64),
+        (fun (shape, low, high, device) -> torch.randint(int64 (high-low), size=shape, dtype=toTorchType Dtype.Int64, device=device.ToTorch).add_((int64 low).ToScalar())),
+        System.Convert.ToInt64,
+        TorchSharp.Scalar.op_Implicit)
+
+type TorchBoolTensorOps() =
+
+    inherit TorchTensorOps<bool, bool>(Dtype.Bool, id,
+        (fun v -> torch.tensor(v, dtype=toTorchType Dtype.Bool)),
+        (fun (data, shape) -> torch.tensor(data, shape, dtype=toTorchType Dtype.Bool)),
+        false, true,
+        (fun (shape, device) -> torch.empty(size=shape, dtype=toTorchType Dtype.Bool, device=device.ToTorch)),
+        (fun (shape, device) -> torch.zeros(size=shape, dtype=toTorchType Dtype.Bool, device=device.ToTorch)),
+        (fun (shape, device) -> torch.ones(size=shape, dtype=toTorchType Dtype.Bool, device=device.ToTorch)),
+        (fun _ -> opNotSupported "Random" Dtype.Bool),
+        (fun _ -> opNotSupported "RandomNormal" Dtype.Bool),
+        (fun (shape, low, high, device) -> torch.randint(min 2L (int64 (high-low)), size=shape, dtype=toTorchType Dtype.Bool, device=device.ToTorch).add_((low > 0).ToScalar())),
+        System.Convert.ToBoolean,
+        TorchSharp.Scalar.op_Implicit)
+
+type TorchByteTensorOps() =
+
+    inherit TorchTensorOps<byte, byte>(Dtype.Byte, id,
+        (fun v -> torch.tensor(int64 v, dtype=toTorchType Dtype.Byte)),
+        (fun (data, shape) -> torch.tensor(data, shape, dtype=toTorchType Dtype.Byte)),
+        0uy, 1uy,
+        (fun (shape, device) -> torch.empty(size=shape, dtype=toTorchType Dtype.Byte, device=device.ToTorch)),
+        (fun (shape, device) -> torch.zeros(size=shape, dtype=toTorchType Dtype.Byte, device=device.ToTorch)),
+        (fun (shape, device) -> torch.ones(size=shape, dtype=toTorchType Dtype.Byte, device=device.ToTorch)),
+        (fun _ -> opNotSupported "Random" Dtype.Byte),
+        (fun _ -> opNotSupported "RandomNormal" Dtype.Byte),
+        (fun (shape, low, high, device) -> torch.randint(int64 (high-low), size=shape, dtype=toTorchType Dtype.Byte, device=device.ToTorch).add_((byte low).ToScalar())),
+        System.Convert.ToByte,
+        TorchSharp.Scalar.op_Implicit)
+
+type TorchFloat16TensorOps() =
+
+    inherit TorchTensorOps<single, single>(Dtype.Float16, id,
+        (fun v -> torch.tensor(float v, dtype=toTorchType Dtype.Float16)),
+        (fun (data, shape) -> torch.tensor(data, shape, dtype=toTorchType Dtype.Float16)),
+        0.0f, 1.0f,
+        (fun (shape, device) -> torch.empty(size=shape, dtype=toTorchType Dtype.Float16, device=device.ToTorch)),
+        (fun (shape, device) -> torch.zeros(size=shape, dtype=toTorchType Dtype.Float16, device=device.ToTorch)),
+        (fun (shape, device) -> torch.ones(size=shape, dtype=toTorchType Dtype.Float16, device=device.ToTorch)),
+        (fun (shape, device) -> torch.rand(size=shape, dtype=toTorchType Dtype.Float16, device=device.ToTorch)),
+        (fun (shape, device) -> torch.randn(size=shape, dtype=toTorchType Dtype.Float16, device=device.ToTorch)),
+        (fun (shape, low, high, device) -> torch.randint(int64 (high-low), size=shape, dtype=toTorchType Dtype.Float16, device=device.ToTorch).add_((float low).ToScalar())),
+        System.Convert.ToSingle,
+        TorchSharp.Scalar.op_Implicit)
+
+
+type TorchBFloat16TensorOps() =
+
+    inherit TorchTensorOps<single, single>(Dtype.BFloat16, id,
+        (fun v -> torch.tensor(float v, dtype=toTorchType Dtype.BFloat16)),
+        (fun (data, shape) -> torch.tensor(data, shape, dtype=toTorchType Dtype.BFloat16)),
+        0.0f, 1.0f,
+        (fun (shape, device) -> torch.empty(size=shape, dtype=toTorchType Dtype.BFloat16, device=device.ToTorch)),
+        (fun (shape, device) -> torch.zeros(size=shape, dtype=toTorchType Dtype.BFloat16, device=device.ToTorch)),
+        (fun (shape, device) -> torch.ones(size=shape, dtype=toTorchType Dtype.BFloat16, device=device.ToTorch)),
+        (fun (shape, device) -> torch.rand(size=shape, dtype=toTorchType Dtype.BFloat16, device=device.ToTorch)),
+        (fun (shape, device) -> torch.randn(size=shape, dtype=toTorchType Dtype.BFloat16, device=device.ToTorch)),
+        (fun (shape, low, high, device) -> torch.randint(int64 (high-low), size=shape, dtype=toTorchType Dtype.BFloat16, device=device.ToTorch).add_((float low).ToScalar())),
+        System.Convert.ToSingle,
+        TorchSharp.Scalar.op_Implicit)
+
+type TorchBackendTensorStatics() =
+    inherit BackendTensorStatics()
+
+    let torchFloat16 = TorchFloat16TensorOps()
+    let torchBFloat16 = TorchBFloat16TensorOps()
+    let torchFloat32 = TorchFloat32TensorOps()
+    let torchFloat64 = TorchFloat64TensorOps()
+    let torchInt8 = TorchInt8TensorOps()
+    let torchInt16 = TorchInt16TensorOps()
+    let torchInt32 = TorchInt32TensorOps()
+    let torchInt64 = TorchInt64TensorOps()
+    let torchByte = TorchByteTensorOps()
+    let torchBool = TorchBoolTensorOps()
+
+    let supported = Array.zeroCreate 32
+    let isSupported (deviceType: TensorMath.DeviceType) =
+        let n = int deviceType
+        match supported[n] with
+        | 0 ->
+            try
+                torch.empty([| 1L |], device= torch.Device(deviceType.ToTorch, index=0)) |> ignore
+                supported[n] <- 1
+                true
+            with _ ->
+                supported[n] <- 2
+                false
+        | 1 -> true
+        | _ -> false
+
+    override _.GetDevices(deviceType) =
+        [
+            match deviceType with
+            | None | Some TensorMath.DeviceType.CPU ->
+                yield Device.CPU
+            | _ -> ()
+
+            match deviceType with
+            | None | Some TensorMath.DeviceType.CUDA ->
+                if torch_cuda.is_available() then
+                    let ncuda = torch_cuda.device_count()
+                    for i in 0 ..
ncuda - 1 do + yield (TensorMath.Device(TensorMath.DeviceType.CUDA, i)) + | _ -> () + // We don't report other devices in GetDevices as yet though they may be usable + // There is currently no way in TorchSHarp to get the device count for other device types, + // you have to work it out via some other route. + ] + + override _.IsDeviceTypeAvailable (deviceType) = + match deviceType with + | TensorMath.DeviceType.CPU -> true + | TensorMath.DeviceType.CUDA -> torch_cuda.is_available() + | _ -> isSupported deviceType + + override _.Seed(seed) = + if torch_cuda.is_available() then + torch_cuda.manual_seed(int64 seed) |> ignore + torch.random.manual_seed(int64 seed) |> ignore + + override _.Zero(dtype, device) = + match dtype with + | Float16 -> torchFloat16.Zero(device) + | BFloat16 -> torchBFloat16.Zero(device) + | Float32 -> torchFloat32.Zero(device) + | Float64 -> torchFloat64.Zero(device) + | Int8 -> torchInt8.Zero(device) + | Byte -> torchByte.Zero(device) + | Int16 -> torchInt16.Zero(device) + | Int32 -> torchInt32.Zero(device) + | Int64 -> torchInt64.Zero(device) + | Bool -> torchBool.Zero(device) + + override _.One(dtype, device) = + match dtype with + | Float16 -> torchFloat16.One(device) + | BFloat16 -> torchBFloat16.One(device) + | Float32 -> torchFloat32.One(device) + | Float64 -> torchFloat64.One(device) + | Int8 -> torchInt8.One(device) + | Byte -> torchByte.One(device) + | Int16 -> torchInt16.One(device) + | Int32 -> torchInt32.One(device) + | Int64 -> torchInt64.One(device) + | Bool -> torchBool.One(device) + + override _.Zeros(shape:Shape, dtype, device) = + match dtype with + | Float16 -> torchFloat16.Zeros(shape, device) + | BFloat16 -> torchBFloat16.Zeros(shape, device) + | Float32 -> torchFloat32.Zeros(shape, device) + | Float64 -> torchFloat64.Zeros(shape, device) + | Int8 -> torchInt8.Zeros(shape, device) + | Byte -> torchByte.Zeros(shape, device) + | Int16 -> torchInt16.Zeros(shape, device) + | Int32 -> torchInt32.Zeros(shape, device) + | Int64 -> torchInt64.Zeros(shape, device) + | Bool -> torchBool.Zeros(shape, device) + + override _.Empty(shape:Shape, dtype, device) = + match dtype with + | Float16 -> torchFloat16.Empty(shape, device) + | BFloat16 -> torchBFloat16.Empty(shape, device) + | Float32 -> torchFloat32.Empty(shape, device) + | Float64 -> torchFloat64.Empty(shape, device) + | Int8 -> torchInt8.Empty(shape, device) + | Byte -> torchByte.Empty(shape, device) + | Int16 -> torchInt16.Empty(shape, device) + | Int32 -> torchInt32.Empty(shape, device) + | Int64 -> torchInt64.Empty(shape, device) + | Bool -> torchBool.Empty(shape, device) + + override _.Ones(shape:Shape, dtype, device) = + match dtype with + | Float16 -> torchFloat16.Ones(shape, device) + | BFloat16 -> torchBFloat16.Ones(shape, device) + | Float32 -> torchFloat32.Ones(shape, device) + | Float64 -> torchFloat64.Ones(shape, device) + | Int8 -> torchInt8.Ones(shape, device) + | Byte -> torchByte.Ones(shape, device) + | Int16 -> torchInt16.Ones(shape, device) + | Int32 -> torchInt32.Ones(shape, device) + | Int64 -> torchInt64.Ones(shape, device) + | Bool -> torchBool.Ones(shape, device) + + override _.Full(shape:Shape, value:scalar, dtype, device) = + match dtype with + | Float16 -> torchFloat16.Full(shape, value, device) + | BFloat16 -> torchBFloat16.Full(shape, value, device) + | Float32 -> torchFloat32.Full(shape, value, device) + | Float64 -> torchFloat64.Full(shape, value, device) + | Int8 -> torchInt8.Full(shape, value, device) + | Byte -> torchByte.Full(shape, value, device) + | Int16 -> 
torchInt16.Full(shape, value, device) + | Int32 -> torchInt32.Full(shape, value, device) + | Int64 -> torchInt64.Full(shape, value, device) + | Bool -> torchBool.Full(shape, value, device) + + override _.Random(shape:Shape, dtype, device) = + match dtype with + | Float16 -> torchFloat16.Random(shape, device) + | BFloat16 -> torchBFloat16.Random(shape, device) + | Float32 -> torchFloat32.Random(shape, device) + | Float64 -> torchFloat64.Random(shape, device) + | Int8 -> torchInt8.Random(shape, device) + | Byte -> torchByte.Random(shape, device) + | Int16 -> torchInt16.Random(shape, device) + | Int32 -> torchInt32.Random(shape, device) + | Int64 -> torchInt64.Random(shape, device) + | Bool -> torchBool.Random(shape, device) + + override _.RandomNormal(shape:Shape, dtype, device) = + match dtype with + | Float16 -> torchFloat16.RandomNormal(shape, device) + | BFloat16 -> torchBFloat16.RandomNormal(shape, device) + | Float32 -> torchFloat32.RandomNormal(shape, device) + | Float64 -> torchFloat64.RandomNormal(shape, device) + | Int8 -> torchInt8.RandomNormal(shape, device) + | Byte -> torchByte.RandomNormal(shape, device) + | Int16 -> torchInt16.RandomNormal(shape, device) + | Int32 -> torchInt32.RandomNormal(shape, device) + | Int64 -> torchInt64.RandomNormal(shape, device) + | Bool -> torchBool.RandomNormal(shape, device) + + override _.RandomInt(shape:Shape, low:int, high:int, dtype, device) = + match dtype with + | Float16 -> torchFloat16.RandomInt(shape, low, high, device) + | BFloat16 -> torchBFloat16.RandomInt(shape, low, high, device) + | Float32 -> torchFloat32.RandomInt(shape, low, high, device) + | Float64 -> torchFloat64.RandomInt(shape, low, high, device) + | Int8 -> torchInt8.RandomInt(shape, low, high, device) + | Byte -> torchByte.RandomInt(shape, low, high, device) + | Int16 -> torchInt16.RandomInt(shape, low, high, device) + | Int32 -> torchInt32.RandomInt(shape, low, high, device) + | Int64 -> torchInt64.RandomInt(shape, low, high, device) + | Bool -> torchBool.RandomInt(shape, low, high, device) + + override _.CreateFromFlatArray(values:Array, shape, dtype, device) = + match dtype with + | Float16 -> torchFloat16.CreateFromFlatArray(values, shape, device) + | BFloat16 -> torchBFloat16.CreateFromFlatArray(values, shape, device) + | Float32 -> torchFloat32.CreateFromFlatArray(values, shape, device) + | Float64 -> torchFloat64.CreateFromFlatArray(values, shape, device) + | Int8 -> torchInt8.CreateFromFlatArray(values, shape, device) + | Byte -> torchByte.CreateFromFlatArray(values, shape, device) + | Int16 -> torchInt16.CreateFromFlatArray(values, shape, device) + | Int32 -> torchInt32.CreateFromFlatArray(values, shape, device) + | Int64 -> torchInt64.CreateFromFlatArray(values, shape, device) + | Bool -> torchBool.CreateFromFlatArray(values, shape, device) + diff --git a/src/TensorMath/Backend.fs b/src/TensorMath/Backend.fs new file mode 100644 index 0000000..a5dfd5c --- /dev/null +++ b/src/TensorMath/Backend.fs @@ -0,0 +1,79 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. 
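The TorchBackendTensorStatics overrides above fan every creation entry point out to the per-dtype TorchTensorOps instance. A usage sketch of how these statics are reached through RawTensor, mirroring the calls the in-place random ops make earlier in this file (illustrative only, not part of the patch):

    open TensorMath

    // Each call dispatches to the Torch backend, then by dtype to the
    // matching TorchTensorOps instance defined above.
    let zeros = RawTensor.Zeros([| 2; 3 |], Dtype.Float32, Device.CPU, Backend.Torch)
    let ones  = RawTensor.Ones([| 2; 3 |], Dtype.Int64, Device.CPU, Backend.Torch)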
+
+namespace TensorMath
+
+/// Represents a backend for TensorMath tensors
+[]
+type Backend =
+    /// The reference backend
+    | Reference
+    /// The LibTorch backend
+    | Torch
+    /// Reserved for future use
+    | Other of name: string * code: int
+
+    member internal x.Code =
+        match x with
+        | Reference -> 0x000
+        | Torch -> 0x0100
+        | Other (_name, code) -> (code + 3) <<< 8
+
+    /// Get the name of the backend
+    member x.Name =
+        match x with
+        | Reference -> "Reference"
+        | Torch -> "Torch"
+        | Other (name, _) -> name
+
+    override x.ToString() = x.Name
+
+/// Contains functions and settings related to backend specifications.
+module Backend =
+    let mutable internal count = 0
+    let internal codes = System.Collections.Concurrent.ConcurrentDictionary<string, Backend>()
+
+    /// Register a new backend
+    let Register name =
+        codes.GetOrAdd(name, (fun _ ->
+            count <- count + 1
+            Backend.Other(name, count)))
+
+    /// Get or set the default backend used when creating tensors. Note, use dsharp.config(...) instead.
+    let mutable Default = Backend.Reference
+
+type BackendFunctionality<'T>() =
+    let mutable last = None
+    let backends = System.Collections.Concurrent.ConcurrentDictionary<int, 'T>()
+
+    member _.Get(?backend: Backend) =
+        let backend = defaultArg backend Backend.Default
+        let code = backend.Code
+        match last with
+        | Some (code2, v) when code = code2 -> v
+        | _ ->
+        match backends.TryGetValue(code) with
+        | true, v -> v
+        | false, _ ->
+            let res =
+                backends.GetOrAdd(code, fun _ ->
+                    let name = "TensorMath.Backends." + backend.Name
+                    let fullName = System.Reflection.Assembly.GetExecutingAssembly().FullName.Replace("TensorMath", name)
+                    let asm =
+                        try System.Reflection.Assembly.Load(fullName)
+                        with e -> failwithf "Couldn't find assembly '%s', error = %s" fullName (e.ToString())
+                    let typeName = sprintf "TensorMath.Backends.%s.%s%s" backend.Name backend.Name typeof<'T>.Name
+                    let theType = asm.GetType(typeName)
+                    if isNull theType then failwithf "Couldn't find type '%s' in assembly '%s'" typeName fullName
+                    let b =
+                        match System.Activator.CreateInstance(theType) with
+                        | :? 'T as b -> b
+                        | _ -> failwith "activation failed to return correct type"
+                    b
+                )
+            last <- Some (code, res)
+            res
+
+    member _.Backends = backends
 diff --git a/src/TensorMath/Device.fs b/src/TensorMath/Device.fs
new file mode 100644
index 0000000..103cd4a
--- /dev/null
+++ b/src/TensorMath/Device.fs
@@ -0,0 +1,62 @@
+// Copyright (c) 2016-     University of Oxford (Atilim Gunes Baydin <gunes@robots.ox.ac.uk>)
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath
+
+/// <summary>
+///   Represents the type of a device.
+/// </summary>
+///
+/// <remarks>
+///   The numeric values used are as for LibTorch.
+/// </remarks>
+///
+/// <namespacedoc>
+///   <summary>Contains fundamental types for the tensor programming model, including Tensor, Shape and dsharp.</summary>
+/// </namespacedoc>
+type DeviceType =
+    | CPU = 0
+    | CUDA = 1 // CUDA.
+    | MKLDNN = 2 // Reserved for explicit MKLDNN
+    | OPENGL = 3 // OpenGL
+    | OPENCL = 4 // OpenCL
+    | IDEEP = 5 // IDEEP.
+    | HIP = 6 // AMD HIP
+    | FPGA = 7 // FPGA
+    | MSNPU = 8 // MSNPU
+    | XLA = 9 // XLA / TPU
+
+/// Represents a device specification.
+[]
+type Device =
+    | Device of DeviceType * int
+    member x.DeviceType = (let (Device(a,_)) = x in a)
+    member x.DeviceIndex = (let (Device(_,b)) = x in b)
+    static member CPU = Device(DeviceType.CPU, -1)
+    static member GPU = Device(DeviceType.CUDA, 0)
+
+    member internal x.Code = (int x.DeviceType <<< 4) + x.DeviceIndex
+
+    member internal x.Name =
+        (match x.DeviceType with
+         | DeviceType.CPU -> "cpu"
+         | DeviceType.CUDA -> "cuda"
+         | DeviceType.MKLDNN -> "mkldnn"
+         | DeviceType.OPENGL -> "opengl"
+         | DeviceType.OPENCL -> "opencl"
+         | DeviceType.IDEEP -> "ideep"
+         | DeviceType.HIP -> "hip"
+         | DeviceType.FPGA -> "fpga"
+         | DeviceType.MSNPU -> "msnpu"
+         | DeviceType.XLA -> "xla"
+         | _ -> failwith "unknown device type") + string x.DeviceIndex
+
+    override x.ToString() = x.Name
+
+/// Contains functions and settings related to device specifications.
+module Device =
+
+    /// Get or set the default device used when creating tensors. Note, use dsharp.config(...) instead.
+    let mutable Default : Device = Device.CPU
 diff --git a/src/TensorMath/Dtype.fs b/src/TensorMath/Dtype.fs
new file mode 100644
index 0000000..f521434
--- /dev/null
+++ b/src/TensorMath/Dtype.fs
@@ -0,0 +1,129 @@
+// Copyright (c) 2016-     University of Oxford (Atilim Gunes Baydin <gunes@robots.ox.ac.uk>)
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath
+
+/// Represents a storage type for elements of a tensor
+[]
+type Dtype =
+    /// Store elements as 16-bit floating point numbers (bfloat16 variation)
+    | []
+      BFloat16
+    /// Store elements as 16-bit floating point numbers
+    | []
+      Float16
+    /// Store elements as 32-bit floating point numbers
+    | Float32
+    /// Store elements as 64-bit floating point numbers
+    | Float64
+    /// Store elements as 8-bit integers
+    | Int8
+    /// Store elements as 8-bit unsigned integers
+    | Byte
+    /// Store elements as 16-bit signed integers
+    | Int16
+    /// Store elements as 32-bit signed integers
+    | Int32
+    /// Store elements as 64-bit signed integers
+    | Int64
+    /// Store elements as booleans
+    | Bool
+
+    member internal x.Name =
+        match x with
+        | BFloat16 -> "BFloat16"
+        | Float16 -> "Float16"
+        | Float32 -> "Float32"
+        | Float64 -> "Float64"
+        | Int8 -> "Int8"
+        | Byte -> "Byte"
+        | Int16 -> "Int16"
+        | Int32 -> "Int32"
+        | Int64 -> "Int64"
+        | Bool -> "Bool"
+
+    /// Gets the natural result of the Sum(), SumToSize() and Sum(dim) operation on this dtype
+    member t.SummationType =
+        match t with
+        | Bool | Byte | Int8 | Int16 | Int32 | Int64 -> Dtype.Int64
+        | dt -> dt
+
+    override x.ToString() = x.Name
+
+/// Contains global functions and settings related to tensor element types, used when writing backends.
+[<AutoOpen>]
+module DtypeAutoOpens =
+
+    type Dtype with
+        /// Matches all floating point tensor element types
+        member x.IsFloatingPoint =
+            match x with
+            | Float16 | BFloat16 | Float32 | Float64 -> true
+            | _ -> false
+
+        /// Matches all integral tensor element types
+        member x.IsIntegral =
+            match x with
+            | Byte | Int8 | Int16 | Int32 | Int64 -> true
+            | _ -> false
+
+    /// Raise an exception indicating the given operation is not supported for the given tensor element type.
+    let opNotSupported msg (dtype: Dtype) =
+        invalidOp (sprintf "operation '%s' not permitted on tensors of type %A" msg dtype)
+
+    /// Raise an exception indicating the given operation is not supported for the given tensor device type.
+    let opNotSupportedOnDeviceType msg (dtype: Dtype) (deviceType: DeviceType) =
+        invalidOp (sprintf "operation '%s' not permitted on tensors of type %A on device type %A" msg dtype deviceType)
+
+    /// Raise an exception indicating the given binary operation is not supported for the two given tensor element types.
+    let opNotSupported2 msg (dtype1: Dtype) (dtype2: Dtype) =
+        invalidOp (sprintf "operation '%s' not permitted on tensors of type (%A, %A)" msg dtype1 dtype2)
+
+/// Contains functions and settings related to tensor element types
+module Dtype =
+
+    /// Matches all floating point tensor element types
+    let (|FloatingPoint|_|) (x: Dtype) = if x.IsFloatingPoint then Some() else None
+
+    /// Matches all integral tensor element types
+    let (|Integral|_|) (x: Dtype) = if x.IsIntegral then Some() else None
+
+    /// Matches all integral or boolean tensor element types
+    let (|IntegralOrBool|_|) x =
+        match x with
+        | Integral | Bool -> Some()
+        | _ -> None
+
+    /// Find the Dtype into which dtype1 and dtype2 can be widened
+    let widen (dtype1: Dtype) (dtype2: Dtype) =
+        if dtype1 = dtype2 then Some dtype1
+        else
+            match dtype1, dtype2 with
+            | Float64, _ | _, Float64 -> Some Float64
+            | Float32, _ | _, Float32 -> Some Float32
+            | BFloat16, _ | _, BFloat16 -> Some BFloat16
+            | Float16, _ | _, Float16 -> Some Float16
+            | Int64, _ | _, Int64 -> Some Int64
+            | Int32, _ | _, Int32 -> Some Int32
+            | Int16, _ | _, Int16 -> Some Int16
+            | Int8, Bool | Bool, Int8 -> Some Int8
+            | Byte, Bool | Bool, Byte -> Some Byte
+            | Int8, Int8 -> Some Int8
+            | Byte, Byte -> Some Byte
+            | Bool, Bool -> Some Bool
+            | Int8, Byte | Byte, Int8 -> None
+
+    /// Get or set the default element type used when creating tensors. Only floating point types are supported as the default type. Note, use dsharp.config(...) instead.
+    let mutable Default = Dtype.Float32
+
+    /// Find the Dtype which would result from dividing tensors with dtype1 and dtype2
+    let divisionType (dtype1: Dtype) (dtype2: Dtype) =
+        match dtype1.IsFloatingPoint, dtype2.IsFloatingPoint with
+        | false, false -> Default
+        | false, true -> dtype2
+        | true, false -> dtype1
+        | true, true -> (widen dtype1 dtype2).Value
+
 diff --git a/src/TensorMath/Extensions.fs b/src/TensorMath/Extensions.fs
new file mode 100644
index 0000000..5352884
--- /dev/null
+++ b/src/TensorMath/Extensions.fs
@@ -0,0 +1,319 @@
+// Copyright (c) 2016-     University of Oxford (Atilim Gunes Baydin <gunes@robots.ox.ac.uk>)
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath.Util
+
+open System
+open System.Collections.Generic
+open System.Collections.Specialized
+open System.Diagnostics.CodeAnalysis
+
+
+/// <summary>
+///   Contains extensions to the F# Array module.
+/// </summary>
+///
+/// <namespacedoc>
+///   <summary>Contains utilities and library extensions related to the TensorMath programming model.</summary>
+/// </namespacedoc>
+module Array =
+
+    /// Determines if all values of the first array lie within the given tolerances of the second array.
+    [<ExcludeFromCodeCoverage>]
+    let inline allClose (relativeTolerance:'T) (absoluteTolerance:'T) (array1:'T[]) (array2:'T[]) =
+        let dim1 = array1.Length
+        let dim2 = array2.Length
+        if dim1 <> dim2 then false
+        else (array1,array2) ||> Array.forall2 (fun a b -> abs(a-b) <= absoluteTolerance + relativeTolerance*abs(b))
+
+    /// Gets the cumulative sum of the input array.
+    [<ExcludeFromCodeCoverage>]
+    let inline cumulativeSum (a:_[]) = (Array.scan (+) LanguagePrimitives.GenericZero a)[1..]
+
+    /// Gets the unique counts of the input array.
+ let getUniqueCounts (sorted:bool) (values:'T[]) = + let counts = Dictionary<'T, int>() + for v in values do + if counts.ContainsKey(v) then counts[v] <- counts[v] + 1 else counts[v] <- 1 + if sorted then + counts |> Array.ofSeq |> Array.sortByDescending (fun (KeyValue(_, v)) -> v) |> Array.map (fun (KeyValue(k, v)) -> k, v) |> Array.unzip + else + counts |> Array.ofSeq |> Array.map (fun (KeyValue(k, v)) -> k, v) |> Array.unzip + + // Create a 2D array using a flat representation + let initFlat2D i j f = Array.init (i*j) (fun ij -> f (ij/j) (ij%j)) + + // Create a 3D array using a flat representation + let initFlat3D i j k f = Array.init (i*j*k) (fun ijk -> f (ijk/j/k) ((ijk/k)%j) (ijk%k)) + + let foralli f (arr: 'T[]) = + let mutable i = 0 + let n = arr.Length + while i < n && f i arr[i] do + i <- i + 1 + (i = n) + + // Copied from https://github.com/dotnet/fsharp/pull/11888 contributed by Jan Dryk (uxsoft) + let insertManyAt (index: int) (values: seq<'T>) (source: 'T[]) : 'T[] = + if index < 0 || index > source.Length then invalidArg "index" "index must be within bounds of the array" + + let valuesArray = Seq.toArray values + if valuesArray.Length = 0 then source + else + let length = source.Length + valuesArray.Length + let result = Array.zeroCreate length + if index > 0 then + Array.Copy(source, result, index) + Array.Copy(valuesArray, 0, result, index, valuesArray.Length) + if source.Length - index > 0 then + Array.Copy(source, index, result, index + valuesArray.Length, source.Length - index) + result + + // Copied from https://github.com/dotnet/fsharp/pull/11888 contributed by Jan Dryk (uxsoft) + let removeAt (index: int) (source: 'T[]) : 'T[] = + if index < 0 || index >= source.Length then invalidArg "index" "index must be within bounds of the array" + let length = source.Length - 1 + let result = Array.zeroCreate length + if index > 0 then + Array.Copy(source, result, index) + if length - index > 0 then + Array.Copy(source, index + 1, result, index, length - index) + result + +module Array4D = + /// Builds a new array whose elements are the results of applying the given function to each of the elements of the array. 
+ let map mapping (array:'a[,,,]) = + Array4D.init (array.GetLength(0)) (array.GetLength(1)) (array.GetLength(2)) (array.GetLength(3)) (fun i j k l -> mapping array[i, j, k, l]) + +// See https://github.com/dotnet/fsharp/issues/12013 +//type 'T array5d = 'T ``[,,,,]`` +//type 'T array6d = 'T ``[,,,,,]`` + +module Array5D = + /// + let zeroCreate<'T> (length1:int) length2 length3 length4 length5 : Array = + System.Array.CreateInstance(typeof<'T>, [|length1;length2;length3;length4;length5|]) + + let get (array:Array) (index1:int) index2 index3 index4 index5 = + array.GetValue([|index1;index2;index3;index4;index5|]) + + let set (array:Array) (index1:int) index2 index3 index4 index5 value = + array.SetValue(value, [|index1;index2;index3;index4;index5|]) + + let length1 (array: Array) = array.GetLength(0) + let length2 (array: Array) = array.GetLength(1) + let length3 (array: Array) = array.GetLength(2) + let length4 (array: Array) = array.GetLength(3) + let length5 (array: Array) = array.GetLength(4) + + let init<'T> (length1:int) length2 length3 length4 length5 (initializer:int->int->int->int->int->'T) : Array = + let arr = zeroCreate<'T> length1 length2 length3 length4 length5 + for i1=0 to length1-1 do + for i2=0 to length2-1 do + for i3=0 to length3-1 do + for i4=0 to length4-1 do + for i5=0 to length5-1 do + set arr i1 i2 i3 i4 i5 (initializer i1 i2 i3 i4 i5) + arr + + let create (length1:int) length2 length3 length4 length5 (initial:'T) = init length1 length2 length3 length4 length5 (fun _ _ _ _ _ -> initial) + + let map mapping (array: Array) = + init (length1 array) (length2 array) (length3 array) (length4 array) (length5 array) (fun i1 i2 i3 i4 i5 -> mapping (get array i1 i2 i3 i4 i5)) + +module Array6D = + let zeroCreate<'T> (length1:int) length2 length3 length4 length5 length6 : Array = + System.Array.CreateInstance(typeof<'T>, [|length1;length2;length3;length4;length5;length6|]) + + let get (array: Array) (index1: int) index2 index3 index4 index5 index6 = + array.GetValue([|index1;index2;index3;index4;index5;index6|]) + + let set (array: Array) (index1: int) index2 index3 index4 index5 index6 value = + array.SetValue(value, [|index1;index2;index3;index4;index5;index6|]) + + let length1 (array: Array) = array.GetLength(0) + let length2 (array: Array) = array.GetLength(1) + let length3 (array: Array) = array.GetLength(2) + let length4 (array: Array) = array.GetLength(3) + let length5 (array: Array) = array.GetLength(4) + let length6 (array: Array) = array.GetLength(5) + + let init<'T> (length1: int) length2 length3 length4 length5 length6 (initializer: int->int->int->int->int->int->'T) = + let arr = zeroCreate<'T> length1 length2 length3 length4 length5 length6 + for i1=0 to length1-1 do + for i2=0 to length2-1 do + for i3=0 to length3-1 do + for i4=0 to length4-1 do + for i5=0 to length5-1 do + for i6=0 to length6-1 do + set arr i1 i2 i3 i4 i5 i6 (initializer i1 i2 i3 i4 i5 i6) + arr + + let create (length1: int) length2 length3 length4 length5 length6 (initial:'T) = + init length1 length2 length3 length4 length5 length6 (fun _ _ _ _ _ _ -> initial) + + let map mapping (array: Array) = + init (length1 array) (length2 array) (length3 array) (length4 array) (length5 array) (length6 array) (fun i1 i2 i3 i4 i5 i6 -> mapping (get array i1 i2 i3 i4 i5 i6)) + + +// Notes about slicing 5d and 6d arrays if needed +// #if SLICING +// [] +// module Array5DExtensions = +// type ``[,,,,]``<'T> with +// member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, 
i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option) : ``[,,,,]``<'T> = +// failwith "tbd" +// member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option) : 'T[,,,] = +// failwith "tbd" +// +// let d = Array5D.zeroCreate 2 2 2 2 2 +// d[0..0,0..0,0..0,0..0,0..0] +// d[0,0..0,0..0,0..0,0..0] +// #endif + + +module ArrayND = + /// Initializes an array with a given shape and initializer function. + let init (shape: int[]) (f: int[] -> 'T) : obj = + match shape with + | [| |] -> f [| |] :> _ + | [| d1 |] -> Array.init d1 (fun i -> f [| i |]) :> _ + | [| d1; d2 |] -> Array2D.init d1 d2 (fun i1 i2 -> f [| i1; i2 |]) :> _ + | [| d1; d2; d3 |] -> Array3D.init d1 d2 d3 (fun i1 i2 i3 -> f [| i1; i2; i3 |]) :> _ + | [| d1; d2; d3; d4 |] -> Array4D.init d1 d2 d3 d4 (fun i1 i2 i3 i4 -> f [| i1; i2; i3; i4 |]) :> _ + | [| d1; d2; d3; d4; d5 |] -> Array5D.init d1 d2 d3 d4 d5 (fun i1 i2 i3 i4 i5 -> f [| i1; i2; i3; i4; i5 |]) :> _ + | [| d1; d2; d3; d4; d5; d6 |] -> Array6D.init d1 d2 d3 d4 d5 d6 (fun i1 i2 i3 i4 i5 i6 -> f [| i1; i2; i3; i4; i5; i6 |]) :> _ + | _ -> failwith "ArrayND.init not supported for dim > 6" + + /// Initializes an array with a given shape and initializer function. + let zeroCreate (shape: int[]) : Array = + match shape with + | [| |] -> [| |] :> _ + | [| d1 |] -> Array.zeroCreate d1 :> _ + | [| d1; d2 |] -> Array2D.zeroCreate d1 d2 :> _ + | [| d1; d2; d3 |] -> Array3D.zeroCreate d1 d2 d3 :> _ + | [| d1; d2; d3; d4 |] -> Array4D.zeroCreate d1 d2 d3 d4 :> _ + | [| d1; d2; d3; d4; d5 |] -> Array5D.zeroCreate d1 d2 d3 d4 d5 + | [| d1; d2; d3; d4; d5; d6 |] -> Array6D.zeroCreate d1 d2 d3 d4 d5 d6 + | _ -> failwith "ArrayND.zeroCreate not supported for dim > 6" + +/// Contains extensions to the F# Seq module. +module Seq = + + /// Gets the index of the maximum element of the sequence. + let maxIndex seq = seq |> Seq.mapi (fun i x -> i, x) |> Seq.maxBy snd |> fst + + /// Gets the index of the minimum element of the sequence. + let minIndex seq = seq |> Seq.mapi (fun i x -> i, x) |> Seq.minBy snd |> fst + + /// Indicates if all elements of the sequence are equal. + let allEqual (items:seq<'T>) = + let item0 = items |> Seq.head + items |> Seq.forall ((=) item0) + + /// Gets the duplicate elements in the sequence. + let duplicates l = + l |> List.ofSeq + |> List.groupBy id + |> List.choose ( function + | _, x::_::_ -> Some x + | _ -> None ) + + /// Indicates if a sequence has duplicate elements. + let hasDuplicates l = + duplicates l |> List.isEmpty |> not + + /// Like Seq.toArray but does not clone the array if the input is already an array + let inline toArrayQuick (xs: seq<'T>) = + match xs with + | :? ('T[]) as arr -> arr + | _ -> Seq.toArray xs + +/// Contains extensions related to .NET OrderedDictionary. +module OrderedDictionary = + + /// Gets a fresh array containing the keys of the dictionary. + let copyKeys (dictionary:OrderedDictionary) = + let keys = Array.zeroCreate dictionary.Count + dictionary.Keys.CopyTo(keys, 0) + keys + +/// Contains extensions related to .NET Dictionary. +module Dictionary = + + /// Gets a fresh array containing the keys of the dictionary. + let copyKeys (dictionary:Dictionary<'Key, 'Value>) = + let keys = Array.zeroCreate dictionary.Count + dictionary.Keys.CopyTo(keys, 0) + keys + + /// Gets a fresh array containing the values of the dictionary. 
+ let copyValues (dictionary:Dictionary<'Key, 'Value>) = + let values = Array.zeroCreate dictionary.Count + dictionary.Values.CopyTo(values, 0) + values + +/// Contains auto-opened extensions to the F# programming model. +[] +module ExtensionAutoOpens = + + /// Indicates if a value is not null. + [] + let inline notNull value = not (obj.ReferenceEquals(value, null)) + + /// Creates a non-jagged 3D array from jagged data. + let array3D data = + let data = data |> Array.ofSeq |> Array.map array2D + let r1, r2, r3 = data.Length, data[0].GetLength(0), data[0].GetLength(1) + for i in 0 .. r1-1 do + let q2 = data[i].GetLength(0) + let q3 = data[i].GetLength(1) + if q2 <> r2 || q3 <> r3 then + invalidArg "data" (sprintf "jagged input at position %d: first is _ x %d x %d, later is _ x %d x %d" i r2 r3 q2 q3) + Array3D.init r1 r2 r3 (fun i j k -> data[i][j,k]) + + /// Creates a non-jagged 4D array from jagged data. + let array4D data = + let data = data |> array2D |> Array2D.map array2D + let r1,r2,r3,r4 = data.GetLength(0), data.GetLength(1), data[0,0].GetLength(0), data[0,0].GetLength(1) + for i in 0 .. r1-1 do + for j in 0 .. r2-1 do + let q3 = data[i,j].GetLength(0) + let q4 = data[i,j].GetLength(1) + if q3 <> r3 || q4 <> r4 then + invalidArg "data" (sprintf "jagged input at position (%d,%d): first is _ x _ x %d x %d, later is _ x _ x %d x %d" i j r2 r3 q3 q4) + Array4D.init r1 r2 r3 r4 (fun i j k m -> data[i,j][k,m]) + + let array5D data = + let data = data |> Array.ofSeq |> Array.map array4D + let r1,r2,r3,r4,r5 = data.Length, data[0].GetLength(0), data[0].GetLength(1), data[0].GetLength(2), data[0].GetLength(3) + for i in 0 .. r1-1 do + let q2 = data[i].GetLength(0) + let q3 = data[i].GetLength(1) + let q4 = data[i].GetLength(2) + let q5 = data[i].GetLength(3) + if q2 <> r2 || q3 <> r3 || q4 <> r4 || q5 <> r5 then + invalidArg "data" (sprintf "jagged input at position %d: first is _ x %d x %d x %d x %d, later is _ x %d x %d x %d x %d" i r2 r3 r4 r5 q2 q3 q4 q5) + Array5D.init r1 r2 r3 r4 r5 (fun i1 i2 i3 i4 i5 -> data[i1][i2,i3,i4,i5]) + + let array6D data = + let data = data |> array2D |> Array2D.map array4D + let r1,r2,r3,r4,r5,r6 = data.GetLength(0), data.GetLength(1), data[0,0].GetLength(0), data[0,0].GetLength(1), data[0,0].GetLength(2), data[0,0].GetLength(3) + for i in 0 .. r1-1 do + for j in 0 .. r2-2 do + let q3 = data[i,j].GetLength(0) + let q4 = data[i,j].GetLength(1) + let q5 = data[i,j].GetLength(2) + let q6 = data[i,j].GetLength(3) + if q3 <> r3 || q4 <> r4 || q5 <> r5 || q6 <> r6 then + invalidArg "data" (sprintf "jagged input at position (%d,%d): first is _ x _ x %d x %d x %d x %d, later is _ x _ x %d x %d x %d x %d" i j r3 r4 r5 r6 q3 q4 q5 q6) + Array6D.init r1 r2 r3 r4 r5 r6 (fun i1 i2 i3 i4 i5 i6 -> data[i1,i2][i3,i4,i5,i6]) + + /// Print the given value to the console using the '%A' printf format specifier + let print x = printfn "%A" x + + +[] +do() diff --git a/src/TensorMath/Library.fs b/src/TensorMath/Library.fs deleted file mode 100644 index 58a485d..0000000 --- a/src/TensorMath/Library.fs +++ /dev/null @@ -1,5 +0,0 @@ -namespace TensorMath - -module Say = - let hello name = - printfn "Hello %s" name diff --git a/src/TensorMath/Op.AvgPool.fs b/src/TensorMath/Op.AvgPool.fs new file mode 100644 index 0000000..24e8a50 --- /dev/null +++ b/src/TensorMath/Op.AvgPool.fs @@ -0,0 +1,101 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. 
diff --git a/src/TensorMath/Op.AvgPool.fs b/src/TensorMath/Op.AvgPool.fs
new file mode 100644
index 0000000..24e8a50
--- /dev/null
+++ b/src/TensorMath/Op.AvgPool.fs
@@ -0,0 +1,101 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath
+
+[<AutoOpen>]
+module OpAvgPoolExtensions =
+
+    type Tensor with
+        /// <summary>Applies a 1D average pooling over an input signal composed of several input planes.</summary>
+        /// <param name="kernelSize">The size of the window to take an average over.</param>
+        /// <param name="stride">The stride of the window. Default value is kernelSize.</param>
+        /// <param name="padding">The implicit zero padding to be added on both sides.</param>
+        member a.avgpool1d(kernelSize:int, ?stride:int, ?padding:int(* , ?ceil_mode: bool, ?count_include_pad: bool *)) =
+            let stride = defaultArg stride kernelSize
+            let padding = defaultArg padding 0
+            //let ceil_mode = defaultArg ceil_mode false
+            //let count_include_pad = defaultArg count_include_pad true
+            Shape.checkCanAvgpool1d a.dtype a.shape kernelSize stride padding |> ignore
+            TensorC(a.primalRaw.AvgPool1D(kernelSize, stride, padding(* , ceil_mode, count_include_pad *)))
+
+        member internal a.avgpoolReverse1d(originalInput:Tensor, kernelSize:int, ?stride:int, ?padding:int(* , ?ceil_mode: bool, ?count_include_pad: bool *)) =
+            let stride = defaultArg stride kernelSize
+            let padding = defaultArg padding 0
+            //let ceil_mode = defaultArg ceil_mode false
+            //let count_include_pad = defaultArg count_include_pad true
+            TensorC(a.primalRaw.AvgPoolReverse1D(originalInput.primalRaw, kernelSize, stride, padding(* , ceil_mode, count_include_pad *)))
+
+        /// <summary>Applies a 2D average pooling over an input signal composed of several input planes.</summary>
+        /// <param name="kernelSize">The size of the window to take an average over.</param>
+        /// <param name="stride">The stride of the window. Default value is kernelSize.</param>
+        /// <param name="padding">The implicit zero padding to be added on both sides.</param>
+        /// <param name="kernelSizes">The sizes of the window to take an average over.</param>
+        /// <param name="strides">The strides of the window. Default value is kernelSize.</param>
+        /// <param name="paddings">The implicit zero paddings to be added on both sides.</param>
+        member a.avgpool2d(?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>(* , ?ceil_mode: bool, ?count_include_pad: bool *)) =
+            let kernelSizes, strides, paddings = Shape.resolve2dMaxPoolSizes kernelSize kernelSizes stride strides padding paddings
+            //let ceil_mode = defaultArg ceil_mode false
+            //let count_include_pad = defaultArg count_include_pad true
+            Shape.checkCanAvgpool2d a.dtype a.shape kernelSizes strides paddings |> ignore
+            TensorC(a.primalRaw.AvgPool2D(kernelSizes, strides, paddings(* , ceil_mode, count_include_pad *)))
+
+        member internal a.avgpoolReverse2d(originalInput:Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>(* , ?ceil_mode: bool, ?count_include_pad: bool *)) =
+            let kernelSizes, strides, paddings = Shape.resolve2dMaxPoolSizes kernelSize kernelSizes stride strides padding paddings
+            //let ceil_mode = defaultArg ceil_mode false
+            //let count_include_pad = defaultArg count_include_pad true
+            TensorC(a.primalRaw.AvgPoolReverse2D(originalInput.primalRaw, kernelSizes, strides, paddings(* , ceil_mode, count_include_pad *)))
+
+        /// <summary>Applies a 3D average pooling over an input signal composed of several input planes.</summary>
+        /// <param name="kernelSize">The size of the window to take an average over.</param>
+        /// <param name="stride">The stride of the window. Default value is kernelSize.</param>
+        /// <param name="padding">The implicit zero padding to be added on both sides.</param>
+        /// <param name="kernelSizes">The sizes of the window to take an average over.</param>
+        /// <param name="strides">The strides of the window. Default value is kernelSize.</param>
+        /// <param name="paddings">The implicit zero paddings to be added on both sides.</param>
+        member a.avgpool3d(?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>(* , ?ceil_mode: bool, ?count_include_pad: bool *)) =
+            let kernelSizes, strides, paddings = Shape.resolve3dMaxPoolSizes kernelSize kernelSizes stride strides padding paddings
+            //let ceil_mode = defaultArg ceil_mode false
+            //let count_include_pad = defaultArg count_include_pad true
+            Shape.checkCanAvgpool3d a.dtype a.shape kernelSizes strides paddings |> ignore
+            TensorC(a.primalRaw.AvgPool3D(kernelSizes, strides, paddings(* , ceil_mode, count_include_pad *)))
+
+        member internal a.avgpoolReverse3d(originalInput:Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>(* , ?ceil_mode: bool, ?count_include_pad: bool *)) =
+            let kernelSizes, strides, paddings = Shape.resolve3dMaxPoolSizes kernelSize kernelSizes stride strides padding paddings
+            //let ceil_mode = defaultArg ceil_mode false
+            //let count_include_pad = defaultArg count_include_pad true
+            TensorC(a.primalRaw.AvgPoolReverse3D(originalInput.primalRaw, kernelSizes, strides, paddings(* , ceil_mode, count_include_pad *)))
+
+    type dsharp with
+        /// <summary>Applies a 1D average pooling over an input signal composed of several input planes.</summary>
+        /// <param name="input">The input tensor.</param>
+        /// <param name="kernelSize">The size of the window to take an average over.</param>
+        /// <param name="stride">The stride of the window. Default value is kernelSize.</param>
+        /// <param name="padding">The implicit zero padding to be added on both sides.</param>
+        static member avgpool1d(input: Tensor, kernelSize:int, ?stride:int, ?padding:int(* , ?ceil_mode: bool, ?count_include_pad: bool *)) =
+            input.avgpool1d(kernelSize=kernelSize, ?stride=stride, ?padding=padding(* , ?ceil_mode=ceil_mode, ?count_include_pad=count_include_pad *))
+
+        /// <summary>Applies a 2D average pooling over an input signal composed of several input planes.</summary>
+        /// <param name="input">The input tensor.</param>
+        /// <param name="kernelSize">The size of the window to take an average over.</param>
+        /// <param name="stride">The stride of the window. Default value is kernelSize.</param>
+        /// <param name="padding">The implicit zero padding to be added on both sides.</param>
+        /// <param name="kernelSizes">The sizes of the window to take an average over.</param>
+        /// <param name="strides">The strides of the window. Default value is kernelSize.</param>
+        /// <param name="paddings">The implicit zero paddings to be added on both sides.</param>
+        static member avgpool2d(input: Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>(* , ?ceil_mode: bool, ?count_include_pad: bool *)) =
+            input.avgpool2d(?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings(* , ?ceil_mode=ceil_mode, ?count_include_pad=count_include_pad *))
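A quick sketch of the pooling API from the caller's side (illustrative values; it assumes the dsharp entry point defined in TensorMath.fs constructs tensors, as elsewhere in this patch):

    let t = dsharp.tensor([[[1.; 2.; 3.; 4.]]])   // minibatch x channels x length = 1x1x4
    t.avgpool1d(2)                                // 1x1x2: [[[1.5; 3.5]]]
    dsharp.avgpool1d(t, kernelSize=2, stride=2)   // same result via the static entry point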
+
+        /// <summary>Applies a 3D average pooling over an input signal composed of several input planes.</summary>
+        /// <param name="input">The input tensor.</param>
+        /// <param name="kernelSize">The size of the window to take an average over.</param>
+        /// <param name="stride">The stride of the window. Default value is kernelSize.</param>
+        /// <param name="padding">The implicit zero padding to be added on both sides.</param>
+        /// <param name="kernelSizes">The sizes of the window to take an average over.</param>
+        /// <param name="strides">The strides of the window. Default value is kernelSize.</param>
+        /// <param name="paddings">The implicit zero paddings to be added on both sides.</param>
+        static member avgpool3d(input: Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>(* , ?ceil_mode: bool, ?count_include_pad: bool *)) =
+            input.avgpool3d(?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings(* , ?ceil_mode=ceil_mode, ?count_include_pad=count_include_pad *))
+
diff --git a/src/TensorMath/Op.BMM.fs b/src/TensorMath/Op.BMM.fs
new file mode 100644
index 0000000..2438f38
--- /dev/null
+++ b/src/TensorMath/Op.BMM.fs
@@ -0,0 +1,22 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath
+
+[<AutoOpen>]
+module OpBMMExtensions =
+
+    type Tensor with
+        /// <summary>Batched matrix product of two tensors. The tensors must be 3d tensors each containing the same number of matrices. If the tensor is a \(b \times n \times m\) tensor, and <paramref name="b" /> is a \(b \times m \times p\) tensor, the result will be a \(b \times n \times p\) tensor.</summary>
+        /// <param name="b">The second tensor.</param>
+        member a.bmm(b:Tensor) =
+            Shape.checkCanBMM a.shape b.shape |> ignore
+            TensorC(a.primalRaw.BMMTT(b.primalRaw))
+
+    type dsharp with
+        /// <summary>Batched matrix product of two tensors. Tensors <paramref name="a" /> and <paramref name="b" /> must be 3d tensors each containing the same number of matrices. If <paramref name="a" /> is a \(b \times n \times m\) tensor, and <paramref name="b" /> is a \(b \times m \times p\) tensor, the result will be a \(b \times n \times p\) tensor.</summary>
+        /// <param name="a">The first tensor.</param>
+        /// <param name="b">The second tensor.</param>
+        static member bmm(a:Tensor, b:Tensor) = a.bmm(b)
diff --git a/src/TensorMath/Op.Det.fs b/src/TensorMath/Op.Det.fs
new file mode 100644
index 0000000..3776808
--- /dev/null
+++ b/src/TensorMath/Op.Det.fs
@@ -0,0 +1,17 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath
+
+[<AutoOpen>]
+module OpDetExtensions =
+
+    type Tensor with
+        member a.det() =
+            Shape.checkCanDet a.shape
+            TensorC(a.primalRaw.DetT())
+
+    type dsharp with
+        static member det(a:Tensor) = a.det()
diff --git a/src/TensorMath/Op.Inv.fs b/src/TensorMath/Op.Inv.fs
new file mode 100644
index 0000000..3cdd435
--- /dev/null
+++ b/src/TensorMath/Op.Inv.fs
@@ -0,0 +1,17 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath
+
+[<AutoOpen>]
+module OpInvExtensions =
+
+    type Tensor with
+        member a.inv() =
+            Shape.checkCanInvert a.shape
+            TensorC(a.primalRaw.InverseT())
+
+    type dsharp with
+        static member inv(a:Tensor) = a.inv()
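An illustrative sketch of the batched matrix product and the determinant/inverse extensions above (invented values, float tensors):

    let a = dsharp.tensor([[[1.; 2.]; [3.; 4.]]])   // 1x2x2 batch
    let b = dsharp.tensor([[[0.; 1.]; [1.; 0.]]])   // 1x2x2 batch
    a.bmm(b)                                        // 1x2x2: [[[2.; 1.]; [4.; 3.]]]

    let m = dsharp.tensor([[4.; 7.]; [2.; 6.]])
    m.det()                                         // 10.
    m.inv()                                         // [[0.6; -0.7]; [-0.2; 0.4]]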
diff --git a/src/TensorMath/Op.Norm.fs b/src/TensorMath/Op.Norm.fs
new file mode 100644
index 0000000..93302a5
--- /dev/null
+++ b/src/TensorMath/Op.Norm.fs
@@ -0,0 +1,30 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath
+
+[<AutoOpen>]
+module OpNormExtensions =
+
+    type Tensor with
+        member a.norm(?order:float, ?dim:int, ?keepDim:bool) =
+            if not (a.dtype = Dtype.Float32 || a.dtype = Dtype.Float64) then failwithf "Vector norm is only supported for Float32 and Float64 dtypes."
+            let order = defaultArg order 2.
+            match order, dim with
+            | 1., None -> a.flatten().abs().sum()
+            | 1., Some(dim) -> a.abs().sum(dim=dim, ?keepDim=keepDim)
+            | 2., None -> let aa = a.flatten() in (aa * aa).sum().sqrt()
+            | 2., Some(dim) -> (a * a).sum(dim=dim, ?keepDim=keepDim).sqrt()
+            | System.Double.PositiveInfinity, None -> a.flatten().abs().max()
+            | System.Double.PositiveInfinity, Some(dim) -> a.abs().max(dim=dim, ?keepDim=keepDim)
+            | System.Double.NegativeInfinity, None -> a.flatten().abs().min()
+            | System.Double.NegativeInfinity, Some(dim) -> a.abs().min(dim=dim, ?keepDim=keepDim)
+            | 0., None -> a.ne(a.zerosLike()).cast(dtype=a.dtype).sum()
+            | 0., Some(dim) -> a.ne(a.zerosLike()).cast(dtype=a.dtype).sum(dim=dim, ?keepDim=keepDim)
+            | order, None -> a.abs().pow(order).sum().pow(1./order)
+            | order, Some(dim) -> a.abs().pow(order).sum(dim=dim, ?keepDim=keepDim).pow(1./order)
+
+    type dsharp with
+        static member norm(a:Tensor, ?order:float, ?dim:int, ?keepDim:bool) = a.norm(?order=order, ?dim=dim, ?keepDim=keepDim)
diff --git a/src/TensorMath/Op.Outer.fs b/src/TensorMath/Op.Outer.fs
new file mode 100644
index 0000000..db42b81
--- /dev/null
+++ b/src/TensorMath/Op.Outer.fs
@@ -0,0 +1,24 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath
+
+[<AutoOpen>]
+module OpOuterExtensions =
+
+    type Tensor with
+        /// <summary>Outer product of two tensors.</summary>
+        /// <param name="b">The second tensor.</param>
+        member a.outer(b:Tensor) =
+            match a.dim, b.dim with
+            | 1, 1 -> a.unsqueeze(1).matmul(b.unsqueeze(0))
+            | 2, 2 when a.shape[0] = b.shape[0] -> a.unsqueeze(2).bmm(b.unsqueeze(1)) // Batched outer product
+            | _ -> failwithf "Outer product unsupported for tensor shapes %A %A" a.shape b.shape
+
+    type dsharp with
+        /// <summary>Outer product of two tensors.</summary>
+        /// <param name="a">The first tensor.</param>
+        /// <param name="b">The second tensor.</param>
+        static member outer(a:Tensor, b:Tensor) = a.outer(b)
diff --git a/src/TensorMath/Op.Solve.fs b/src/TensorMath/Op.Solve.fs
new file mode 100644
index 0000000..ba3bf93
--- /dev/null
+++ b/src/TensorMath/Op.Solve.fs
@@ -0,0 +1,17 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath
+
+[<AutoOpen>]
+module OpSolveExtensions =
+
+    type Tensor with
+        member a.solve(b:Tensor) =
+            let _ = Shape.checkCanSolve a.shape b.shape
+            TensorC(a.primalRaw.SolveTT(b.primalRaw))
+
+    type dsharp with
+        static member solve(a:Tensor, b:Tensor) = a.solve(b)
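A short sketch of norm and solve in use (illustrative numbers):

    let v = dsharp.tensor([3.; 4.])
    v.norm()                 // 5., the L2 norm is the default (order=2)
    v.norm(order=1.)         // 7.
    v.norm(order=infinity)   // 4.

    let a = dsharp.tensor([[2.; 0.]; [0.; 4.]])
    let b = dsharp.tensor([2.; 8.])
    a.solve(b)               // [1.; 2.]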
diff --git a/src/TensorMath/Printer.fs b/src/TensorMath/Printer.fs
new file mode 100644
index 0000000..0b08513
--- /dev/null
+++ b/src/TensorMath/Printer.fs
@@ -0,0 +1,39 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath
+
+type Printer =
+    | Default
+    | Short
+    | Full
+    | Custom of threshold: int * edgeItems: int * precision: int
+
+    member p.threshold =
+        match p with
+        | Default -> 100
+        | Short -> 10
+        | Full -> System.Int32.MaxValue
+        | Custom(t, _, _) -> t
+
+    member p.edgeItems =
+        match p with
+        | Default -> 3
+        | Short -> 2
+        | Full -> -1
+        | Custom(_, e, _) -> e
+
+    member p.precision =
+        match p with
+        | Default -> 4
+        | Short -> 2
+        | Full -> 4
+        | Custom(_, _, p) -> p
+
+/// Contains functions and settings related to print options.
+module Printer =
+
+    /// Get or set the default printer used when printing tensors. Note, use dsharp.config(...) instead.
+    let mutable Default : Printer = Printer.Default
\ No newline at end of file
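A brief sketch of how these printer settings combine (illustrative; the RawTensor.Print implementation later in this patch reads these values):

    let p = Printer.Custom(threshold=10, edgeItems=2, precision=2)
    p.threshold               // 10
    Printer.Short.edgeItems   // 2
    Printer.Full.threshold    // System.Int32.MaxValue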
diff --git a/src/TensorMath/RawTensor.fs b/src/TensorMath/RawTensor.fs
new file mode 100644
index 0000000..2784110
--- /dev/null
+++ b/src/TensorMath/RawTensor.fs
@@ -0,0 +1,918 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace rec TensorMath.Backends
+
+open System
+open TensorMath
+open TensorMath.Util
+
+/// <summary>
+///   Represents the static functionality for tensors implemented by a TensorMath backend.
+/// </summary>
+///
+/// <namespacedoc>
+///   <summary>Contains types and functionality related to backend implementations for TensorMath.</summary>
+/// </namespacedoc>
+[<AbstractClass>]
+type BackendTensorStatics() =
+    // cache for most recently accessed backend
+    static let hook = BackendFunctionality<BackendTensorStatics>()
+
+    /// Sets the seed for the default random number generator of the backend
+    abstract Seed: seed:int -> unit
+
+    /// Gets the scalar 0 tensor for the given device
+    abstract Zero: dtype: Dtype * device: Device -> RawTensor
+
+    /// Gets a tensor filled with arbitrary values for the given shape and device
+    abstract Empty: shape:Shape * dtype: Dtype * device: Device -> RawTensor
+
+    /// Gets a tensor filled with zeros for the given shape and device
+    abstract Zeros: shape:Shape * dtype: Dtype * device: Device -> RawTensor
+
+    /// Gets the scalar 1 tensor for the given device
+    abstract One: dtype: Dtype * device: Device -> RawTensor
+
+    /// Gets a tensor filled with ones for the given shape and device
+    abstract Ones: shape:Shape * dtype: Dtype * device: Device -> RawTensor
+
+    /// Gets a tensor filled with the given value for the given shape and device
+    abstract Full: shape:Shape * value: scalar * dtype: Dtype * device: Device -> RawTensor
+
+    /// Gets a tensor filled with random values for the given shape and device
+    abstract Random: shape:Shape * dtype: Dtype * device: Device -> RawTensor
+
+    /// Gets a tensor filled with random values from the normal distribution for the given shape and device
+    abstract RandomNormal: shape:Shape * dtype: Dtype * device: Device -> RawTensor
+
+    /// Gets a tensor filled with random integers from the given range for the given shape and device
+    abstract RandomInt: shape:Shape * low:int * high:int * dtype: Dtype * device: Device -> RawTensor
+
+    /// Gets the devices supported by this backend
+    abstract GetDevices: ?deviceType: DeviceType -> Device list
+
+    /// Indicates if a device type is supported by this backend
+    abstract IsDeviceTypeAvailable: deviceType: DeviceType -> bool
+
+    /// Seed all backends with the given random seed, or a new seed based on the current time
+    /// if no seed is specified.
+    static member Seed(?seed:int) =
+        let seed = defaultArg seed (int DateTime.Now.Ticks)
+        Random.Seed(seed) // Do not remove. util.Random seed would be set by the Reference backend if it's currently loaded. However we still need to keep this here to ensure util.Random seed is set (it may be used in code other than the Reference backend).
+        for KeyValue(_, backend) in hook.Backends do
+            backend.Seed(seed)
+
+    /// Create a tensor of appropriate dtype from a scalar or array of appropriate values.
+    /// A backend type is delivered consistent with in-memory data - a type for dtype Int32 gets int32 data etc.
+    abstract CreateFromFlatArray: data: System.Array * shape: Shape * dtype: Dtype * device: Device -> RawTensor
+
+    /// Get the backend implementation for the given tensor element type and backend.
+    static member Get(?backend: Backend) =
+        hook.Get(?backend=backend)
+
+/// <summary>
+///   Represents a raw (i.e. non-differentiable immutable) tensor implemented by a TensorMath backend.
+/// </summary>
+///
+/// <remarks>
+///   Each backend will provide one or more .NET implementations of this type, which may in turn
+///   wrap handles to native implementations.
+/// </remarks>
+[<AbstractClass>]
+type RawTensor() =
+
+    /// Gets the shape of the tensor
+    abstract Shape: Shape
+
+    /// Gets the dimensionality of the tensor
+    abstract Dim: int
+
+    /// Gets the number of elements in the tensor
+    // TODO: int32 might not be enough for very large tensors
+    abstract Nelement: int
+
+    /// Gets the element storage type for the tensor
+    abstract Dtype: Dtype
+
+    /// Gets the device for the tensor
+    abstract Device: Device
+
+    /// Gets the device type for the tensor
+    abstract DeviceType: DeviceType
+
+    /// Gets the backend for the tensor
+    abstract Backend: Backend
+
+    /// Gets a handle to the underlying representation of the tensor. For example, if the Torch
+    /// backend is used this will be the corresponding TorchSharp TorchTensor.
+    abstract Handle: obj
+
+    override t.ToString() = t.Print()
+
+    /// Gets a tensor containing arbitrary values for the given shape and configuration
+    static member Empty(shape:Shape, ?dtype, ?device, ?backend) =
+        let statics = BackendTensorStatics.Get(?backend=backend)
+        let dtype = defaultArg dtype Dtype.Default
+        let device = defaultArg device Device.Default
+        statics.Empty(shape, dtype, device)
+
+    /// Gets the scalar zero tensor for the given configuration
+    static member Zero(?dtype, ?device, ?backend) =
+        let statics = BackendTensorStatics.Get(?backend=backend)
+        let dtype = defaultArg dtype Dtype.Default
+        let device = defaultArg device Device.Default
+        statics.Zero(dtype, device)
+
+    /// Gets the zero tensor for the given shape and configuration
+    static member Zeros(shape:Shape, ?dtype, ?device, ?backend) =
+        let statics = BackendTensorStatics.Get(?backend=backend)
+        let dtype = defaultArg dtype Dtype.Default
+        let device = defaultArg device Device.Default
+        statics.Zeros(shape, dtype, device)
+
+    /// Gets the scalar 1 tensor for the given configuration
+    static member One(?dtype, ?device, ?backend) =
+        let statics = BackendTensorStatics.Get(?backend=backend)
+        let dtype = defaultArg dtype Dtype.Default
+        let device = defaultArg device Device.Default
+        statics.One(dtype, device)
+
+    /// Gets a tensor filled with 1 values for the given shape and configuration
+    static member Ones(shape:Shape, ?dtype, ?device, ?backend) =
+        let statics = BackendTensorStatics.Get(?backend=backend)
+        let dtype = defaultArg dtype Dtype.Default
+        let device = defaultArg device Device.Default
+        statics.Ones(shape, dtype, device)
+
+    /// Gets a tensor filled with the given value for the given shape and configuration
+    static member Full(shape:Shape, value, ?dtype, ?device, ?backend) =
+        let statics = BackendTensorStatics.Get(?backend=backend)
+        let dtype = defaultArg dtype Dtype.Default
+        let device = defaultArg device Device.Default
+        statics.Full(shape, value, dtype, device)
+
+    /// Gets a tensor filled with random values for the given shape and configuration
+    static member Random(shape:Shape, ?dtype, ?device, ?backend) =
+        let statics = BackendTensorStatics.Get(?backend=backend)
+        let dtype = defaultArg dtype Dtype.Default
+        let device
= defaultArg device Device.Default + statics.Random(shape, dtype, device) + + /// Gets a tensor filled with random values from the normal distribution for the given shape and configuration + static member RandomNormal(shape:Shape, ?dtype, ?device, ?backend) = + let statics = BackendTensorStatics.Get(?backend=backend) + let dtype = defaultArg dtype Dtype.Default + let device = defaultArg device Device.Default + statics.RandomNormal(shape, dtype, device) + + /// Gets a tensor filled with random integer values from the given range for the given shape and configuration + static member RandomInt(shape:Shape, low, high, ?dtype, ?device, ?backend) = + let statics = BackendTensorStatics.Get(?backend=backend) + let dtype = defaultArg dtype Dtype.Default + let device = defaultArg device Device.Default + statics.RandomInt(shape, low, high, dtype, device) + + /// + /// Gets a tensor filled with values drawn from the given .NET object. + /// + /// + /// + /// The value may be a scalar, an array, or an array of tupled objects. If the dtype is not specified + /// then it is inferred from the .NET type of the object. + /// + static member Create(values: obj, ?dtype, ?device, ?backend) = + // We deliver consistent in-memory data to the backend - a dtype Int32 gets int32 etc. + let data, shape, dtype2 = + match dtype with + | Some Dtype.Int64 -> + let a,s = DataConverter.dataOfValuesForInt64 values + (a :> Array), s, Dtype.Int64 + | Some Dtype.Int32 -> + let a,s = DataConverter.dataOfValuesForInt32 values + (a :> Array), s, Dtype.Int32 + | Some Dtype.Int16 -> + let a,s = DataConverter.dataOfValuesForInt16 values + (a :> Array), s, Dtype.Int16 + | Some Dtype.Int8 -> + let a,s = DataConverter.dataOfValuesForInt8 values + (a :> Array), s, Dtype.Int8 + | Some Dtype.Byte -> + let a,s = DataConverter.dataOfValuesForByte values + (a :> Array), s, Dtype.Byte + | Some Dtype.Bool -> + let a,s = DataConverter.dataOfValuesForBool values + (a :> Array), s, Dtype.Bool + | Some Dtype.Float64 -> + let a,s = DataConverter.dataOfValuesForFloat64 values + (a :> Array), s, Dtype.Float64 + | Some Dtype.Float32 -> + let a,s = DataConverter.dataOfValuesForFloat32 values + (a :> Array), s, Dtype.Float32 + | Some Dtype.Float16 -> + let a,s = DataConverter.dataOfValuesForFloat32 values + (a :> Array), s, Dtype.Float16 + | Some Dtype.BFloat16 -> + let a,s = DataConverter.dataOfValuesForFloat32 values + (a :> Array), s, Dtype.BFloat16 + // If no dtype is given, use a dtype inferred from the given data. This is consistent with PyTorch's behavior. 
+            | None ->
+                match values |> DataConverter.tryFlatArrayAndShape<float32> with
+                | Some (values, shape) -> ((values :> Array), shape, Dtype.Float32)
+                | _ ->
+                // Exception: If data is double and no dtype is given by the user, prefer a Float32 tensor
+                match values |> DataConverter.tryFlatArrayAndShape<double> with
+                | Some (values, shape) -> ((values |> Array.map float32 :> Array), shape, Dtype.Float32)
+                | _ ->
+                match values |> DataConverter.tryFlatArrayAndShape<int64> with
+                | Some (values, shape) -> ((values :> Array), shape, Dtype.Int64)
+                | _ ->
+                match values |> DataConverter.tryFlatArrayAndShape<int32> with
+                | Some (values, shape) -> ((values :> Array), shape, Dtype.Int32)
+                | _ ->
+                match values |> DataConverter.tryFlatArrayAndShape<int16> with
+                | Some (values, shape) -> ((values :> Array), shape, Dtype.Int16)
+                | _ ->
+                match values |> DataConverter.tryFlatArrayAndShape<bool> with
+                | Some (values, shape) -> ((values :> Array), shape, Dtype.Bool)
+                | _ ->
+                match values |> DataConverter.tryFlatArrayAndShape<byte> with
+                | Some (values, shape) -> ((values :> Array), shape, Dtype.Byte)
+                | _ ->
+                match values |> DataConverter.tryFlatArrayAndShape<int8> with
+                | Some (values, shape) -> ((values :> Array), shape, Dtype.Int8)
+                | _ ->
+                failwithf "Cannot create tensor from data: %A" values
+
+        let statics = BackendTensorStatics.Get(?backend=backend)
+        let device = defaultArg device Device.Default
+
+        statics.CreateFromFlatArray(data, shape, dtype2, device)
+
+    static member CreateFromFlatArray(values: Array, shape:Shape, ?dtype, ?device, ?backend) =
+        let statics = BackendTensorStatics.Get(?backend=backend)
+        let dtype = defaultArg dtype Dtype.Default
+        let device = defaultArg device Device.Default
+        statics.CreateFromFlatArray(values, shape, dtype, device)
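A sketch of the dtype inference above from the caller's side (it assumes a backend such as the Reference backend is registered; the results shown are the intended behavior):

    open TensorMath
    open TensorMath.Backends

    RawTensor.Create([| 1.0f; 2.0f |])                // dtype inferred as Float32
    RawTensor.Create([| 1.0; 2.0 |])                  // double input still yields Float32 by default
    RawTensor.Create([| 1; 2 |], dtype=Dtype.Int64)   // an explicit dtype overrides inference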
+    /// Gets a tensor filled with values drawn from the given .NET object for the
+    /// given configuration settings, defaulting to the configuration settings of the object tensor.
+    member t.CreateLike(values: obj, ?dtype: Dtype, ?device: Device, ?backend: Backend) =
+        RawTensor.Create(values, dtype=defaultArg dtype t.Dtype, device=defaultArg device t.Device, backend=defaultArg backend t.Backend)
+
+    /// Gets a tensor filled with arbitrary values for the given shape and configuration settings,
+    /// defaulting to the configuration settings of the object tensor
+    member t.EmptyLike(shape: Shape, ?dtype: Dtype, ?device: Device, ?backend: Backend) =
+        RawTensor.Empty(shape=shape, dtype=defaultArg dtype t.Dtype, device=defaultArg device t.Device, backend=defaultArg backend t.Backend)
+
+    /// Gets a zero tensor for the given configuration settings, defaulting to the configuration settings of the object tensor
+    member t.ZeroLike(?dtype: Dtype, ?device: Device, ?backend: Backend) =
+        RawTensor.Zero(dtype=defaultArg dtype t.Dtype, device=defaultArg device t.Device, backend=defaultArg backend t.Backend)
+
+    /// Gets a tensor filled with zero values for the given shape and configuration settings,
+    /// defaulting to the configuration settings of the object tensor
+    member t.ZerosLike(shape: Shape, ?dtype: Dtype, ?device: Device, ?backend: Backend) =
+        RawTensor.Zeros(shape=shape, dtype=defaultArg dtype t.Dtype, device=defaultArg device t.Device, backend=defaultArg backend t.Backend)
+
+    /// Gets a scalar one tensor for the given configuration settings, defaulting to the configuration settings of the object tensor
+    member t.OneLike(?dtype: Dtype, ?device: Device, ?backend: Backend) =
+        RawTensor.One(dtype=defaultArg dtype t.Dtype, device=defaultArg device t.Device, backend=defaultArg backend t.Backend)
+
+    /// Gets a tensor filled with one values for the given shape and configuration settings,
+    /// defaulting to the configuration settings of the object tensor
+    member t.OnesLike(shape: Shape, ?dtype: Dtype, ?device: Device, ?backend: Backend) =
+        RawTensor.Ones(shape=shape, dtype=defaultArg dtype t.Dtype, device=defaultArg device t.Device, backend=defaultArg backend t.Backend)
+
+    /// Gets a tensor filled with the given scalar value for the given shape and configuration settings,
+    /// defaulting to the configuration settings of the object tensor
+    member t.FullLike(shape: Shape, value: scalar, ?dtype: Dtype, ?device: Device, ?backend: Backend) =
+        RawTensor.Full(shape, value, dtype=defaultArg dtype t.Dtype, device=defaultArg device t.Device, backend=defaultArg backend t.Backend)
+
+    /// Gets a tensor filled with random values for the given shape and configuration settings,
+    /// defaulting to the configuration settings of the object tensor
+    member t.RandomLike(shape: Shape, ?dtype: Dtype, ?device: Device, ?backend: Backend) =
+        RawTensor.Random(shape=shape, dtype=defaultArg dtype t.Dtype, device=defaultArg device t.Device, backend=defaultArg backend t.Backend)
+
+    /// Gets a tensor filled with random values from a normal distribution for the given shape and configuration settings,
+    /// defaulting to the configuration settings of the object tensor
+    member t.RandomNormalLike(shape: Shape, ?dtype: Dtype, ?device: Device, ?backend: Backend) =
+        RawTensor.RandomNormal(shape=shape, dtype=defaultArg dtype t.Dtype, device=defaultArg device t.Device, backend=defaultArg backend t.Backend)
+
+    /// Gets a tensor filled with random integer values from the given range for the given shape and configuration settings,
+    /// defaulting to the configuration settings of the object tensor
+    member t.RandomIntLike(shape: Shape, low:int, high:int, ?dtype: Dtype, ?device: Device, ?backend:
Backend) =
+        RawTensor.RandomInt(shape=shape, low=low, high=high, dtype=defaultArg dtype t.Dtype, device=defaultArg device t.Device, backend=defaultArg backend t.Backend)
+
+    /// Clone the underlying storage of the tensor.
+    abstract Clone: unit -> RawTensor
+
+    /// Expand the shape of the tensor.
+    abstract Expand: newShape: Shape -> RawTensor
+
+    /// Stack the given tensors along the given dimension
+    abstract StackTs: tensors: RawTensor[] * dim:int -> RawTensor
+
+    /// Unstack the given tensor along the given dimension
+    abstract UnstackT: dim:int -> RawTensor[]
+
+    /// Concatenate the given tensors along the given dimension
+    abstract CatTs: tensors: RawTensor[] * dim: int -> RawTensor
+
+    /// Split the given tensor along the given dimension into parts of the given sizes
+    abstract SplitT: sizes: int[] * dim: int -> RawTensor[]
+
+    /// <summary>Get a slice of the given tensor.</summary>
+    ///
+    /// <remarks>
+    ///   The indexes are an Nx3 array. The first column is the start bounds, the second column is
+    ///   the end bounds, the third is 1/0 indicating dimension removal.
+    /// </remarks>
+    abstract GetSlice: fullBounds: int[,] -> RawTensor
+
+    /// Gets a .NET object representing the value of the tensor at the given indexes
+    abstract GetItem: [<System.ParamArray>] indexes: int[] -> scalar
+
+    /// Gets a .NET object representing the value of a scalar tensor
+    abstract ToScalar: unit -> scalar
+
+    /// Get a .NET object for all the values in the tensor.
+    ///
+    /// The runtime type of the returned object is either a .NET scalar
+    /// or array corresponding to the shape and element type of the tensor.
+    abstract ToValues: unit -> obj
+
+    /// Compare two tensors for equality
+    abstract Equals: t2: RawTensor -> bool
+
+    /// Returns a tensor where the elements have each been cast to the given tensor element storage type.
+    abstract Cast: dtype: Dtype -> RawTensor
+
+    /// Returns a tensor moved to the given device.
+    abstract MoveTo: device: Device -> RawTensor
+
+    /// Returns a hash of the contents of the tensor. This operation may cause the
+    /// tensor to be moved to the CPU, and its entire contents iterated.
+    abstract ComputeHash: unit -> int
+
+    /// Indicates if the two tensors have the same shape and element type, and all corresponding values
+    /// are equal up to the given tolerances.
+    abstract AllClose: t2: RawTensor * relativeTolerance: float * absoluteTolerance: float -> bool
+
+    /// Returns a tensor with values constrained by the corresponding elements in the low/high tensors.
+    abstract ClampT: low: RawTensor * high: RawTensor -> RawTensor
+
+    /// Returns a tensor selecting the given indices from the given dimension and stacking those in the order specified.
+    abstract GatherT: dim: int * indices: RawTensor -> RawTensor
+
+    /// Returns a tensor with given destination shape where values are copied from the current tensor to locations specified by the dimension and indices.
+ abstract ScatterT: dim: int * indices: RawTensor * destinationShape: Shape -> RawTensor + + /// Returns a boolean tensor comparing each element pairwise with the corresponding element in t2 + abstract LtTT: t2: RawTensor -> RawTensor + + /// Returns a boolean tensor comparing each element pairwise with the corresponding element in t2 + abstract GtTT: t2: RawTensor -> RawTensor + + /// Returns a boolean tensor comparing each element pairwise with the corresponding element in t2 + abstract LeTT: t2: RawTensor -> RawTensor + + /// Returns a boolean tensor comparing each element pairwise with the corresponding element in t2 + abstract GeTT: t2: RawTensor -> RawTensor + + /// Returns a boolean tensor comparing each element pairwise with the corresponding element in t2 + abstract EqTT: t2: RawTensor -> RawTensor + + /// Returns a boolean tensor comparing each element pairwise with the corresponding element in t2 + abstract NeqTT: t2: RawTensor -> RawTensor + + /// Returns a boolean tensor where each element indicates if the corresponding element in the tensor is an infinity value + abstract IsInfT: unit -> RawTensor + + /// Returns a boolean tensor where each element indicates if the corresponding element in the tensor is a NaN value + abstract IsNaNT: unit -> RawTensor + + /// Gets a tensor containing values and indexes of a maximum value of the tensor reducing along the given dimension + abstract MaxReduceT: dim: int * keepdim: bool -> RawTensor * RawTensor + + /// Gets the index of a maximum value of the tensor + abstract MaxIndexT: unit -> int[] + + /// Gets a tensor containing values and indexes of a minimum value of the tensor reducing along the given dimension + abstract MinReduceT: dim: int * keepdim: bool -> RawTensor * RawTensor + + /// Gets the index of a minimum value of the tensor + abstract MinIndexT: unit -> int[] + + /// Returns the element-wise addition of the two tensors + abstract AddTT: RawTensor * ?alpha: scalar -> RawTensor + + /// Returns the element-wise addition of a tensor and a scalar + abstract AddTT0: b: scalar * ?alpha: scalar -> RawTensor + + /// Adds a slice of t2 at the given location to the tensor + abstract AddTTSlice: location: int[] * t2: RawTensor -> RawTensor + + /// Returns the element-wise subtraction of two tensors + abstract SubTT: t2: RawTensor -> RawTensor + + /// Returns the element-wise subtraction of the scalar and a tensor, where the scalar is logically + /// broadcast to the same shape as the tensor + abstract SubFromT0T: t1: scalar -> RawTensor + + /// Returns the element-wise subtraction of the tensor and a scalar, where the scalar is logically + /// broadcast to the same shape as the tensor + abstract SubTT0: t2: scalar -> RawTensor + + /// Returns the element-wise multiplication of two tensors + abstract MulTT: t2: RawTensor -> RawTensor + + /// Returns the element-wise multiplication of a tensor and a scalar, where the scalar is logically + /// broadcast to the same shape as the tensor + abstract MulTT0: t2: scalar -> RawTensor + + /// Returns the element-wise division of two tensors + abstract DivTT: t2: RawTensor -> RawTensor + + /// Returns the element-wise division of a scalar by a tensor, where the scalar is logically + /// broadcast to the same shape as the tensor + abstract DivFromT0T: t1: scalar -> RawTensor + + /// Returns the element-wise division of a tensor by a scalar, where the scalar is logically + /// broadcast to the same shape as the tensor + abstract DivTT0: t2: scalar -> RawTensor + + /// Returns the element-wise 
exponentiation of two tensors
+    abstract PowTT: t2: RawTensor -> RawTensor
+
+    /// Returns the element-wise exponentiation of a scalar and a tensor, where the scalar is logically
+    /// broadcast to the same shape as the tensor
+    abstract PowFromT0T: t1: scalar -> RawTensor
+
+    /// Returns the element-wise exponentiation of a tensor and a scalar, where the scalar is logically
+    /// broadcast to the same shape as the tensor
+    abstract PowTT0: t2: scalar -> RawTensor
+
+    /// Returns the matrix multiplication of two tensors
+    abstract MatMulTT: t2: RawTensor -> RawTensor
+
+    /// Returns the batched matrix multiplication of two tensors
+    abstract BMMTT: t2: RawTensor -> RawTensor
+
+    /// Returns the 1D maxpool of a tensor and its chosen maximum indices
+    abstract MaxPool1D: kernelSize: int * stride: int * padding: int -> RawTensor * RawTensor
+
+    /// Returns the 2D maxpool of a tensor and its chosen maximum indices
+    abstract MaxPool2D: kernelSize: int[] * strides: int[] * padding: int[] -> RawTensor * RawTensor
+
+    /// Returns the 3D maxpool of a tensor and its chosen maximum indices
+    abstract MaxPool3D: kernelSize: int[] * strides: int[] * padding: int[] -> RawTensor * RawTensor
+
+    /// Returns the 1D maxunpool of a tensor using the given indices for locations of maximums
+    abstract MaxUnpool1D: indices: RawTensor * outputSize: int[] -> RawTensor
+
+    /// Returns the 2D maxunpool of a tensor using the given indices for locations of maximums
+    abstract MaxUnpool2D: indices: RawTensor * outputSize: int[] -> RawTensor
+
+    /// Returns the 3D maxunpool of a tensor using the given indices for locations of maximums
+    abstract MaxUnpool3D: indices: RawTensor * outputSize: int[] -> RawTensor
+
+    /// Returns the 1D avgpool of a tensor
+    abstract AvgPool1D: kernelSize: int * stride: int * padding: int (* * ceil_mode: bool * count_include_pad: bool *) -> RawTensor
+
+    /// Returns the 2D avgpool of a tensor
+    abstract AvgPool2D: kernelSize: int[] * stride: int[] * padding: int[] (* * ceil_mode: bool * count_include_pad: bool *) -> RawTensor
+
+    /// Returns the 3D avgpool of a tensor
+    abstract AvgPool3D: kernelSize: int[] * stride: int[] * padding: int[] (* * ceil_mode: bool * count_include_pad: bool *) -> RawTensor
+
+    /// Returns the reverse mode of a 1D avgpool of a tensor, apportioning each part of the adjoint equally to each corresponding input
+    /// The originalInput parameter is only used for shape information
+    abstract AvgPoolReverse1D: originalInput: RawTensor * kernelSize: int * stride: int * padding: int (* * ceil_mode: bool * count_include_pad: bool *) -> RawTensor
+
+    /// Returns the reverse mode of a 2D avgpool of a tensor, apportioning each part of the adjoint equally to each corresponding input
+    /// The originalInput parameter is only used for shape information
+    abstract AvgPoolReverse2D: originalInput: RawTensor * kernelSize: int[] * stride: int[] * padding: int[] (* * ceil_mode: bool * count_include_pad: bool *) -> RawTensor
+
+    /// Returns the reverse mode of a 3D avgpool of a tensor, apportioning each part of the adjoint equally to each corresponding input
+    /// The originalInput parameter is only used for shape information
+    abstract AvgPoolReverse3D: originalInput: RawTensor * kernelSize: int[] * stride: int[] * padding: int[] (* * ceil_mode: bool * count_include_pad: bool *) -> RawTensor
+
+    /// Returns the 1D convolution of the tensor
+    abstract Conv1D: kernel: RawTensor * stride: int * padding: int -> RawTensor
+
+    /// Returns the 2D convolution of the tensor
+    abstract Conv2D:
kernel: RawTensor * strides: int[] * padding: int[] -> RawTensor
+
+    /// Returns the 3D convolution of the tensor
+    abstract Conv3D: kernel: RawTensor * strides: int[] * padding: int[] -> RawTensor
+
+    /// Returns a view of the original tensor with its dimensions permuted
+    abstract PermuteT: permutation: int[] -> RawTensor
+
+    /// Returns the element-wise negation of the tensor
+    abstract NegT: unit -> RawTensor
+
+    /// Returns the scalar tensor for the summation of all elements in the tensor
+    abstract SumT: ?resultType: Dtype -> RawTensor
+
+    /// Returns the tensor representing the summation of the tensor along the given dimension
+    abstract SumTDim: dim: int * ?resultType: Dtype -> RawTensor
+
+    /// Returns the transpose of the tensor between the given dimensions
+    abstract TransposeT: dim0: int * dim1: int -> RawTensor
+
+    /// Returns the transpose of a 2D tensor
+    abstract TransposeT2: unit -> RawTensor
+
+    /// Returns the inverse of a single square matrix (2d tensor) or a batch of square matrices (3d tensor)
+    abstract InverseT: unit -> RawTensor
+
+    /// Returns the determinant of a square matrix
+    abstract DetT: unit -> RawTensor
+
+    /// Returns the solution of a single square system of linear equations with a unique solution, or of a batch of several such systems
+    abstract SolveTT: RawTensor -> RawTensor
+
+    /// Returns the tensor with the same values and the given dimension removed. The given dimension must be of size 1.
+    abstract SqueezeT: dim: int -> RawTensor
+
+    /// Returns the tensor with the same values and a dimension of size 1 inserted before the given dimension.
+    abstract UnsqueezeT: dim: int -> RawTensor
+
+    /// Returns the flip of the tensor along the given dimensions
+    abstract FlipT: dims: int[] -> RawTensor
+
+    /// Returns the dilation of the tensor using the given dilations parameters
+    abstract DilateT: dilations: int[] -> RawTensor
+
+    /// Returns the reverse of the dilation of the tensor using the given dilations parameters
+    abstract UndilateT: dilations: int[] -> RawTensor
+
+    /// Returns the tensor with the same values viewed as a different shape
+    abstract ViewT: shape: Shape -> RawTensor
+
+    /// Returns the element-wise sign of the tensor
+    abstract SignT: unit -> RawTensor
+
+    /// Returns the element-wise integer floor of the tensor
+    abstract FloorT: unit -> RawTensor
+
+    /// Returns the element-wise integer ceiling of the tensor
+    abstract CeilT: unit -> RawTensor
+
+    /// Returns the element-wise rounding of the tensor
+    abstract RoundT: unit -> RawTensor
+
+    /// Returns the element-wise absolute value of the tensor
+    abstract AbsT: unit -> RawTensor
+
+    /// Returns the element-wise ReLU of the tensor
+    abstract ReluT: unit -> RawTensor
+
+    /// Returns the element-wise softplus of the tensor
+    abstract SoftplusT: unit -> RawTensor
+
+    /// Returns the element-wise sigmoid of the tensor
+    abstract SigmoidT: unit -> RawTensor
+
+    /// Returns the element-wise natural exponentiation of the tensor
+    abstract ExpT: unit -> RawTensor
+
+    /// Returns the element-wise natural logarithm of the tensor
+    abstract LogT: unit -> RawTensor
+
+    /// Returns the element-wise base10 logarithm of the tensor
+    abstract Log10T: unit -> RawTensor
+
+    /// Returns the element-wise square root of the tensor
+    abstract SqrtT: unit -> RawTensor
+
+    /// Returns the element-wise sine of the tensor
+    abstract SinT: unit -> RawTensor
+
+    /// Returns the element-wise cosine of the tensor
+    abstract CosT: unit -> RawTensor
+
+    /// Returns the element-wise tangent of the tensor
+    abstract
TanT: unit -> RawTensor + + /// Returns the element-wise sinh of the tensor + abstract SinhT: unit -> RawTensor + + /// Returns the element-wise cosh of the tensor + abstract CoshT: unit -> RawTensor + + /// Returns the element-wise tanh of the tensor + abstract TanhT: unit -> RawTensor + + /// Returns the element-wise asin of the tensor + abstract AsinT: unit -> RawTensor + + /// Returns the element-wise cos of the tensor + abstract AcosT: unit -> RawTensor + + /// Returns the element-wise atan of the tensor + abstract AtanT: unit -> RawTensor + + default t.IsInfT() = + match t.Dtype with + | Dtype.IntegralOrBool -> t.FullLike(t.Shape, false, dtype=Dtype.Bool) + | _ -> t.AbsT().EqTT(t.FullLike(t.Shape,System.Single.PositiveInfinity)) + + default t.IsNaNT() = + match t.Dtype with + | Dtype.IntegralOrBool -> t.FullLike(t.Shape, false, dtype=Dtype.Bool) + | _ -> t.NeqTT(t) + + member t.Print(?postfix: string) = + // TODO: this code is not ideal and can be reimplemented to be cleaner and more efficient + let postfix = defaultArg postfix "" + if t.Nelement = 0 then sprintf "tensor([])%s" postfix + else + let threshold = Printer.Default.threshold + let edgeItems = Printer.Default.edgeItems + let precision = Printer.Default.precision + + let vmin = t.GetItem(t.MinIndexT()).toDouble() + let vmax = t.GetItem(t.MaxIndexT()).toDouble() + let absMax = max (abs vmin) (abs vmax) + let precisionStr = (String.replicate precision "0") + let floatMaxStrLen1 = System.String.Format("{0:G"+precision.ToString()+"}", absMax).Length + let floatMaxStrLen2 = System.String.Format("{0:0."+precisionStr+"}", absMax).Length + let floatFormat1 = "{0,"+floatMaxStrLen1.ToString()+":G"+precision.ToString()+"}" + let floatFormat2 = "{0,"+floatMaxStrLen2.ToString()+":0."+precisionStr+"}" + let floatFormat3 = "{0,"+floatMaxStrLen2.ToString()+": 0."+precisionStr+";-0."+precisionStr+"}" + let floatNoDecimals = t.Dtype.IsFloatingPoint && (let tt = t.Cast(Dtype.Float64) in tt.CeilT().Equals(tt)) + let floatNonNegative = t.Dtype.IsFloatingPoint && (let tt = t.Cast(Dtype.Float64) in tt.AbsT().Equals(tt)) + let printFloat (v:float) = + if absMax >= 1.e8 || floatNoDecimals then + let p = System.String.Format(floatFormat1, v) + if p.Contains(".") || p.Contains("e") || p.Contains("E") || p.Contains("NaN") || p.Contains("Inf") || p.Contains("∞") then p else p + "." + elif floatNonNegative then + System.String.Format(floatFormat2, v) + else + System.String.Format(floatFormat3, v) + + let intMaxStrLen = System.String.Format("{0:D}", int64 (if vmin < 0. 
then -absMax else absMax)).Length + let intFormat = "{0,"+intMaxStrLen.ToString()+":D}" + let printInt (v:int64) = + System.String.Format(intFormat, v) + + let printVal (x:scalar) = + match x.GetTypeCode() with + | TypeCode.Single -> printFloat (x.toDouble()) + | TypeCode.Double -> printFloat (x.toDouble()) + | TypeCode.Int32 -> printInt (x.toInt64()) + | TypeCode.Int64 -> printInt (x.toInt64()) + | TypeCode.Byte -> printInt (x.toInt64()) + | TypeCode.SByte -> printInt (x.toInt64()) + | TypeCode.Int16 -> printInt (x.toInt64()) + | TypeCode.Boolean -> if (x.toBool()) then " true" else "false" + | _ -> printFloat (x.toDouble()) // Handles Float16, BFloat16 + + let sb = System.Text.StringBuilder() + sb.Append("tensor(") |> ignore + match t.Dim with + | 0 -> + sb.Append(printVal (t.ToScalar())) |> ignore + | _ -> + let rec print (shape:Shape) externalCoords = + if shape.Length = 1 then + sb.Append("[") |> ignore + let mutable prefix = "" + if (shape[0] >= threshold) && (edgeItems*2 < shape[0]) then + for i=0 to edgeItems-1 do + let globalCoords = Array.append externalCoords [|i|] + sb.Append(prefix) |> ignore + sb.Append(printVal (t.GetItem(globalCoords))) |> ignore + prefix <- ", " + sb.Append(", ...") |> ignore + for i=shape[0]-edgeItems to shape[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + sb.Append(prefix) |> ignore + sb.Append(printVal (t.GetItem(globalCoords))) |> ignore + // prefix <- ", " + else + for i=0 to shape[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + sb.Append(prefix) |> ignore + sb.Append(printVal (t.GetItem(globalCoords))) |> ignore + prefix <- ", " + sb.Append("]") |> ignore + else + sb.Append("[") |> ignore + let mutable prefix = "" + let prefix2 = sprintf ",%s%s" (String.replicate (max 1 (shape.Length-1)) "\n ") (String.replicate (externalCoords.Length+1) " ") + if (shape[0] >= threshold) && (edgeItems*2 < shape[0]) then + for i=0 to edgeItems-1 do + sb.Append(prefix) |> ignore + print shape[1..] (Array.append externalCoords [|i|]) + prefix <- prefix2 + sb.Append(prefix) |> ignore + sb.Append("...") |> ignore + for i=shape[0]-edgeItems to shape[0]-1 do + sb.Append(prefix) |> ignore + print shape[1..] (Array.append externalCoords [|i|]) + // prefix <- prefix2 + else + for i=0 to shape[0]-1 do + sb.Append(prefix) |> ignore + print shape[1..] (Array.append externalCoords [|i|]) + prefix <- prefix2 + sb.Append("]") |> ignore + print t.Shape [||] + if t.Dtype <> Dtype.Default then + sb.Append ",dtype=" |> ignore + sb.Append (t.Dtype.ToString()) |> ignore + if t.Device <> Device.Default then + sb.Append ",device=" |> ignore + sb.Append (t.Device.ToString()) |> ignore + if t.Backend <> Backend.Default then + sb.Append ",backend=" |> ignore + sb.Append (t.Backend.ToString()) |> ignore + sb.Append(")") |> ignore + sb.Append(postfix) |> ignore + sb.ToString() + + override x.Equals(yobj: obj) = + match yobj with + | :? RawTensor as y -> x.Equals(y) + | _ -> false + + override x.GetHashCode() = x.ComputeHash() + + interface System.IComparable with + member x.CompareTo(yobj) = + match yobj with + | :? 
RawTensor as y -> Unchecked.compare (x.ToScalar()) (y.ToScalar())
+            | _ -> failwithf "Cannot compare RawTensor with object of type %A" (yobj.GetType())
+
+    default t.GetItem(indexes) =
+        let t0 = t.GetSlice(Array2D.init indexes.Length 3 (fun i j -> if j = 0 || j = 1 then indexes[i] else 1))
+        t0.ToScalar()
+
+    /// Returns a .NET object for the value of a scalar tensor
+    override t.ToScalar() =
+        match t.Nelement with
+        | 1 -> t.ViewT([||]).ToValues() :?> scalar
+        | _ -> failwithf "Only one element tensors can be converted to scalars. This tensor has shape %A." t.Shape
+
+    /// Returns a .NET array object for the values of a non-scalar tensor
+    member t.ToArray() =
+        match t.Dim with
+        | 0 -> failwithf "Cannot convert scalar tensor to array"
+        | _ ->
+            match t.ToValues() with
+            | :? System.Array as a -> a
+            | v -> failwithf "ToValues() should return an array but returned type %A" (v.GetType())
+
+    /// A backdoor to switch this tensor to be usable as a mutable tensor. You should have a unique handle to
+    /// this tensor for the entire time it is being used as a mutable tensor.
+    abstract SetMutable: unit -> unit
+
+    abstract IsMutable: bool
+
+    /// Modifies the tensor by constraining its values with the corresponding elements in the low/high tensors.
+    abstract ClampInPlace: low: RawTensor * high: RawTensor -> unit
+
+    /// Modifies the tensor by comparing each element pairwise with the corresponding element in t2
+    abstract LtInPlace: t2: RawTensor -> unit
+
+    /// Modifies the tensor by comparing each element pairwise with the corresponding element in t2
+    abstract GtInPlace: t2: RawTensor -> unit
+
+    /// Modifies the tensor by comparing each element pairwise with the corresponding element in t2
+    abstract LeInPlace: t2: RawTensor -> unit
+
+    /// Modifies the tensor by comparing each element pairwise with the corresponding element in t2
+    abstract GeInPlace: t2: RawTensor -> unit
+
+    /// Modifies the tensor by comparing each element pairwise with the corresponding element in t2
+    abstract EqInPlace: t2: RawTensor -> unit
+
+    /// Modifies the tensor by comparing each element pairwise with the corresponding element in t2
+    abstract NeqInPlace: t2: RawTensor -> unit
+
+    /// Modifies the tensor by the element-wise addition of the two tensors
+    abstract AddInPlace: RawTensor * ?alpha: scalar -> unit
+
+    /// Modifies the tensor by the element-wise addition of a scalar
+    abstract AddScalarInPlace: b: scalar -> unit
+
+    /// Adds a slice of t2 at the given location to the tensor
+    abstract AddSliceInPlace: location: int[] * t2: RawTensor -> unit
+
+    /// Modifies the tensor by the element-wise subtraction of two tensors
+    abstract SubInPlace: t2: RawTensor -> unit
+
+    /// Modifies the tensor by the element-wise subtraction of the tensor and a scalar, where the scalar is logically
+    /// broadcast to the same shape as the tensor
+    abstract SubScalarInPlace: b: scalar -> unit
+
+    /// Modifies the tensor by the element-wise multiplication of two tensors
+    abstract MulInPlace: t2: RawTensor -> unit
+
+    /// Modifies the tensor by the element-wise multiplication of a tensor and a scalar, where the scalar is logically
+    /// broadcast to the same shape as the tensor
+    abstract MulScalarInPlace: b: scalar -> unit
+
+    /// Modifies the tensor by the element-wise division of two tensors
+    abstract DivInPlace: t2: RawTensor -> unit
+
+    /// Modifies the tensor by the element-wise division of a tensor by a scalar, where the scalar is logically
+    /// broadcast to the same shape as the tensor
+    abstract
DivScalarInPlace: t2: scalar -> unit + + /// Modifies the tensor by the element-wise exponentiation of two tensors + abstract PowInPlace: t2: RawTensor -> unit + + /// Modifies the tensor by the element-wise exponentiation of a tensor and a scalar, where the scalar is logically + /// broadcast to the same shape as the tensor + abstract PowScalarInPlace: t2: scalar -> unit + + /// Modifies the tensor by the matrix multiplication of two tensors + abstract MatMulInPlace: t2: RawTensor -> unit + + /// Modifies the tensor by the element-wise negation of the tensor + abstract NegInPlace: unit -> unit + + /// Modifies the tensor by the element-wise sign of the tensor + abstract SignInPlace: unit -> unit + + /// Modifies the tensor by the element-wise integer floor of the tensor + abstract FloorInPlace: unit -> unit + + /// Modifies the tensor by the element-wise integer ceiling of the tensor + abstract CeilInPlace: unit -> unit + + /// Modifies the tensor by the element-wise rounding of the tensor + abstract RoundInPlace: unit -> unit + + /// Modifies the tensor by the element-wise absolute value of the tensor + abstract AbsInPlace: unit -> unit + + /// Modifies the tensor by the element-wise ReLU of the tensor + abstract ReluInPlace: unit -> unit + + /// Modifies the tensor by the element-wise softplus of the tensor + abstract SoftplusInPlace: unit -> unit + + /// Modifies the tensor by the element-wise sigmoid of the tensor + abstract SigmoidInPlace: unit -> unit + + /// Modifies the tensor by the element-wise natural exponentiation of the tensor + abstract ExpInPlace: unit -> unit + + /// Modifies the tensor by the element-wise natural logarithm of the tensor + abstract LogInPlace: unit -> unit + + /// Modifies the tensor by the element-wise base10 logarithm of the tensor + abstract Log10InPlace: unit -> unit + + /// Modifies the tensor by the element-wise square root of the tensor + abstract SqrtInPlace: unit -> unit + + /// Modifies the tensor by the element-wise sine of the tensor + abstract SinInPlace: unit -> unit + + /// Modifies the tensor by the element-wise cosine of the tensor + abstract CosInPlace: unit -> unit + + /// Modifies the tensor by the element-wise tangent of the tensor + abstract TanInPlace: unit -> unit + + /// Modifies the tensor by the element-wise sinh of the tensor + abstract SinhInPlace: unit -> unit + + /// Modifies the tensor by the element-wise cosh of the tensor + abstract CoshInPlace: unit -> unit + + /// Modifies the tensor by the element-wise tanh of the tensor + abstract TanhInPlace: unit -> unit + + /// Modifies the tensor by the element-wise asin of the tensor + abstract AsinInPlace: unit -> unit + + /// Modifies the tensor by the element-wise cos of the tensor + abstract AcosInPlace: unit -> unit + + /// Modifies the tensor by the element-wise atan of the tensor + abstract AtanInPlace: unit -> unit + + /// Modifies the tensor by setting all values to one + abstract OnesInPlace: unit -> unit + + /// Modifies the tensor by setting all values to zero + abstract ZerosInPlace: unit -> unit + + /// Modifies the tensor by setting it to random values taken from a uniform distribution in [0, 1). + abstract RandomInPlace: unit -> unit + + /// Modifies the tensor by setting all values taken from a normal distribution with mean 0 and variance 1. 
+    abstract RandomNormalInPlace: unit -> unit
+
+    /// Modifies the tensor by setting it to random integers from the given range
+    abstract RandomIntInPlace: low:int * high:int -> unit
+
diff --git a/src/TensorMath/Scalar.fs b/src/TensorMath/Scalar.fs
new file mode 100644
index 0000000..61b0ef0
--- /dev/null
+++ b/src/TensorMath/Scalar.fs
@@ -0,0 +1,77 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath
+
+open System
+open System.Reflection
+
+/// Represents a scalar in the TensorMath programming model
+type scalar = System.IConvertible
+
+[<AutoOpen>]
+module ScalarExtensions =
+    type System.IConvertible with
+        member inline x.toSingle() = x.ToSingle(null)
+        member inline x.toDouble() = x.ToDouble(null)
+        member inline x.toInt64() = x.ToInt64(null)
+        member inline x.toInt32() = x.ToInt32(null)
+        member inline x.toInt16() = x.ToInt16(null)
+        member inline x.toSByte() = x.ToSByte(null)
+        member inline x.toByte() = x.ToByte(null)
+        member inline x.toBool() = x.toInt32() <> 0
+        member inline x.sub(y:scalar) : scalar = (x.toDouble() - y.toDouble()) :> scalar
+        member inline x.log() : scalar = x.toDouble() |> log :> scalar
+        member inline x.neg() : scalar = -x.toDouble() :> scalar
+        member inline x.dtype =
+            let ti = x.GetTypeCode()
+            match ti with
+            | TypeCode.Double -> Dtype.Float64
+            | TypeCode.Single -> Dtype.Float32
+            | TypeCode.Int32 -> Dtype.Int32
+            | TypeCode.Int64 -> Dtype.Int64
+            | TypeCode.SByte -> Dtype.Int8
+            | TypeCode.Byte -> Dtype.Byte
+            | TypeCode.Int16 -> Dtype.Int16
+            | TypeCode.Boolean -> Dtype.Bool
+            | _ -> failwithf "unknown scalar type '%A'" x
+
+        member inline x.cast(dtype) =
+            match dtype with
+            | Dtype.Float16 -> x.toSingle() :> scalar
+            | Dtype.BFloat16 -> x.toSingle() :> scalar
+            | Dtype.Float32 -> x.toSingle() :> scalar
+            | Dtype.Float64 -> x.toDouble() :> scalar
+            | Dtype.Int8 -> x.toSByte() :> scalar
+            | Dtype.Byte -> x.toByte() :> scalar
+            | Dtype.Int32 -> x.toInt32() :> scalar
+            | Dtype.Int64 -> x.toInt64() :> scalar
+            | Dtype.Int16 -> x.toInt16() :> scalar
+            | Dtype.Bool -> x.toBool() :> scalar
+
+    // Floating point scalars force integers to widen to the default floating point type
+    //
+    // For example:
+    // >>> import torch
+    // >>> (torch.tensor([1], dtype=torch.int32) * 2.5).dtype
+    // torch.float32
+    // >>> torch.set_default_dtype(torch.float16)
+    // >>> (torch.tensor([1], dtype=torch.int32) * 2.5).dtype
+    // torch.float16
+    // >>> (torch.tensor([1], dtype=torch.int32) * 2).dtype
+    // torch.int32
+    let tryWidenScalar (tensorDtype: Dtype) (scalar: scalar) =
+        match tensorDtype, scalar.GetTypeCode() with
+        | Dtype.Integral, (TypeCode.Double | TypeCode.Single) -> ValueSome Dtype.Default
+        | _, _ -> ValueNone
+
+    let widenScalarForDivision (tensorDtype: Dtype) (scalarDtype: Dtype) =
+        match tensorDtype.IsFloatingPoint, scalarDtype.IsFloatingPoint with
+        | false, false -> Dtype.Default
+        | false, true -> Dtype.Default
+        | true, false -> tensorDtype
+        | true, true -> tensorDtype
\ No newline at end of file
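A sketch of the widening rules above in action (illustrative; Dtype.Integral is assumed to be the active pattern used elsewhere in this patch for integer dtypes):

    tryWidenScalar Dtype.Int32 (2.5 :> scalar)       // ValueSome Dtype.Default: a float scalar widens an integer tensor
    tryWidenScalar Dtype.Int32 (2 :> scalar)         // ValueNone: an integer scalar leaves Int32 alone
    widenScalarForDivision Dtype.Int32 Dtype.Int32   // Dtype.Default: integer/integer division promotes to the default float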
+namespace TensorMath
+
+open TensorMath.Util
+
+/// Represents the shape of a tensor.
+type Shape = int[]
+
+/// Contains functions and values related to tensor shapes.
+module rec Shape =
+
+    /// Gets the total number of elements in the shape.
+    let nelement (shape: Shape) =
+        if shape.Length = 0 then 1
+        else Array.reduce (*) shape
+
+    /// The shape for a scalar value.
+    let scalar : Shape = [| |]
+
+    /// Indicates if one shape contains another.
+    let contains (bigShape:Shape) (smallShape: Shape) =
+        if bigShape.Length <> smallShape.Length then failwithf "Expecting bigShape (%A) and smallShape (%A) to have the same number of dimensions" bigShape.Length smallShape.Length
+        Array.map2 (<=) smallShape bigShape |> Array.forall id
+
+    /// Checks if the given shapes are appropriate for a stack operation and returns information related to the resulting shape.
+    let checkCanStack (shapes:Shape[]) (dim: int) =
+        if not (Seq.allEqual shapes) then failwithf "Cannot stack tensors with different shapes: %A" shapes
+        let n = shapes.Length
+        if n = 0 then failwithf "Expecting a non-empty sequence of tensors"
+        let shape = shapes[0]
+        if dim < 0 || dim > shape.Length then failwithf "Expecting 0 <= dim (%A) <= %A" dim shape.Length
+        let shape1 = shape[0..dim-1]
+        let shape2 = shape[dim..]
+        let outputShape = [| yield! shape1; yield n; yield! shape2 |]
+        n, shape1, shape2, outputShape
+
+    /// Checks if the given shapes are appropriate for a GetSlice operation and returns information related to the resulting shape.
+    let checkCanGetSlice (shape: Shape) (fullBounds: int[,]) =
+        if Array2D.length1 fullBounds <> shape.Length then failwithf "Expecting %i-by-3 fullBounds" shape.Length
+        let outputShape =
+            [|for i=0 to (fullBounds.GetLength(0) - 1) do
+                let len = fullBounds[i,1] - fullBounds[i,0] + 1
+                if fullBounds[i, 2] = 1 then
+                    if len > 1 then yield len // if len=1 then squeeze this dimension
+                else
+                    yield len|]
+        outputShape
+
+    /// Checks if the given index is valid in the context of the given shape.
+    let checkCanIndex (shape: int[]) (index: int[]) =
+        if shape.Length <> index.Length then failwithf "Expecting shape (%A) and index (%A) to have the same length" shape index
+        let valid = Array.forall2 (fun s i -> (i < s) && (i >= 0)) shape index
+        if not valid then failwithf "index (%A) is not valid for shape (%A)" index shape
+
+    /// Computes the shape that results from a dilation operation.
+    let dilated (shape: Shape) (dilations: int[]) =
+        Array.map2 (fun n d -> n + (n - 1) * (d - 1)) shape dilations
+
+    /// Checks if the given shapes are appropriate for a concatenation operation and returns information related to the resulting shape.
+    let checkCanCat (shapes: Shape[]) (dim: int) =
+        let n = shapes.Length
+        if n = 0 then invalidArg "tensors" "Expecting at least one tensor"
+        let shape = shapes[0]
+        if dim < 0 || dim >= shape.Length then invalidArg "dim" "invalid dimension"
+        let shape1 = shape[0..dim-1]
+        let shape3 = shape[dim+1..]
+        if shapes |> Array.exists (fun shapeOther -> shapeOther[0..dim-1] <> shape1 || shapeOther[dim+1..] <> shape3) then
+            invalidArg "tensors" "Expecting tensors with similar shapes"
+        let m2 = shapes |> Array.sumBy (fun shape -> shape[dim])
+        let outputShape = [| yield! shape1; yield m2; yield! shape3 |]
+        n, shape1, m2, shape3, outputShape
+
+    /// Checks if the given shapes are appropriate for a split operation and returns information related to the resulting shape.
+    let checkCanSplit (shape: Shape) (sizes: int[]) (dim: int) =
+        if dim < 0 || dim >= shape.Length then invalidArg "dim" "invalid dimension"
+        if Array.sum sizes <> shape[dim] then invalidArg "sizes" "the sum of sizes must equal the relevant dimension"
+        let shape1 = shape[0..dim-1]
+        let shape2 = shape[dim+1..]
+        let outputShapes = sizes |> Array.map (fun sz -> [| yield! shape1; yield sz; yield! shape2 |])
+        outputShapes
+
+    /// Checks if the given shapes are appropriate for an unstack operation and returns information related to the resulting shape.
+    let checkCanUnstack (shape: Shape) (dim: int) =
+        if shape.Length < 1 then failwith "Cannot unstack scalar Tensor (dim < 1)"
+        if dim < 0 || dim >= shape.Length then invalidArg "dim" "invalid dimension"
+        let shape1 = shape[0..dim-1]
+        let shape2 = shape[dim+1..]
+        let outputShape = Array.append shape1 shape2
+        shape1, shape2, outputShape
+
+    /// Computes the shape that results from a 2d transpose operation.
+    let computeTranspose2d (shape: Shape) =
+        let nrows = shape[0]
+        let ncols = shape[1]
+        let outputShape = [| ncols; nrows |]
+        outputShape
+
+    /// Checks if the two device types are equal.
+    let checkDeviceTypes (deviceType1: DeviceType) (deviceType2: DeviceType) =
+        if deviceType1 <> deviceType2 then failwithf "Expecting input device types %A and %A to be the same" deviceType1 deviceType2
+
+    /// Checks if the two tensor element types are equal.
+    let checkDtypes (dtype1: Dtype) (dtype2: Dtype) =
+        if dtype1 <> dtype2 then failwithf "Expecting input tensor types %A and %A to be the same" dtype1 dtype2
+
+    /// Check if the tensor element type is appropriate for a convolution operation.
+    let private checkConvDType op (dtype: Dtype) =
+        match dtype with
+        | Dtype.Bool -> opNotSupported op dtype
+        | _ -> ()
+
+    /// Checks if the given shapes are appropriate for a convolution operation and returns information related to the resulting shape.
+    let checkCanConv1d (deviceType1: DeviceType) (deviceType2: DeviceType) (dtype1: Dtype) (dtype2: Dtype) (shape1:Shape) (shape2:Shape) (stride: int) (padding: int) (dilation: int) =
+        checkDeviceTypes deviceType1 deviceType2
+        checkDtypes dtype1 dtype2
+        checkConvDType "conv1d" dtype1
+        if shape1.Length <> 3 || shape2.Length <> 3 then failwithf "Expecting two 3d tensors t1, t2 where t1 is input (NxCxI: batchSize x inputChannels x inputLength) and t2 is filters (KxCxF: outputChannels x inputChannels x kernelLength), received tensors with shapes %A, %A" shape1 shape2
+        if padding < 0 then failwithf "Expecting padding (%A) >= 0" padding
+        if stride < 1 then failwithf "Expecting stride (%A) >= 1" stride
+        if dilation < 1 then failwithf "Expecting dilation (%A) >= 1" dilation
+        let batchSize = shape1[0]
+        let inputChannels = shape1[1]
+        let inputLength = shape1[2]
+        let outputChannels = shape2[0]
+        let filtersChannels = shape2[1]
+        let kernelLength = shape2[2]
+        let inputLengthAfterPadding = inputLength + 2*padding
+        if filtersChannels <> inputChannels then failwithf "Input and filters have different number of channels: %A, %A" inputChannels filtersChannels
+        if kernelLength > inputLengthAfterPadding then failwithf "Expecting kernelLength (%A) <= inputLengthAfterPadding (%A)" kernelLength inputLengthAfterPadding
+        let outputSize = int (floor (float (inputLengthAfterPadding - kernelLength)/(float stride))) + 1
+        let outputShape = [|batchSize; outputChannels; outputSize|]
+        batchSize, inputChannels, kernelLength, outputChannels, outputSize, outputShape
+
+    /// Checks if the given shapes are appropriate for a convolution operation and returns information related to the resulting shape.
+    let checkCanConv2d (deviceType1: DeviceType) (deviceType2: DeviceType) (dtype1: Dtype) (dtype2: Dtype) (shape1: Shape) (shape2: Shape) (strides: int[]) (paddings: int[]) (dilations: int[]) =
+        checkDeviceTypes deviceType1 deviceType2
+        checkDtypes dtype1 dtype2
+        checkConvDType "conv2d" dtype1
+        if shape1.Length <> 4 || shape2.Length <> 4 then failwithf "Expecting two 4d tensors t1, t2 where t1 is input, NxCxHxW (batchSize x inputChannels x inputHeight x inputWidth) and t2 is filters, KxCxFxG (outputChannels x inputChannels x kernelHeight x kernelWidth), received tensors with shapes %A, %A" shape1 shape2
+        if strides.Length <> 2 then failwithf "Expecting strides (%A) to be a length-two array" strides
+        if paddings.Length <> 2 then failwithf "Expecting paddings (%A) to be a length-two array" paddings
+        if dilations.Length <> 2 then failwithf "Expecting dilations (%A) to be a length-two array" dilations
+        if paddings[0] < 0 || paddings[1] < 0 then failwithf "Expecting all paddings (%A) >= 0" paddings
+        if strides[0] < 1 || strides[1] < 1 then failwithf "Expecting all strides (%A) >= 1" strides
+        if dilations[0] < 1 || dilations[1] < 1 then failwithf "Expecting all dilations (%A) >= 1" dilations
+        let batchSize = shape1[0]
+        let inputChannels = shape1[1]
+        let inputHeight = shape1[2]
+        let inputWidth = shape1[3]
+        let outputChannels = shape2[0]
+        let filtersChannels = shape2[1]
+        let kernelHeight = shape2[2]
+        let kernelWidth = shape2[3]
+        let inputHeightAfterPadding = inputHeight + 2*paddings[0]
+        let inputWidthAfterPadding = inputWidth + 2*paddings[1]
+        if filtersChannels <> inputChannels then failwithf "Input and filters have different number of channels: %A, %A" inputChannels filtersChannels
+        if kernelHeight > inputHeightAfterPadding then failwithf "Expecting kernelHeight (%A) <= inputHeightAfterPadding (%A)" kernelHeight inputHeightAfterPadding
+        if kernelWidth > inputWidthAfterPadding then failwithf "Expecting kernelWidth (%A) <= inputWidthAfterPadding (%A)" kernelWidth inputWidthAfterPadding
+        let outputHeight = int (floor (float (inputHeightAfterPadding - kernelHeight)/(float strides[0]))) + 1
+        let outputWidth = int (floor (float (inputWidthAfterPadding - kernelWidth)/(float strides[1]))) + 1
+        let outputShape = [|batchSize; outputChannels; outputHeight; outputWidth|]
+        batchSize, inputChannels, (kernelHeight, kernelWidth), (outputChannels, outputHeight, outputWidth), outputShape
+
+    /// Checks if the given shapes are appropriate for a convolution operation and returns information related to the resulting shape.
+    let checkCanConv3d (deviceType1: DeviceType) (deviceType2: DeviceType) (dtype1: Dtype) (dtype2: Dtype) (shape1: Shape) (shape2: Shape) (strides: int[]) (paddings: int[]) (dilations: int[]) =
+        checkDeviceTypes deviceType1 deviceType2
+        checkDtypes dtype1 dtype2
+        checkConvDType "conv3d" dtype1
+        if shape1.Length <> 5 || shape2.Length <> 5 then failwithf "Expecting two 5d tensors t1, t2 where t1 is input, NxCxDxHxW (batchSize x inputChannels x inputDepth x inputHeight x inputWidth) and t2 is filters, KxCxExFxG (outputChannels x inputChannels x kernelDepth x kernelHeight x kernelWidth), received tensors with shapes %A, %A" shape1 shape2
+        if strides.Length <> 3 then failwithf "Expecting strides (%A) to be a length-three array" strides
+        if paddings.Length <> 3 then failwithf "Expecting paddings (%A) to be a length-three array" paddings
+        if dilations.Length <> 3 then failwithf "Expecting dilations (%A) to be a length-three array" dilations
+        if paddings[0] < 0 || paddings[1] < 0 || paddings[2] < 0 then failwithf "Expecting all paddings (%A) >= 0" paddings
+        if strides[0] < 1 || strides[1] < 1 || strides[2] < 1 then failwithf "Expecting all strides (%A) >= 1" strides
+        if dilations[0] < 1 || dilations[1] < 1 || dilations[2] < 1 then failwithf "Expecting all dilations (%A) >= 1" dilations
+        let batchSize = shape1[0]
+        let inputChannels = shape1[1]
+        let inputDepth = shape1[2]
+        let inputHeight = shape1[3]
+        let inputWidth = shape1[4]
+        let outputChannels = shape2[0]
+        let filtersChannels = shape2[1]
+        let kernelDepth = shape2[2]
+        let kernelHeight = shape2[3]
+        let kernelWidth = shape2[4]
+        let inputDepthAfterPadding = inputDepth + 2*paddings[0]
+        let inputHeightAfterPadding = inputHeight + 2*paddings[1]
+        let inputWidthAfterPadding = inputWidth + 2*paddings[2]
+        if filtersChannels <> inputChannels then failwithf "Input and filters have different number of channels: %A, %A" inputChannels filtersChannels
+        if kernelDepth > inputDepthAfterPadding then failwithf "Expecting kernelDepth (%A) <= inputDepthAfterPadding (%A)" kernelDepth inputDepthAfterPadding
+        if kernelHeight > inputHeightAfterPadding then failwithf "Expecting kernelHeight (%A) <= inputHeightAfterPadding (%A)" kernelHeight inputHeightAfterPadding
+        if kernelWidth > inputWidthAfterPadding then failwithf "Expecting kernelWidth (%A) <= inputWidthAfterPadding (%A)" kernelWidth inputWidthAfterPadding
+        let outputDepth = int (floor (float (inputDepthAfterPadding - kernelDepth)/(float strides[0]))) + 1
+        let outputHeight = int (floor (float (inputHeightAfterPadding - kernelHeight)/(float strides[1]))) + 1
+        let outputWidth = int (floor (float (inputWidthAfterPadding - kernelWidth)/(float strides[2]))) + 1
+        let outputShape = [|batchSize; outputChannels; outputDepth; outputHeight; outputWidth|]
+        batchSize, inputChannels,
(kernelDepth, kernelHeight, kernelWidth), (outputChannels, outputDepth, outputHeight, outputWidth), outputShape
+
+    /// Checks if the given shapes are appropriate for a transposed convolution operation and returns information related to the resulting shape.
+    let checkCanConvTranspose1d (deviceType1: DeviceType) (deviceType2: DeviceType) (dtype1: Dtype) (dtype2: Dtype) (shape1: Shape) (shape2: Shape) (stride: int) (padding: int) (dilation: int) (outputPadding: int) =
+        checkDeviceTypes deviceType1 deviceType2
+        checkDtypes dtype1 dtype2
+        checkConvDType "convTranspose1d" dtype1
+        if shape1.Length <> 3 || shape2.Length <> 3 then failwithf "Expecting two 3d tensors t1, t2 where t1 is input (NxCxI: batchSize x inputChannels x inputLength) and t2 is filters (CxKxF: inputChannels x outputChannels x kernelLength), received tensors with shapes %A, %A" shape1 shape2
+        if padding < 0 then failwithf "Expecting padding (%A) >= 0" padding
+        if stride < 1 then failwithf "Expecting stride (%A) >= 1" stride
+        if dilation < 1 then failwithf "Expecting dilation (%A) >= 1" dilation
+        if outputPadding < 0 then failwithf "Expecting outputPadding (%A) >= 0" outputPadding
+        let batchSize = shape1[0]
+        let inputChannels = shape1[1]
+        let inputLength = shape1[2]
+        let outputChannels = shape2[1]
+        let filtersChannels = shape2[0]
+        let kernelLength = shape2[2]
+        let kernelShape = [|kernelLength|]
+        let kernelShapeAfterDilation = dilated kernelShape [|dilation|]
+        let kernelLength = kernelShapeAfterDilation[0]
+        if filtersChannels <> inputChannels then failwithf "Input and filters have different number of channels: %A, %A" inputChannels filtersChannels
+        let outputSize = stride * (inputLength - 1) + kernelLength - 2 * padding + outputPadding
+        let outputShape = [|batchSize; outputChannels; outputSize|]
+        batchSize, inputChannels, kernelLength, outputChannels, outputSize, outputShape
+
+    /// Checks if the given shapes are appropriate for a transposed convolution operation and returns information related to the resulting shape.
+    let checkCanConvTranspose2d (deviceType1: DeviceType) (deviceType2: DeviceType) (dtype1: Dtype) (dtype2: Dtype) (shape1: Shape) (shape2: Shape) (strides: int[]) (paddings: int[]) (dilations: int[]) (outputPaddings: int[]) =
+        checkDeviceTypes deviceType1 deviceType2
+        checkDtypes dtype1 dtype2
+        checkConvDType "convTranspose2d" dtype1
+        if shape1.Length <> 4 || shape2.Length <> 4 then failwithf "Expecting two 4d tensors t1, t2 where t1 is input, NxCxHxW (batchSize x inputChannels x inputHeight x inputWidth) and t2 is filters, CxKxFxG (inputChannels x outputChannels x kernelHeight x kernelWidth), received tensors with shapes %A, %A" shape1 shape2
+        if strides.Length <> 2 then failwithf "Expecting strides (%A) to be a length-two array" strides
+        if paddings.Length <> 2 then failwithf "Expecting paddings (%A) to be a length-two array" paddings
+        if dilations.Length <> 2 then failwithf "Expecting dilations (%A) to be a length-two array" dilations
+        if outputPaddings.Length <> 2 then failwithf "Expecting outputPaddings (%A) to be a length-two array" outputPaddings
+        if paddings[0] < 0 || paddings[1] < 0 then failwithf "Expecting all paddings (%A) >= 0" paddings
+        if strides[0] < 1 || strides[1] < 1 then failwithf "Expecting all strides (%A) >= 1" strides
+        if dilations[0] < 1 || dilations[1] < 1 then failwithf "Expecting all dilations (%A) >= 1" dilations
+        if outputPaddings[0] < 0 || outputPaddings[1] < 0 then failwithf "Expecting all outputPaddings (%A) >= 0" outputPaddings
+        let batchSize = shape1[0]
+        let inputChannels = shape1[1]
+        let inputHeight = shape1[2]
+        let inputWidth = shape1[3]
+        let outputChannels = shape2[1]
+        let filtersChannels = shape2[0]
+        let kernelHeight = shape2[2]
+        let kernelWidth = shape2[3]
+        let kernelShape = [|kernelHeight; kernelWidth|]
+        let kernelShapeAfterDilation = dilated kernelShape dilations
+        let kernelHeight = kernelShapeAfterDilation[0]
+        let kernelWidth = kernelShapeAfterDilation[1]
+        if filtersChannels <> inputChannels then failwithf "Input and filters have different number of channels: %A, %A" inputChannels filtersChannels
+        let outputHeight = strides[0] * (inputHeight - 1) + kernelHeight - 2 * paddings[0] + outputPaddings[0]
+        let outputWidth = strides[1] * (inputWidth - 1) + kernelWidth - 2 * paddings[1] + outputPaddings[1]
+        let outputShape = [|batchSize; outputChannels; outputHeight; outputWidth|]
+        batchSize, inputChannels, (kernelHeight, kernelWidth), (outputChannels, outputHeight, outputWidth), outputShape
+
+    /// Checks if the given shapes are appropriate for a transposed convolution operation and returns information related to the resulting shape.
+    let checkCanConvTranspose3d (deviceType1: DeviceType) (deviceType2: DeviceType) (dtype1: Dtype) (dtype2: Dtype) (shape1: Shape) (shape2: Shape) (strides: int[]) (paddings: int[]) (dilations: int[]) (outputPaddings: int[]) =
+        checkDeviceTypes deviceType1 deviceType2
+        checkDtypes dtype1 dtype2
+        checkConvDType "convTranspose3d" dtype1
+        if shape1.Length <> 5 || shape2.Length <> 5 then failwithf "Expecting two 5d tensors t1, t2 where t1 is input, NxCxDxHxW (batchSize x inputChannels x inputDepth x inputHeight x inputWidth) and t2 is filters, CxKxExFxG (inputChannels x outputChannels x kernelDepth x kernelHeight x kernelWidth), received tensors with shapes %A, %A" shape1 shape2
+        if strides.Length <> 3 then failwithf "Expecting strides (%A) to be a length-three array" strides
+        if paddings.Length <> 3 then failwithf "Expecting paddings (%A) to be a length-three array" paddings
+        if dilations.Length <> 3 then failwithf "Expecting dilations (%A) to be a length-three array" dilations
+        if outputPaddings.Length <> 3 then failwithf "Expecting outputPaddings (%A) to be a length-three array" outputPaddings
+        if paddings[0] < 0 || paddings[1] < 0 || paddings[2] < 0 then failwithf "Expecting all paddings (%A) >= 0" paddings
+        if strides[0] < 1 || strides[1] < 1 || strides[2] < 1 then failwithf "Expecting all strides (%A) >= 1" strides
+        if dilations[0] < 1 || dilations[1] < 1 || dilations[2] < 1 then failwithf "Expecting all dilations (%A) >= 1" dilations
+        if outputPaddings[0] < 0 || outputPaddings[1] < 0 || outputPaddings[2] < 0 then failwithf "Expecting all outputPaddings (%A) >= 0" outputPaddings
+        let batchSize = shape1[0]
+        let inputChannels = shape1[1]
+        let inputDepth = shape1[2]
+        let inputHeight = shape1[3]
+        let inputWidth = shape1[4]
+        let outputChannels = shape2[1]
+        let filtersChannels = shape2[0]
+        let kernelDepth = shape2[2]
+        let kernelHeight = shape2[3]
+        let kernelWidth = shape2[4]
+        let kernelShape = [|kernelDepth; kernelHeight; kernelWidth|]
+        let kernelShapeAfterDilation = dilated kernelShape dilations
+        let kernelDepth = kernelShapeAfterDilation[0]
+        let kernelHeight = kernelShapeAfterDilation[1]
+        let kernelWidth = kernelShapeAfterDilation[2]
+        if filtersChannels <> inputChannels then failwithf "Input and filters have different number of channels: %A, %A" inputChannels filtersChannels
+        let outputDepth = strides[0] * (inputDepth - 1) + kernelDepth - 2 * paddings[0] + outputPaddings[0]
+        let outputHeight = strides[1] * (inputHeight - 1) + kernelHeight - 2 * paddings[1] + outputPaddings[1]
+        let outputWidth = strides[2] * (inputWidth - 1) + kernelWidth - 2 * paddings[2] + outputPaddings[2]
+        let outputShape = [|batchSize; outputChannels; outputDepth; outputHeight; outputWidth|]
+        batchSize, inputChannels, (kernelDepth, kernelHeight, kernelWidth), (outputChannels, outputDepth, outputHeight, outputWidth), outputShape
+
+    /// Checks if the given shapes are appropriate for a maxpool operation and returns information related to the resulting shape.
+    let checkCanMaxOrAvgpool1d nm (dtype: Dtype) (shape: Shape) (kernelSize: int) (stride: int) (padding: int) =
+        match dtype with
+        | Dtype.Bool | Dtype.Integral -> opNotSupported nm dtype
+        | _ ->
+        if shape.Length <> 3 then failwithf "Expecting a 3d tensor (NxCxL: batchSize x inputChannels x inputLength), received tensor with shape %A" shape
+        if kernelSize < 1 then failwithf "Expecting kernelSize (%A) >= 1" kernelSize
+        if padding < 0 then failwithf "Expecting padding (%A) >= 0" padding
+        if padding > kernelSize/2 then failwithf "Expecting padding (%A) <= kernelSize (%A) / 2" padding kernelSize
+        if stride < 1 then failwithf "Expecting stride (%A) >= 1" stride
+        let batchSize = shape[0]
+        let channels = shape[1]
+        let inputSize = shape[2]
+        let inputLengthAfterPadding = inputSize + 2*padding
+        if kernelSize > inputLengthAfterPadding then failwithf "Expecting kernelSize (%A) <= inputLengthAfterPadding (%A)" kernelSize inputLengthAfterPadding
+        let outputSize = int (floor (float (inputLengthAfterPadding - kernelSize)/(float stride))) + 1
+        let outputShape = [|batchSize; channels; outputSize|]
+        batchSize, channels, inputSize, outputSize, outputShape
+
+    /// Checks if the given shapes are appropriate for a maxpool operation and returns information related to the resulting shape.
+    let checkCanMaxpool1d dtype shape kernelSize stride padding =
+        checkCanMaxOrAvgpool1d "maxpool1d" dtype shape kernelSize stride padding
+
+    /// Checks if the given shapes are appropriate for an avgpool operation and returns information related to the resulting shape.
+    let checkCanAvgpool1d dtype shape kernelSize stride padding =
+        checkCanMaxOrAvgpool1d "avgpool1d" dtype shape kernelSize stride padding
+
+    /// Checks if the given shapes are appropriate for a maxpool operation and returns information related to the resulting shape.
+    let checkCanMaxOrAvgpool2d nm (dtype: Dtype) (shape: Shape) (kernelSize: int[]) (strides: int[]) (paddings: int[]) =
+        match dtype with
+        | Dtype.Bool | Dtype.Integral -> opNotSupported nm dtype
+        | _ ->
+        if shape.Length <> 4 then failwithf "Expecting a 4d tensor (NxCxHxW: batchSize x inputChannels x inputHeight x inputWidth), received tensor with shape %A" shape
+        if kernelSize[0] < 1 || kernelSize[1] < 1 then failwithf "Expecting all kernelSizes (%A) >= 1" kernelSize
+        if paddings[0] < 0 || paddings[1] < 0 then failwithf "Expecting all paddings (%A) >= 0" paddings
+        if paddings[0] > kernelSize[0]/2 || paddings[1] > kernelSize[1]/2 then failwithf "Expecting all paddings (%A) <= kernelSizes (%A) / 2" paddings kernelSize
+        if strides[0] < 1 || strides[1] < 1 then failwithf "Expecting all strides (%A) >= 1" strides
+        let batchSize = shape[0]
+        let channels = shape[1]
+        let inputHeight = shape[2]
+        let inputWidth = shape[3]
+        let kernelHeight = kernelSize[0]
+        let kernelWidth = kernelSize[1]
+        let inputHeightAfterPadding = inputHeight + 2*paddings[0]
+        let inputWidthAfterPadding = inputWidth + 2*paddings[1]
+        if kernelSize[0] > inputHeightAfterPadding then failwithf "Expecting kernelSize[0] (%A) <= inputHeightAfterPadding (%A)" kernelSize[0] inputHeightAfterPadding
+        if kernelSize[1] > inputWidthAfterPadding then failwithf "Expecting kernelSize[1] (%A) <= inputWidthAfterPadding (%A)" kernelSize[1] inputWidthAfterPadding
+        let outputHeight = int (floor (float (inputHeightAfterPadding - kernelHeight)/(float strides[0]))) + 1
+        let outputWidth = int (floor (float (inputWidthAfterPadding - kernelWidth)/(float strides[1]))) + 1
+        let outputShape = [|batchSize; channels; outputHeight; outputWidth|]
+        (batchSize, channels, (inputHeight, inputWidth), (kernelHeight, kernelWidth), (outputHeight, outputWidth), outputShape)
+
+    /// Checks if the given shapes are appropriate for a maxpool operation and returns information related to the resulting shape.
+    let checkCanMaxpool2d dtype shape kernelSize strides paddings =
+        checkCanMaxOrAvgpool2d "maxpool2d" dtype shape kernelSize strides paddings
+
+    /// Checks if the given shapes are appropriate for an avgpool operation and returns information related to the resulting shape.
+    let checkCanAvgpool2d dtype shape kernelSize strides paddings =
+        checkCanMaxOrAvgpool2d "avgpool2d" dtype shape kernelSize strides paddings
+
+    /// Checks if the given shapes are appropriate for a maxpool operation and returns information related to the resulting shape.
+    let checkCanMaxOrAvgpool3d nm (dtype: Dtype) (shape: Shape) (kernelSize: int[]) (strides: int[]) (paddings: int[]) =
+        match dtype with
+        | Dtype.Bool | Dtype.Integral -> opNotSupported nm dtype
+        | _ ->
+        if shape.Length <> 5 then failwithf "Expecting a 5d tensor (NxCxDxHxW: batchSize x inputChannels x inputDepth x inputHeight x inputWidth), received tensor with shape %A" shape
+        if kernelSize[0] < 1 || kernelSize[1] < 1 || kernelSize[2] < 1 then failwithf "Expecting all kernelSizes (%A) >= 1" kernelSize
+        if paddings[0] < 0 || paddings[1] < 0 || paddings[2] < 0 then failwithf "Expecting all paddings (%A) >= 0" paddings
+        if paddings[0] > kernelSize[0]/2 || paddings[1] > kernelSize[1]/2 || paddings[2] > kernelSize[2]/2 then failwithf "Expecting all paddings (%A) <= kernelSizes (%A) / 2" paddings kernelSize
+        if strides[0] < 1 || strides[1] < 1 || strides[2] < 1 then failwithf "Expecting all strides (%A) >= 1" strides
+        let batchSize = shape[0]
+        let channels = shape[1]
+        let inputDepth = shape[2]
+        let inputHeight = shape[3]
+        let inputWidth = shape[4]
+        let kernelDepth = kernelSize[0]
+        let kernelHeight = kernelSize[1]
+        let kernelWidth = kernelSize[2]
+        let inputDepthAfterPadding = inputDepth + 2*paddings[0]
+        let inputHeightAfterPadding = inputHeight + 2*paddings[1]
+        let inputWidthAfterPadding = inputWidth + 2*paddings[2]
+        if kernelSize[0] > inputDepthAfterPadding then failwithf "Expecting kernelSize[0] (%A) <= inputDepthAfterPadding (%A)" kernelSize[0] inputDepthAfterPadding
+        if kernelSize[1] > inputHeightAfterPadding then failwithf "Expecting kernelSize[1] (%A) <= inputHeightAfterPadding (%A)" kernelSize[1] inputHeightAfterPadding
+        if kernelSize[2] > inputWidthAfterPadding then failwithf "Expecting kernelSize[2] (%A) <= inputWidthAfterPadding (%A)" kernelSize[2] inputWidthAfterPadding
+        let outputDepth = int (floor (float (inputDepthAfterPadding - kernelDepth)/(float strides[0]))) + 1
+        let outputHeight = int (floor (float (inputHeightAfterPadding - kernelHeight)/(float strides[1]))) + 1
+        let outputWidth = int (floor (float (inputWidthAfterPadding - kernelWidth)/(float strides[2]))) + 1
+        let outputShape = [|batchSize; channels; outputDepth; outputHeight; outputWidth|]
+        (batchSize, channels, (inputDepth, inputHeight, inputWidth), (kernelDepth, kernelHeight, kernelWidth), (outputDepth, outputHeight, outputWidth), outputShape)
+
+    /// Checks if the given shapes are appropriate for a maxpool operation and returns information related to the resulting shape.
+    let checkCanMaxpool3d dtype shape kernelSize strides paddings =
+        checkCanMaxOrAvgpool3d "maxpool3d" dtype shape kernelSize strides paddings
+
+    /// Checks if the given shapes are appropriate for an avgpool operation and returns information related to the resulting shape.
+    let checkCanAvgpool3d dtype shape kernelSize strides paddings =
+        checkCanMaxOrAvgpool3d "avgpool3d" dtype shape kernelSize strides paddings
+
+    /// Checks if the given shapes are appropriate for a maxunpool operation and returns information related to the resulting shape.
+    let checkCanMaxunpool1d (dtype: Dtype) (shape: Shape) (indicesDtype: Dtype) (indicesShape: Shape) (outputSize: int[]) =
+        match dtype with
+        | Dtype.Bool | Dtype.Integral -> opNotSupported "maxunpool1d" dtype
+        | _ ->
+        if indicesDtype <> Dtype.Int32 then failwithf "Expecting indices to have type %A" Dtype.Int32
+        if outputSize.Length <> 3 then failwithf "Expecting outputSize (%A) to be 3-dimensional" outputSize
+        let batchSize = shape[0]
+        let channels = shape[1]
+        let inputSize = shape[2]
+        if outputSize[0] <> indicesShape[0] || outputSize[1] <> indicesShape[1] then failwithf "Expecting the first two elements of outputSize (%A) and indicesShape (%A) to be the same" outputSize indicesShape
+        let outputShape = [|batchSize; channels; outputSize[2]|]
+        batchSize, channels, inputSize, outputShape
+
+    /// Checks if the given shapes are appropriate for a maxunpool operation and returns information related to the resulting shape.
+    let checkCanMaxunpool2d (dtype: Dtype) (shape: Shape) (indicesDtype: Dtype) (indicesShape: Shape) (outputSize: int[]) =
+        match dtype with
+        | Dtype.Bool | Dtype.Integral -> opNotSupported "maxunpool2d" dtype
+        | _ ->
+        if indicesDtype <> Dtype.Int32 then failwithf "Expecting indices to have type %A" Dtype.Int32
+        if outputSize.Length <> 4 then failwithf "Expecting outputSize (%A) to be 4-dimensional" outputSize
+        let batchSize = shape[0]
+        let channels = shape[1]
+        let inputHeight = shape[2]
+        let inputWidth = shape[3]
+        if outputSize[0] <> indicesShape[0] || outputSize[1] <> indicesShape[1] then failwithf "Expecting the first two elements of outputSize (%A) and indicesShape (%A) to be the same" outputSize indicesShape
+        let outputShape = [|batchSize; channels; outputSize[2]; outputSize[3]|]
+        batchSize, channels, (inputHeight, inputWidth), outputShape
+
+    /// Checks if the given shapes are appropriate for a maxunpool operation and returns information related to the resulting shape.
+    let checkCanMaxunpool3d (dtype: Dtype) (shape: Shape) (indicesDtype: Dtype) (indicesShape: Shape) (outputSize: int[]) =
+        match dtype with
+        | Dtype.Bool | Dtype.Integral -> opNotSupported "maxunpool3d" dtype
+        | _ ->
+        if indicesDtype <> Dtype.Int32 then failwithf "Expecting indices to have type %A" Dtype.Int32
+        if outputSize.Length <> 5 then failwithf "Expecting outputSize (%A) to be 5-dimensional" outputSize
+        let batchSize = shape[0]
+        let channels = shape[1]
+        let inputDepth = shape[2]
+        let inputHeight = shape[3]
+        let inputWidth = shape[4]
+        if outputSize[0] <> indicesShape[0] || outputSize[1] <> indicesShape[1] then failwithf "Expecting the first two elements of outputSize (%A) and indicesShape (%A) to be the same" outputSize indicesShape
+        let outputShape = [|batchSize; channels; outputSize[2]; outputSize[3]; outputSize[4]|]
+        batchSize, channels, (inputDepth, inputHeight, inputWidth), outputShape
+
+    /// Indicates if one shape can expand into another through the addition of broadcast dimensions.
+    let canExpand (oldShape: Shape) (newShape: Shape) =
+        newShape.Length >= oldShape.Length &&
+        let trim = newShape.Length - oldShape.Length
+        newShape[..trim-1] |> Array.forall (fun m -> m >= 1)
+        && (oldShape,newShape[trim..]) ||> Array.forall2 (fun n m -> n = 1 || n = m)
+
+    /// Checks if one shape can expand into another through the addition of broadcast dimensions.
+    let checkCanExpand (oldShape: Shape) (newShape: Shape) =
+        let isOK = canExpand oldShape newShape
+        if not isOK then failwithf "can't expand from shape %A to %A - each dimension must either be equal or expand from 1" oldShape newShape
+
+    /// Checks if the given shape is appropriate for a transpose operation and returns information related to the resulting shape.
+    let checkCanTranspose (shape: Shape) (dim0: int) (dim1: int) =
+        if dim0 < 0 || dim0 >= shape.Length then failwithf "Expecting 0 <= dim0 (%A) < shape.Length (%A)" dim0 shape.Length
+        if dim1 < 0 || dim1 >= shape.Length then failwithf "Expecting 0 <= dim1 (%A) < shape.Length (%A)" dim1 shape.Length
+
+    /// Checks if the given shape is appropriate for a transpose operation.
+    let checkCanTranspose2d (dim: int) =
+        if dim <> 2 then failwith "Expecting dim=2 when no specific dimensions are given to transpose. Consider using general transpose(dim0, dim1)."
+
+    /// Checks if the given shape is appropriate for a matrix inverse operation.
+    let checkCanInvert (shape: Shape) =
+        let dim = shape.Length
+        if not (dim = 2 || dim = 3) then failwith "Expecting 2d tensor (a square matrix) or a 3d tensor (a batch of square matrices)."
+        if dim = 2 then if shape[0] <> shape[1] then failwith "Expecting a square matrix"
+        if dim = 3 then if shape[1] <> shape[2] then failwith "Expecting square matrices"
+
+    /// Checks if the given shape is appropriate for a determinant operation.
+    let checkCanDet (shape: Shape) =
+        let dim = shape.Length
+        if not (dim = 2 || dim = 3) then failwith "Expecting 2d tensor (a square matrix) or a 3d tensor (a batch of square matrices)."
+        if dim = 2 then if shape[0] <> shape[1] then failwith "Expecting a square matrix"
+        if dim = 3 then if shape[1] <> shape[2] then failwith "Expecting square matrices"
+
+    /// Checks if the given shapes are appropriate for a linear solve operation, and returns the resulting shape of the solution
+    let checkCanSolve (shapeA: Shape) (shapeB: Shape) =
+        let dimA = shapeA.Length
+        let dimB = shapeB.Length
+        let newShape =
+            if dimA = 2 then
+                let n = shapeA[0]
+                if n <> shapeA[1] then failwithf "Expecting A to be a square matrix, received A with shape %A." shapeA
+                if n <> shapeB[0] then failwithf "Expecting A and B to have the same number of rows (1st dimension), received A and B with shapes %A and %A." shapeA shapeB
+                if dimB = 1 then
+                    // k = 1
+                    [|n|]
+                elif dimB = 2 then
+                    let k = shapeB[1]
+                    [|n; k|]
+                else
+                    failwithf "Expecting B to be a 1d or 2d tensor, received B with shape %A." shapeB
+            elif dimA = 3 then
+                let batchSize = shapeA[0]
+                if batchSize <> shapeB[0] then failwithf "Expecting A and B to have the same number of batch items (1st dimension), received A and B with shapes %A and %A." shapeA shapeB
+                let n = shapeA[1]
+                if n <> shapeA[2] then failwithf "Expecting A to be a batch of square matrices, received A with shape %A." shapeA
+                if n <> shapeB[1] then failwithf "Expecting the matrices in batches A and B to have the same number of rows (2nd dimension), received A and B with shapes %A and %A." shapeA shapeB
+                if dimB = 2 then
+                    // k = 1
+                    [|batchSize; n|]
+                elif dimB = 3 then
+                    let k = shapeB[2]
+                    [|batchSize; n; k|]
+                else
+                    failwithf "Expecting B to be a 2d tensor (batch of vectors) or 3d tensor (a batch of matrices), received B with shape %A." shapeB
+            else
+                failwithf "Expecting A to be a 2d tensor (a square matrix) or a 3d tensor (a batch of square matrices), received A with shape %A."
shapeA
+        newShape
+
+    /// Checks if the given shape is appropriate for a permute operation and returns information related to the resulting shape.
+    let checkCanPermute (shape: Shape) (permutation: int[]) =
+        if shape.Length <> permutation.Length then failwithf "Expecting tensor's shape (%A) and permutation (%A) to have the same length" shape permutation
+        if Seq.hasDuplicates permutation then failwithf "Expecting permutation (%A) to have no duplicate values" permutation
+        let inversePermutation = Array.permute (fun i -> permutation[i]) [| 0.. shape.Length-1 |]
+        let newShape = Array.permute (fun i -> inversePermutation[i]) shape
+        inversePermutation, newShape
+
+    /// Checks if the given shape is appropriate for a flip operation.
+    let checkCanFlip (dim: int) (dims: int[]) =
+        if dims.Length > dim then failwithf "Expecting dims (list of dimension indices to flip) of length at most the Tensor's number of dimensions, received %A, %A" dims.Length dim
+        if Seq.hasDuplicates dims then failwithf "Expecting dims (list of dimension indices to flip) without repetition, received %A" dims
+        if (Array.max dims) >= dim then failwithf "Expecting dims (list of dimension indices to flip) where all indices are less than the tensor dimension, received %A, %A" dims dim
+
+    /// Checks if the given shape is appropriate for a repeat operation.
+    let checkCanRepeat (shape: Shape) (dim: int) =
+        if shape[dim] <> 1 then failwithf "Expecting Tensor's shape (%A) at dim (%A) to be 1" shape dim
+
+    /// Checks if the given shape is appropriate for a dilate operation.
+    let checkCanDilate (dim: int) (dilations: int[]) =
+        if dilations.Length <> dim then failwithf "Expecting dilations (dilation to use in each dimension) of same length with Tensor's dimensions, received %A, %A" dilations.Length dim
+        if (Array.min dilations) < 1 then failwithf "Expecting dilations (dilation to use in each dimension) >= 1 where 1 represents no dilation, received %A" dilations
+
+    /// Checks if the given shape is appropriate for a gather operation.
+    let checkCanGather (shape: Shape) (dim: int) (indicesShape: Shape) (indicesDtype:Dtype) =
+        if shape.Length <> indicesShape.Length then failwithf "Expecting tensor (%A) and indices (%A) to have the same number of dimensions" shape indicesShape
+        if dim < 0 || dim > shape.Length-1 then failwithf "Expecting 0 <= dim (%A) < tensor dim (%A)" dim shape.Length
+        if indicesShape[dim] < 1 then failwithf "Expecting indices shape at dim %A (%A) >= 1" dim indicesShape[dim]
+        if indicesDtype <> Dtype.Int32 then failwithf "Expecting indices to have type %A" Dtype.Int32
+
+    /// Checks if the given shape is appropriate for a scatter operation.
+    let checkCanScatter (shape: Shape) (dim: int) (indicesShape: Shape) (indicesDtype:Dtype) (destinationShape: Shape)=
+        if shape.Length <> indicesShape.Length then failwithf "Expecting tensor (%A) and indices (%A) to have the same number of dimensions" shape indicesShape
+        if shape.Length <> destinationShape.Length then failwithf "Expecting tensor (%A) and destination (%A) to have the same number of dimensions" shape destinationShape
+        if not (contains shape indicesShape) then failwithf "Expecting tensor shape (%A) to contain indices shape (%A)" shape indicesShape
+        if dim < 0 || dim > shape.Length-1 then failwithf "Expecting 0 <= dim (%A) < tensor dim (%A)" dim shape.Length
+        if indicesDtype <> Dtype.Int32 then failwithf "Expecting indices to have type %A" Dtype.Int32
+
+    /// Checks if the given shape is appropriate for a view operation.
+ let checkCanView (shape1: Shape) (shape2: Shape) = + if nelement shape1 <> nelement shape2 then failwithf "Cannot view Tensor of shape %A as shape %A" shape1 shape2 + + /// Checks if the given shape is appropriate for a flatten operation. + let checkCanFlatten (shape: Shape) (startDim: int) (endDim: int) = + if startDim < 0 || startDim >= shape.Length then failwithf "Expecting 0 <= startDim (%A) < %A" startDim shape.Length + if endDim < 0 || endDim >= shape.Length then failwithf "Expecting 0 <= endDim (%A) < %A" endDim shape.Length + if endDim <= startDim then failwithf "Expecting startDim (%A) < endDim (%A)" startDim endDim + + /// Checks if the given shape is appropriate for an addSlice operation. + let checkCanAddSlice (shape1: Shape) (location: int[]) (shape2: Shape) = + if not (contains shape1 shape2) then failwithf "Expecting shape1 to contain shape2, received %A, %A" shape1 shape2 + if location.Length <> shape1.Length then failwithf "Expecting location of the same length as shape1, received %A, %A" (location.Length) shape1 + + /// Checks if the given shapes are appropriate for a matmul operation. + let checkCanMatmul (shape1: Shape) (shape2: Shape) = + if shape1.Length < 2 || shape2.Length < 2 then failwithf "Expecting tensors to have at least two dimensions, received tensors with shapes %A, %A" shape1 shape2 + let aBatchPart, aMatrixPart = Array.splitAt (shape1.Length-2) shape1 + let bBatchPart, bMatrixPart = Array.splitAt (shape2.Length-2) shape2 + if aMatrixPart[1] <> bMatrixPart[0] then failwithf "Cannot matrix multiply tensors with shapes %A, %A - mismatch in matrix dimension" shape1 shape2 + (aBatchPart, aMatrixPart), (bBatchPart, bMatrixPart) + + /// Checks if the given shapes are appropriate for a batched matrix multiplication operation. + let checkCanBMM (shape1: Shape) (shape2: Shape) = + if shape1.Length <> 3 || shape2.Length <> 3 then failwithf "Expecting two 3d tensors, received tensors with shapes %A, %A" shape1 shape2 + if shape1[0] <> shape2[0] then failwithf "Cannot batch matrix multiply tensors with shapes %A, %A - mismatch in batch dimension" shape1 shape2 + let batchSize = shape1[0] + if shape1[2] <> shape2[1] then failwithf "Cannot batch matrix multiply tensors with shapes %A, %A - mismatch in matrix dimension" shape1 shape2 + let outputShape = [|batchSize; shape1[1]; shape2[2]|] + outputShape + + /// Checks if the given shape is appropriate for a dot product operation. + let checkCanDot (shape1: Shape) (shape2: Shape) = + if shape1.Length <> 1 || shape2.Length <> 1 then failwithf "Expecting two vectors (1d Tensors), received tensors with shapes %A, %A" shape1 shape2 + if shape1[0] <> shape2[0] then failwithf "Cannot multiply vectors with different lengths %A, %A" shape1[0] shape2[0] + + /// Checks if the given shape is appropriate for a pad operation. + let checkCanPad (shape: Shape) (paddings: int[]) = + if shape.Length <> paddings.Length then failwithf "Expecting shape (%A) and paddings (%A) to have the same length" shape paddings + if not (paddings |> Array.forall (fun p -> p >= 0)) then failwithf "Expecting all paddings (%A) >= 0" paddings + + /// Checks if the given shape is appropriate for a dropout operation. + let checkCanDropout (p:double) = + if p < 0. || p > 1. then failwithf "Expecting 0 <= p <= 1, but received %A" p + + /// Checks if the given shape is appropriate for a dropout2d operation. 
+    let checkCanDropout2d (shape: Shape) (p:double) =
+        checkCanDropout p
+        if shape.Length <> 4 then failwithf "Expecting shape (%A) to be 4-dimensional (NxCxHxW: batchSize, inputChannels, inputHeight, inputWidth)" shape
+
+    /// Checks if the given shape is appropriate for a dropout3d operation.
+    let checkCanDropout3d (shape: Shape) (p:double) =
+        checkCanDropout p
+        if shape.Length <> 5 then failwithf "Expecting shape (%A) to be 5-dimensional (NxCxDxHxW: batchSize, inputChannels, inputDepth, inputHeight, inputWidth)" shape
+
+    /// Computes the shape that results from a squeeze operation.
+    let squeeze (dim: int) (shape: Shape) =
+        if dim = -1 then
+            [|for s in shape do if s <> 1 then yield s|]
+        elif shape[dim] = 1 then
+            [|for i=0 to shape.Length - 1 do
+                if i < dim then yield shape[i]
+                elif i > dim then yield shape[i]|]
+        else
+            shape
+
+    let checkCanMinMaxReduce (dim: int) (keepDim: bool) (shape: Shape) =
+        if dim >= shape.Length || dim < 0 then failwithf "Expecting 0 <= dim (%A) < %A" dim shape.Length
+        let part1 = shape[..dim-1]
+        let part2 = shape[dim+1..]
+        [| yield! part1
+           if keepDim then yield 1
+           yield! part2 |]
+
+    /// Checks if the given shape is appropriate for an unsqueeze operation and returns the resulting shape.
+    let checkCanUnsqueeze (dim: int) (shape: Shape) =
+        if dim < 0 || dim > shape.Length then failwithf "Expecting dim in range [0, %A] but received %A" shape.Length dim
+        [|for i=0 to shape.Length do
+            if i < dim then yield shape[i]
+            elif i = dim then yield 1
+            else yield shape[i-1]|]
+
+    /// Computes the shape that results from an unsqueezeAs operation.
+    let unsqueezeAs (shape1: Shape) (shape2: Shape) =
+        if shape1.Length > shape2.Length then failwithf "Expecting shape1.Length (%A) <= shape2.Length (%A)" shape1.Length shape2.Length
+        let ones = Array.create (shape2.Length - shape1.Length) 1
+        Array.append ones shape1
+
+    /// Converts the given location to a three-element bounds array in the context of the given shape.
+    let locationToBounds (shape: Shape) (location: int[]) =
+        Array2D.init location.Length 3 (fun i j -> if j=0 then location[i] elif j=1 then location[i] + shape[i] - 1 else 0)
+
+    /// Computes the shape that results from a flatten operation.
+    let flatten (startDim: int) (endDim: int) (shape: Shape) =
+        let shape = [|for i in 0..shape.Length-1 do if (i < startDim) || (i > endDim) then shape[i] else -1|]
+        let mutable emitted = false
+        [|for s in shape do if s <> -1 then s elif not emitted then emitted <- true; -1|]
+
+    /// Finds the shape into which `shape1` and `shape2` can be expanded.
+    let broadcast2 (shape1: Shape) (shape2: Shape) =
+        if canExpand shape1 shape2 || canExpand shape2 shape1 then
+            let n1 = shape1.Length
+            let n2 = shape2.Length
+            let mx = max n1 n2
+            let mn = mx - min n1 n2
+            Array.init mx (fun i ->
+                if i < mn then (if n1 > n2 then shape1[i] else shape2[i])
+                elif n1 > n2 then max shape1[i] shape2[i-mn]
+                else max shape1[i-mn] shape2[i])
+        else failwithf "shapes %A and %A are not related by broadcasting - each dimension must either be extra, equal, expand from 1" shape1 shape2
+
+    /// Finds the shape into which all the shapes can be expanded.
+    let broadcastShapes (shapes: Shape[]) = Array.reduce broadcast2 shapes
+
+    // /// Computes the shape that results from a pairwise dilation operation.
+    // let dilated2 (shape: Shape) (dilations: int[]) =
+    //     Array.map2 (*) shape dilations
+
+    /// Computes the shape that results from an undilation operation.
+    let undilatedShape (shape: Shape) (dilations: int[]) =
+        Array.map2 (fun n d -> (n + d - 1) / d) shape dilations
+
+    /// Completes the given shape with respect to a tensor with the given number of elements.
+    let complete (nelement: int) (shape: Shape) =
+        if (shape |> Array.filter (fun x -> x < -1) |> Array.length) > 0 then failwithf "Invalid shape %A" shape
+        let numUnspecified = shape |> Array.filter ((=) -1) |> Array.length
+        if numUnspecified > 1 then
+            failwithf "Cannot complete shape %A, expecting at most one unspecified dimension (-1)" shape
+        elif numUnspecified = 0 then
+            shape
+        else
+            let divisor = shape |> Array.filter ((<>) -1) |> Shape.nelement
+            if nelement % divisor <> 0 then failwithf "Cannot complete shape %A to have %A elements" shape nelement
+            let missing = nelement / divisor
+            [|for d in shape do if d = -1 then yield missing else yield d|]
+
+    /// Completes the given shape dimension with respect to a concrete dimension.
+    let completeDim (dims:int) (dim:int) =
+        if dim < -dims || dim >= dims then failwithf "Expecting dim (%A) to be within the range [%A, %A)" dim (-dims) dims
+        if dim < 0 then dims+dim
+        else dim
+
+    /// Completes the given shape dimension with respect to a concrete dimension, for the unsqueeze operation.
+    let completeDimUnsqueeze (dims:int) (dim:int) =
+        if dim < (-1 - dims) || dim >= (dims + 1) then failwithf "Expecting dim (%A) to be within the range [%A, %A)" dim (-1 - dims) (dims + 1)
+        if dim < 0 then dims + dim + 1
+        else dim
+
+    /// Completes the new shape for an expand operation based on the current shape of the tensor.
+    let completeExpand (shape: Shape) (newShape: Shape) =
+        let trim = newShape.Length - shape.Length
+        newShape |> Array.mapi (fun i x -> if i>=trim && x = -1 then shape[i - trim] else x)
+
+    let completeSliceBounds (shape: Shape) (bounds:int[,]) =
+        let newBounds = Array2D.init (bounds.GetLength(0)) (bounds.GetLength(1))
+                            (fun i j ->
+                                if j = 0 || j = 1 then completeDim shape[i] bounds[i, j]
+                                else bounds[i, j])
+        newBounds
+
+    let inline create (xs: seq<int>) = Seq.toArrayQuick xs
+
+    let resolve2dKernelSizes kernelSize kernelSizes =
+        match kernelSize, kernelSizes with
+        | Some _ , Some _ -> failwithf "Expecting only one of kernelSize, kernelSizes"
+        | Some k, None -> [|k; k|]
+        | None, Some k -> let k = k |> Array.ofSeq in if k.Length <> 2 then failwithf "Expecting kernelSizes to have length two" else k
+        | _ -> [|1; 1|]
+
+    let resolve3dKernelSizes kernelSize kernelSizes =
+        match kernelSize, kernelSizes with
+        | Some _ , Some _ -> failwithf "Expecting only one of kernelSize, kernelSizes"
+        | Some k, None -> [|k; k; k|]
+        | None, Some k -> let k = k |> Array.ofSeq in if k.Length <> 3 then failwithf "Expecting kernelSizes to have length three" else k
+        | _ -> [|1; 1; 1|]
+
+    let resolve2dConvSizes stride strides padding paddings dilation dilations =
+        let strides =
+            match stride, strides with
+            | Some _, Some _ -> failwithf "Expecting only one of stride, strides"
+            | Some s, None -> [|s; s|]
+            | None, Some s -> let s = s |> Array.ofSeq in if s.Length <> 2 then failwithf "Expecting strides to be 2-dimensional" else s
+            | _ -> [|1; 1|]
+        let paddings =
+            match padding, paddings with
+            | Some _ , Some _ -> failwithf "Expecting only one of padding, paddings"
+            | Some p, None -> [|p; p|]
+            | None, Some p -> let p = p |> Array.ofSeq in if p.Length <> 2 then failwithf "Expecting paddings to be 2-dimensional" else p
+            | _ -> [|0; 0|]
+        let dilations =
+            match dilation, dilations with
+            | Some _ , Some _ -> failwithf "Expecting only
one of dilation, dilations" + | Some d, None -> [|d; d|] + | None, Some d -> let d = d |> Array.ofSeq in if d.Length <> 2 then failwithf "Expecting dilations to be 2-dimensional" else d + | _ -> [|1; 1|] + strides, paddings, dilations + + let resolve3dConvSizes stride strides padding paddings dilation dilations = + let strides = + match stride, strides with + | Some _ , Some _ -> failwithf "Expecting only one of stride, strides" + | Some s, None -> [|s; s; s|] + | None, Some s -> let s = s |> Array.ofSeq in if s.Length <> 3 then failwithf "Expecting strides to be 3-dimensional" else s + | _ -> [|1; 1; 1|] + let paddings = + match padding, paddings with + | Some _ , Some _ -> failwithf "Expecting only one of padding, paddings" + | Some p, None -> [|p; p; p|] + | None, Some p -> let p = p |> Array.ofSeq in if p.Length <> 3 then failwithf "Expecting paddings to be 3-dimensional" else p + | _ -> [|0; 0; 0|] + let dilations = + match dilation, dilations with + | Some _ , Some _ -> failwithf "Expecting only one of dilation, dilations" + | Some d, None -> [|d; d; d|] + | None, Some d -> let d = d |> Array.ofSeq in if d.Length <> 3 then failwithf "Expecting dilations to be 3-dimensional" else d + | _ -> [|1; 1; 1|] + strides, paddings, dilations + + let resolve2dConvOutputPadding outputPadding outputPaddings = + match outputPadding, outputPaddings with + | Some _ , Some _ -> failwithf "Expecting only one of outputPadding, outputPaddings" + | Some p, None -> [|p; p|] + | None, Some p -> let p = p |> Array.ofSeq in if p.Length <> 2 then failwithf "Expecting outputPaddings to be 2-dimensional" else p + | _ -> [|0; 0|] + + let resolve3dConvOutputPadding outputPadding outputPaddings = + match outputPadding, outputPaddings with + | Some _ , Some _ -> failwithf "Expecting only one of outputPadding, outputPaddings" + | Some p, None -> [|p; p; p|] + | None, Some p -> let p = p |> Array.ofSeq in if p.Length <> 3 then failwithf "Expecting outputPaddings to be 3-dimensional" else p + | _ -> [|0; 0; 0|] + + let resolve2dMaxPoolSizes kernelSize kernelSizes stride strides padding paddings = + let kernelSizes = + match kernelSize, kernelSizes with + | Some _, Some _ -> failwithf "Expecting only one of kernelSize, kernelSizes" + | Some k, None -> [|k; k|] + | None, Some k -> let k = k |> Array.ofSeq in if k.Length <> 2 then failwithf "Expecting kernelSizes to be 2-dimensional" else k + | _ -> failwithf "Expecting either kernelSize or kernelSizes" + + let strides = + match stride, strides with + | Some _, Some _ -> failwithf "Expecting only one of stride, strides" + | Some s, None -> [|s; s|] + | None, Some s -> let s = s |> Array.ofSeq in if s.Length <> 2 then failwithf "Expecting strides to be 2-dimensional" else s + | _ -> kernelSizes + + let paddings = + match padding, paddings with + | Some _, Some _ -> failwithf "Expecting only one of padding, paddings" + | Some p, None -> [|p; p|] + | None, Some p -> let p = p |> Array.ofSeq in if p.Length <> 2 then failwithf "Expecting paddings to be 2-dimensional" else p + | _ -> [|0; 0|] + kernelSizes, strides, paddings + + let resolve3dMaxPoolSizes kernelSize kernelSizes stride strides padding paddings = + let kernelSizes = + match kernelSize, kernelSizes with + | Some _, Some _ -> failwithf "Expecting only one of kernelSize, kernelSizes" + | Some k, None -> [|k; k; k|] + | None, Some k -> let k = k |> Array.ofSeq in if k.Length <> 3 then failwithf "Expecting kernelSizes to be 3-dimensional" else k + | _ -> failwithf "Expecting either kernelSize or kernelSizes" + let 
strides =
+            match stride, strides with
+            | Some _, Some _ -> failwithf "Expecting only one of stride, strides"
+            | Some s, None -> [|s; s; s|]
+            | None, Some s -> let s = s |> Array.ofSeq in if s.Length <> 3 then failwithf "Expecting strides to be 3-dimensional" else s
+            | _ -> kernelSizes
+        let paddings =
+            match padding, paddings with
+            | Some _, Some _ -> failwithf "Expecting only one of padding, paddings"
+            | Some p, None -> [|p; p; p|]
+            | None, Some p -> let p = p |> Array.ofSeq in if p.Length <> 3 then failwithf "Expecting paddings to be 3-dimensional" else p
+            | _ -> [|0; 0; 0|]
+        kernelSizes, strides, paddings
+
+
+[<AutoOpen>]
+module ShapeAutoOpens =
+
+    /// Gets the total number of elements in a shape.
+    let shapeLength (shape: Shape) = Shape.nelement shape
+
+    /// Checks if the full bounds specify a scalar location
+    let boundsIsScalar (bounds: int[,]) =
+        let mutable res = true
+        for i=0 to bounds.GetLength(0) - 1 do
+            res <- res && bounds[i,2] = 1
+        res
+
+    /// Converts the array of three-position bounds specifications to a location.
+    let boundsToLocation (bounds: int[,]) =
+        [|for i=0 to bounds.GetLength(0) - 1 do yield bounds[i, 0]|]
+
+    /// Converts the array of three-position bounds specifications to a shape without squeezing out scalars
+    let boundsToShape (bounds: int[,]) =
+        [|for i=0 to bounds.GetLength(0) - 1 do
+            let len = bounds[i, 1] - bounds[i, 0] + 1
+            yield len|]
+
+    let shapeToFullBounds (shape: Shape) =
+        Array2D.init (shape.Length) 3 (fun i j -> if j=0 then 0 elif j=1 then shape[i]-1 else 0)
+
+    /// Mirrors the coordinates in the given dimensions in the context of the given shape.
+    let mirrorCoordinates (coordinates: int[]) (shape: int[]) (mirrorDims: int[]) =
+        if coordinates.Length <> shape.Length then failwithf "Expecting coordinates and shape of the same dimension, received %A, %A" coordinates.Length shape.Length
+        let result = Array.copy coordinates
+        for d=0 to coordinates.Length-1 do
+            if mirrorDims |> Array.contains d then
+                result[d] <- abs (coordinates[d] - shape[d] + 1)
+        result
+
+    /// Dilates the given coordinates.
+    let dilatedCoordinates (coordinates: int[]) (dilations: int[]) =
+        Array.map2 (*) coordinates dilations
+
+    /// Converts the given index to a flat index in the context of the given shape.
+    let indexToFlatIndex (shape: int[]) (index: int[]) =
+        Shape.checkCanIndex shape index
+        let mutable flatIndex = 0
+        for i=0 to index.Length - 1 do
+            let v = if i = index.Length - 1 then 1 else (Array.reduce (*) shape[i+1..])
+            flatIndex <- flatIndex + index[i] * v
+        flatIndex
+
+    /// Converts the given flat index to an index in the context of the given shape.
+    let flatIndexToIndex (shape: int[]) (flatIndex: int) =
+        let dim = shape.Length
+        let nelement = shapeLength shape
+        let index = Array.create dim 0
+        let mutable mul = nelement
+        let mutable fi = flatIndex
+        for i=dim downto 1 do
+            mul <- mul / shape[dim-i]
+            index[i-1] <- fi / mul
+            fi <- fi - index[i-1] * mul
+        index |> Array.rev
diff --git a/src/TensorMath/Tensor.Slicing.fs b/src/TensorMath/Tensor.Slicing.fs
new file mode 100644
index 0000000..fbf3016
--- /dev/null
+++ b/src/TensorMath/Tensor.Slicing.fs
@@ -0,0 +1,2695 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
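+
+// A quick sketch of the bounds convention used by the GetSlice overloads below;
+// the shapes shown are assumptions traced through Shape.checkCanGetSlice rather
+// than recorded output. Each dimension contributes one [min; max; given] row to
+// a 3-column bounds matrix: given = 1 marks an explicitly written index (a
+// length-1 result in that dimension is squeezed out), while given = 0 marks an
+// open range (the dimension is kept). For a tensor t of shape [|3|]:
+//
+//     t.GetSlice(0)               // bounds [[0; 0; 1]] -> shape [||]  (scalar; dim squeezed)
+//     t.GetSlice(None, Some 0)    // bounds [[0; 0; 0]] -> shape [|1|] (dim kept)
+//     t.GetSlice(Some 0, Some 1)  // bounds [[0; 1; 1]] -> shape [|2|]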
+
+namespace TensorMath
+
+open TensorMath
+open System.Diagnostics.CodeAnalysis
+
+[<AutoOpen>]
+module SlicingExtensions =
+    type Tensor with
+        [<ExcludeFromCodeCoverage>]
+        ///
+        member t.GetSlice(i0min:int option, i0max:int option) =
+            // Dims: 1
+            let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+            let i0min = defaultArg i0min 0
+            let i0max = defaultArg i0max (t.shape[0] - 1)
+            let bounds = array2D [[i0min; i0max; i0given]]
+            t.GetSlice(bounds)
+        [<ExcludeFromCodeCoverage>]
+        ///
+        member t.GetSlice(i0:int) =
+            // Dims: 1
+            let i0given = 1
+            let i0min = i0
+            let i0max = i0
+            let bounds = array2D [[i0min; i0max; i0given]]
+            t.GetSlice(bounds)
+        [<ExcludeFromCodeCoverage>]
+        ///
+        member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option) =
+            // Dims: 2
+            let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+            let i0min = defaultArg i0min 0
+            let i0max = defaultArg i0max (t.shape[0] - 1)
+            let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+            let i1min = defaultArg i1min 0
+            let i1max = defaultArg i1max (t.shape[1] - 1)
+            let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]]
+            t.GetSlice(bounds)
+        [<ExcludeFromCodeCoverage>]
+        ///
+        member t.GetSlice(i0min:int option, i0max:int option, i1:int) =
+            // Dims: 2
+            let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+            let i0min = defaultArg i0min 0
+            let i0max = defaultArg i0max (t.shape[0] - 1)
+            let i1given = 1
+            let i1min = i1
+            let i1max = i1
+            let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]]
+            t.GetSlice(bounds)
+        [<ExcludeFromCodeCoverage>]
+        ///
+        member t.GetSlice(i0:int, i1min:int option, i1max:int option) =
+            // Dims: 2
+            let i0given = 1
+            let i0min = i0
+            let i0max = i0
+            let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+            let i1min = defaultArg i1min 0
+            let i1max = defaultArg i1max (t.shape[1] - 1)
+            let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]]
+            t.GetSlice(bounds)
+        [<ExcludeFromCodeCoverage>]
+        ///
+        member t.GetSlice(i0:int, i1:int) =
+            // Dims: 2
+            let i0given = 1
+            let i0min = i0
+            let i0max = i0
+            let i1given = 1
+            let i1min = i1
+            let i1max = i1
+            let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]]
+            t.GetSlice(bounds)
+        [<ExcludeFromCodeCoverage>]
+        ///
+        member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option) =
+            // Dims: 3
+            let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+            let i0min = defaultArg i0min 0
+            let i0max = defaultArg i0max (t.shape[0] - 1)
+            let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+            let i1min = defaultArg i1min 0
+            let i1max = defaultArg i1max (t.shape[1] - 1)
+            let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+            let i2min = defaultArg i2min 0
+            let i2max = defaultArg i2max (t.shape[2] - 1)
+            let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]]
+            t.GetSlice(bounds)
+        [<ExcludeFromCodeCoverage>]
+        ///
+        member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int) =
+            // Dims: 3
+            let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+            let i0min = defaultArg i0min 0
+            let i0max = defaultArg i0max (t.shape[0] - 1)
+            let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+            let i1min = defaultArg i1min 0
+            let i1max = defaultArg i1max (t.shape[1] - 1)
+            let i2given = 1
+            let i2min = i2
+            let i2max = i2
+            let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]]
+            t.GetSlice(bounds)
+        [<ExcludeFromCodeCoverage>]
+        ///
+        member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option) =
+            // Dims: 3
+            let i0given =
if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int) = + // Dims: 3 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option) = + // Dims: 3 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int) = + // Dims: 3 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option) = + // Dims: 3 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2:int) = + // Dims: 3 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option) = + // Dims: 4 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let bounds = array2D 
[[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int) = + // Dims: 4 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option) = + // Dims: 4 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3:int) = + // Dims: 4 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = 1 + let i3min = i3 + let i3max = i3 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option) = + // Dims: 4 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3:int) = + // Dims: 4 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let 
i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3min:int option, i3max:int option) = + // Dims: 4 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3:int) = + // Dims: 4 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = 1 + let i3min = i3 + let i3max = i3 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option) = + // Dims: 4 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int) = + // Dims: 4 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option) = + // Dims: 4 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; 
i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3:int) = + // Dims: 4 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = 1 + let i3min = i3 + let i3max = i3 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option) = + // Dims: 4 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3:int) = + // Dims: 4 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2:int, i3min:int option, i3max:int option) = + // Dims: 4 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2:int, i3:int) = + // Dims: 4 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = 1 + let i3min = i3 + let i3max = i3 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg 
i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4:int) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int, i4:int) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max 
(t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option, i4:int) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3:int, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3:int, i4:int) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let 
i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4:int) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3:int, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3:int, i4:int) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3min:int option, i3max:int option, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if 
i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3min:int option, i3max:int option, i4:int) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3:int, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3:int, i4:int) = + // Dims: 5 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + 
t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4:int) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int, i4:int) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option, i4:int) = + // Dims: 5 + let i0given = 1 + 
let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3:int, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3:int, i4:int) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4:int) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; 
i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3:int, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3:int, i4:int) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2:int, i3min:int option, i3max:int option, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2:int, i3min:int option, i3max:int option, i4:int) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2:int, i3:int, i4min:int option, i4max:int option) = + // Dims: 5 + let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0:int, i1:int, i2:int, i3:int, i4:int) = + // Dims: 5 
+ let i0given = 1 + let i0min = i0 + let i0max = i0 + let i1given = 1 + let i1min = i1 + let i1max = i1 + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = 1 + let i4min = i4 + let i4max = i4 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5min:int option, i5max:int option) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0 + let i5min = defaultArg i5min 0 + let i5max = defaultArg i5max (t.shape[5] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5:int) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let i5given = 1 + let i5min = i5 + let i5max = i5 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4:int, i5min:int option, i5max:int option) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 
0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = 1 + let i4min = i4 + let i4max = i4 + let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0 + let i5min = defaultArg i5min 0 + let i5max = defaultArg i5max (t.shape[5] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4:int, i5:int) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = 1 + let i4min = i4 + let i4max = i4 + let i5given = 1 + let i5min = i5 + let i5max = i5 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int, i4min:int option, i4max:int option, i5min:int option, i5max:int option) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0 + let i5min = defaultArg i5min 0 + let i5max = defaultArg i5max (t.shape[5] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int, i4min:int option, i4max:int option, i5:int) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let 
i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let i5given = 1 + let i5min = i5 + let i5max = i5 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int, i4:int, i5min:int option, i5max:int option) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = 1 + let i4min = i4 + let i4max = i4 + let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0 + let i5min = defaultArg i5min 0 + let i5max = defaultArg i5max (t.shape[5] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int, i4:int, i5:int) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0 + let i2min = defaultArg i2min 0 + let i2max = defaultArg i2max (t.shape[2] - 1) + let i3given = 1 + let i3min = i3 + let i3max = i3 + let i4given = 1 + let i4min = i4 + let i4max = i4 + let i5given = 1 + let i5min = i5 + let i5max = i5 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5min:int option, i5max:int option) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0 + let i5min = defaultArg i5min 0 + let i5max = defaultArg i5max (t.shape[5] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; 
i4max; i4given]; [i5min; i5max; i5given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5:int) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0 + let i4min = defaultArg i4min 0 + let i4max = defaultArg i4max (t.shape[4] - 1) + let i5given = 1 + let i5min = i5 + let i5max = i5 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option, i4:int, i5min:int option, i5max:int option) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = 1 + let i4min = i4 + let i4max = i4 + let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0 + let i5min = defaultArg i5min 0 + let i5max = defaultArg i5max (t.shape[5] - 1) + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option, i4:int, i5:int) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0 + let i1min = defaultArg i1min 0 + let i1max = defaultArg i1max (t.shape[1] - 1) + let i2given = 1 + let i2min = i2 + let i2max = i2 + let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0 + let i3min = defaultArg i3min 0 + let i3max = defaultArg i3max (t.shape[3] - 1) + let i4given = 1 + let i4min = i4 + let i4max = i4 + let i5given = 1 + let i5min = i5 + let i5max = i5 + let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]] + t.GetSlice(bounds) + [] + /// + member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3:int, i4min:int option, i4max:int option, i5min:int option, i5max:int option) = + // Dims: 6 + let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0 + let i0min = defaultArg i0min 0 + let i0max = defaultArg i0max (t.shape[0] - 1) + let i1given = if 
i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3:int, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3:int, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1min:int option, i1max:int option, i2:int, i3:int, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3:int, i4min:int option, i4max:int option, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3:int, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3:int, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2min:int option, i2max:int option, i3:int, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3min:int option, i3max:int option, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3min:int option, i3max:int option, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3:int, i4min:int option, i4max:int option, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3:int, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3:int, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0min:int option, i0max:int option, i1:int, i2:int, i3:int, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = if i0min.IsSome && i0max.IsSome then 1 else 0
+        let i0min = defaultArg i0min 0
+        let i0max = defaultArg i0max (t.shape[0] - 1)
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int, i4min:int option, i4max:int option, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2min:int option, i2max:int option, i3:int, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3min:int option, i3max:int option, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3:int, i4min:int option, i4max:int option, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3:int, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3:int, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1min:int option, i1max:int option, i2:int, i3:int, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = if i1min.IsSome && i1max.IsSome then 1 else 0
+        let i1min = defaultArg i1min 0
+        let i1max = defaultArg i1max (t.shape[1] - 1)
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3min:int option, i3max:int option, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3:int, i4min:int option, i4max:int option, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3:int, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3:int, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2min:int option, i2max:int option, i3:int, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = if i2min.IsSome && i2max.IsSome then 1 else 0
+        let i2min = defaultArg i2min 0
+        let i2max = defaultArg i2max (t.shape[2] - 1)
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2:int, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2:int, i3min:int option, i3max:int option, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2:int, i3min:int option, i3max:int option, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2:int, i3min:int option, i3max:int option, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = if i3min.IsSome && i3max.IsSome then 1 else 0
+        let i3min = defaultArg i3min 0
+        let i3max = defaultArg i3max (t.shape[3] - 1)
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2:int, i3:int, i4min:int option, i4max:int option, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2:int, i3:int, i4min:int option, i4max:int option, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = if i4min.IsSome && i4max.IsSome then 1 else 0
+        let i4min = defaultArg i4min 0
+        let i4max = defaultArg i4max (t.shape[4] - 1)
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2:int, i3:int, i4:int, i5min:int option, i5max:int option) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = if i5min.IsSome && i5max.IsSome then 1 else 0
+        let i5min = defaultArg i5min 0
+        let i5max = defaultArg i5max (t.shape[5] - 1)
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
+    []
+    ///
+    member t.GetSlice(i0:int, i1:int, i2:int, i3:int, i4:int, i5:int) =
+        // Dims: 6
+        let i0given = 1
+        let i0min = i0
+        let i0max = i0
+        let i1given = 1
+        let i1min = i1
+        let i1max = i1
+        let i2given = 1
+        let i2min = i2
+        let i2max = i2
+        let i3given = 1
+        let i3min = i3
+        let i3max = i3
+        let i4given = 1
+        let i4min = i4
+        let i4max = i4
+        let i5given = 1
+        let i5min = i5
+        let i5max = i5
+        let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]; [i4min; i4max; i4given]; [i5min; i5max; i5given]]
+        t.GetSlice(bounds)
diff --git a/src/TensorMath/Tensor.fs b/src/TensorMath/Tensor.fs
new file mode 100644
index 0000000..db4eeb2
--- /dev/null
+++ b/src/TensorMath/Tensor.fs
@@ -0,0 +1,2431 @@
+// Copyright (c) 2016-     University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace TensorMath
+
+open TensorMath.Backends
+open TensorMath.Util
+open System
+
+#nowarn "1182" // turn off compiler-generated unused variable warnings in this file only
+
+///
+/// Represents a multi-dimensional data type containing elements of a single data type.
+///
+///
+///
+/// A tensor can be constructed from a list or sequence using
+///
+///
+/// let t = dsharp.tensor([[1.; -1.]; [1.; -1.]])
+///
+///
+[]
+type Tensor =
+    internal
+    | TensorC of raw:RawTensor
+
+    /// Gets the value of the tensor ignoring its first derivative
+    member t.primal =
+        match t with
+        | TensorC(_) -> t
+
+    /// Gets the value of the tensor ignoring all its derivatives
+    member t.primalDeep =
+        match t with
+        | TensorC(_) -> t
+
+    /// Gets the raw value of the tensor ignoring all its derivatives
+    member t.primalRaw =
+        match t with
+        | TensorC(tp) -> tp
+
+    member t.noDiff() = t
+
+    /// Gets the differentiation nesting tag of the tensor
+    member t.nestingTag =
+        match t with
+        | TensorC(_) -> failwithf "Cannot get nesting tag of constant tensor"
+
+    /// Converts the tensor to a new tensor with the given Dtype
+    member t.cast(dtype) =
+        if t.dtype = dtype then t else
+        match t with
+        | TensorC(tp) -> TensorC(tp.Cast(dtype))
+
+    /// Converts the tensor to a new tensor with the given system type
+    member t.cast<'T>() =
+        match box Unchecked.defaultof<'T> with
+        | :? float32 -> t.cast(Dtype.Float32)
+        | :? double -> t.cast(Dtype.Float64)
+        | :? int32 -> t.cast(Dtype.Int32)
+        | :? int64 -> t.cast(Dtype.Int64)
+        | :? int16 -> t.cast(Dtype.Int16)
+        | :? int8 -> t.cast(Dtype.Int8)
+        | :? byte -> t.cast(Dtype.Byte)
+        | :? bool -> t.cast(Dtype.Bool)
+        | _ -> failwithf "Cannot cast tensor with type %A to given type %A" t.dtype typeof<'T>
+
+    /// Returns a new tensor with the same contents moved to the given backend
+    member t.move(backend: Backend) =
+        // If a backend move is needed then first move to the CPU
+        let t =
+            if t.backend = backend then t
+            elif t.device = Device.CPU then t
+            else t.move(Device.CPU)
+
+        if t.backend = backend then t else
+        match t with
+        | TensorC(tp) ->
+            let tpflat = tp.ViewT([|tp.Nelement|])
+            let tpflatValues = tpflat.ToValues()
+            TensorC(tp.CreateLike(tpflatValues, backend=backend).ViewT(tp.Shape))
+
+    /// Returns a new tensor with the same contents moved to the given device
+    member t.move(device: Device) =
+        if t.device = device then t else
+        match t with
+        | TensorC(tp) -> TensorC(tp.MoveTo(device))
+
+    /// Returns a new tensor with the same contents moved to the given configuration
+    member t.move(?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        let t = match backend with None -> t | Some backend -> t.move(backend)
+        let t = match dtype with None -> t | Some dtype -> t.cast(dtype)
+        let t = match device with None -> t | Some device -> t.move(device)
+        t
+
+    member internal t.castAfterSummation(?dtype:Dtype) =
+        match dtype with
+        | None -> t
+        | Some dt -> t.cast(dt)
+
+    /// Returns a new tensor with the same contents moved to the CPU
+    member t.cpu() = t.move(Device.CPU)
+
+    /// Returns a new tensor with the same contents moved to the primary GPU device
+    member t.gpu() = t.move(Device.GPU)
+
+    /// Returns a new tensor with each element converted to type bool
+    member t.bool() = t.cast(Dtype.Bool)
+
+    /// Returns a new tensor with each element converted to type int8
+    member t.int8() = t.cast(Dtype.Int8)
+
+    /// Returns a new tensor with each element converted to type int16
+    member t.int16() = t.cast(Dtype.Int16)
+
+    /// Returns a new tensor with each element converted to type int32
+    member t.int32() = t.cast(Dtype.Int32)
+
+    /// Returns a new tensor with each element converted to type int32
+    member t.int() = t.cast(Dtype.Int32)
+
+    /// Returns a new tensor with each element converted to type int64
+    member t.int64() = t.cast(Dtype.Int64)
+
+    /// Returns a new tensor with each element converted to type float16
+    member t.float16() = t.cast(Dtype.Float16)
+
+    /// Returns a new tensor with each element converted to type bfloat16
+    member t.bfloat16() = t.cast(Dtype.BFloat16)
+
+    /// Returns a new tensor with each element converted to type float32
+    member t.float32() = t.cast(Dtype.Float32)
+
+    /// Returns a new tensor with each element converted to type float64
+    member t.float64() = t.cast(Dtype.Float64)
+
+    /// Returns a new tensor with each element converted to type float64
+    member t.float() = t.cast(Dtype.Float64)
+
+    /// Returns a new tensor with each element converted to type float64
+    member t.double() = t.cast(Dtype.Float64)
+
+    /// Returns a new tensor with each element converted to type byte
+    member t.byte() = t.cast(Dtype.Byte)
+
+    /// Gets the element type of the tensor
+    member t.dtype = t.primalRaw.Dtype
+
+    /// Gets the device of the tensor
+    member t.device = t.primalRaw.Device
+
+    /// Gets the device type of the tensor
+    member t.deviceType = t.primalRaw.Device.DeviceType
+
+    /// Gets the backend of the tensor
+    member t.backend = t.primalRaw.Backend
+
+    /// Gets the parent operation of a tensor used in reverse-mode differentiation
+    member t.parentOp =
+        match t with
+        | TensorC(_) -> failwith "Cannot get parent operation of constant Tensor"
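All of the dtype shortcuts above funnel through cast, and move(backend) stages non-CPU tensors through the CPU via a flattened ViewT/ToValues copy, as the implementation shows. A brief hypothetical sketch of the call patterns (values are illustrative, not from the patch):

    let t = dsharp.tensor([1.; 2.; 3.])               // default configuration
    let ti = t.int64()                                 // same data, Dtype.Int64
    let tm = t.move(Device.CPU, dtype=Dtype.Float64)   // backend/dtype/device in one call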
+    /// Gets the shape of the tensor
+    member t.shape = t.primalRaw.Shape
+
+    member internal t.shapeFullBounds = shapeToFullBounds(t.shape)
+
+    /// Gets the number of dimensions of the tensor
+    member t.dim = t.primalRaw.Dim
+
+    /// Gets the number of elements in the tensor
+    member t.nelement = t.primalRaw.Nelement
+
+    /// Returns the value of a scalar tensor as an object
+    member t.toScalar() = t.primalRaw.ToScalar()
+
+    /// Returns the value of a (non-scalar) tensor as an array
+    member t.toArray() = t.primalRaw.ToArray()
+
+    /// Returns the value of a 1D tensor as a 1D array
+    member t.toArray1D<'T>() =
+        if t.dim <> 1 then failwithf "Cannot convert tensor with shape %A to 1D array" t.shape
+        t.cast<'T>().toArray() :?> 'T[]
+
+    /// Returns the value of a 2D tensor as a 2D array
+    member t.toArray2D<'T>() =
+        if t.dim <> 2 then failwithf "Cannot convert tensor with shape %A to 2D array" t.shape
+        t.cast<'T>().toArray() :?> 'T[,]
+
+    /// Returns the value of a 3D tensor as a 3D array
+    member t.toArray3D<'T>() =
+        if t.dim <> 3 then failwithf "Cannot convert tensor with shape %A to 3D array" t.shape
+        t.cast<'T>().toArray() :?> 'T[,,]
+
+    /// Returns the value of a 4D tensor as a 4D array
+    member t.toArray4D<'T>() =
+        if t.dim <> 4 then failwithf "Cannot convert tensor with shape %A to 4D array" t.shape
+        t.cast<'T>().toArray() :?> 'T[,,,]
+
+    /// Returns the value of a 5D tensor as a 5D array
+    member t.toArray5D<'T>() =
+        if t.dim <> 5 then failwithf "Cannot convert tensor with shape %A to 5D array" t.shape
+        t.cast<'T>().toArray()
+
+    /// Returns the value of a 6D tensor as a 6D array
+    member t.toArray6D<'T>() =
+        if t.dim <> 6 then failwithf "Cannot convert tensor with shape %A to 6D array" t.shape
+        t.cast<'T>().toArray()
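The typed accessors above only validate dim and then cast, so element types are converted rather than reinterpreted. A hypothetical sketch (values illustrative):

    let m  = dsharp.tensor([[1.; 2.]; [3.; 4.]])
    let a2 = m.toArray2D<float32>()   // 'T[,] after an internal cast to Float32
    let x  = m[1, 1]                  // both dims fixed, so the slice is a scalar tensor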
+    /// Saves the tensor to the given file using a bespoke binary format.
+    ///
+    /// The binary format records the elements, backend, element type and shape. It does not record the device.
+    /// The format used may change from version to version of TensorMath.
+    ///
+    member t.save(fileName:string) = saveBinary t fileName
+
+    /// Loads the tensor from the given file using the given element type and configuration.
+    ///
+    /// The file from which to load the tensor.
+    /// The device of the resulting tensor. Defaults to the current default device.
+    /// The element type of the resulting tensor. Defaults to the element type of the saved tensor.
+    /// The backend of the resulting tensor. Defaults to the current default backend.
+    ///
+    ///
+    /// The backend at the time of saving the tensor must be available when the tensor is reloaded.
+    /// The tensor is first loaded into that backend and then moved. As a result, intermediate tensors may be created
+    /// in the process of reloading.
+    ///
+    static member load(fileName:string, ?device: Device, ?dtype: Dtype, ?backend: Backend):Tensor =
+        let t : Tensor = loadBinary fileName
+        let device = defaultArg device Device.Default
+        let dtype = defaultArg dtype t.dtype
+        let backend = defaultArg backend Backend.Default
+        t.move(device=device, dtype=dtype, backend=backend)
+
+    /// Returns the tensor after min-max scaling
+    member t.normalize() =
+        let min = t.min()
+        let range = t.max() - min
+        if range = t.zeroLike() then
+            t.zerosLike()
+        else
+            (t - min) / range
+
+    /// Returns the tensor after standardization (z-score normalization)
+    member t.standardize() =
+        let stddev:Tensor = t.std()
+        if stddev = t.zeroLike() || stddev.hasnan() then
+            t.zerosLike()
+        else
+            (t - t.mean()) / stddev
+
+    /// Returns a string summarising the tensor
+    member t.summary() =
+        match t with
+        | TensorC(_) -> sprintf "Tensor %A" t.shape
+
+    /// A debugging routine that returns the ancestors of a tensor involved in reverse-mode automatic differentiation
+    member t.ancestors() =
+        let mutable p = []
+        let rec ancestors (t:obj) d =
+            match t with
+            | :? Tensor as t ->
+                p <- p |> List.append [t]
+                match t with
+                | TensorC(_) -> sprintf "Tensor %A" t.shape
+            | :? (Tensor array) as ts ->
+                // p <- p |> List.append (ts |> Array.toList)
+                let mutable ret = ""
+                let mutable prefix = ""
+                for t in ts do
+                    ret <- ret + sprintf "%s%s%s" prefix (String.replicate d " ") (ancestors t (d+1))
+                    prefix <- "\n"
+                ret
+            // | _ -> indentNewLines (sprintf "%A" t) d
+            | _ -> ""
+        let ps = ancestors t 1
+        p |> List.rev, ps
+
+    override t.ToString() =
+        let rec fmt postfix (t: Tensor) =
+            match t with
+            | TensorC(p) -> p.Print(postfix)
+        fmt "" t
+
+    override t.Equals(other) =
+        match other with
+        | :? Tensor as tensor -> t.primalRaw.Equals(tensor.primalRaw)
+        | _ -> false
+
+    override t.GetHashCode() = hash t.primalRaw
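normalize and standardize above are plain affine rescalings of the whole tensor. A sketch of what they compute (hypothetical values; the z-scores assume the conventional unbiased, n-1 standard deviation from std):

    let v = dsharp.tensor([0.; 5.; 10.])
    v.normalize()     // (v - min) / (max - min)  -> [0.0; 0.5; 1.0]
    v.standardize()   // (v - mean) / std         -> approximately [-1.0; 0.0; 1.0]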
+    interface System.IComparable with
+        override t.CompareTo(other) =
+            match other with
+            | :? Tensor as tensor ->
+                if t.dim = tensor.dim && t.dim = 0 then
+                    (t.primalRaw :> System.IComparable).CompareTo(tensor.primalRaw)
+                else
+                    failwith "Cannot compare non-scalar Tensors"
+            | _ -> failwith "Cannot compare Tensor with another type"
+
+    /// Get the scalar zero tensor for the current configuration
+    static member Zero = TensorC(RawTensor.Zero())
+
+    /// Get the scalar one tensor for the current configuration
+    static member One = TensorC(RawTensor.One())
+
+    /// Convert a scalar tensor to a float32 value
+    static member op_Explicit(tensor:Tensor):single = tensor.toScalar().toSingle()
+
+    /// Convert a scalar tensor to a float64 value
+    static member op_Explicit(tensor:Tensor):double = tensor.toScalar().toDouble()
+
+    /// Convert a scalar tensor to a byte value
+    static member op_Explicit(tensor:Tensor):byte = tensor.toScalar().toByte()
+
+    /// Convert a scalar tensor to a signed byte value
+    static member op_Explicit(tensor:Tensor):int8 = tensor.toScalar().toSByte()
+
+    /// Convert a scalar tensor to an int16 value
+    static member op_Explicit(tensor:Tensor):int16 = tensor.toScalar().toInt16()
+
+    /// Convert a scalar tensor to an int32 value
+    static member op_Explicit(tensor:Tensor):int32 = tensor.toScalar().toInt32()
+
+    /// Convert a scalar tensor to an int64 value
+    static member op_Explicit(tensor:Tensor):int64 = tensor.toScalar().toInt64()
+
+    /// Convert a scalar tensor to a boolean value
+    static member op_Explicit(tensor:Tensor):bool = tensor.toScalar().toBool()
+
+    interface System.IConvertible with
+        override t.GetTypeCode() =
+            match t.dtype with
+            | Dtype.Byte -> TypeCode.Byte
+            | Dtype.Int8 -> TypeCode.SByte
+            | Dtype.Int16 -> TypeCode.Int16
+            | Dtype.Int32 -> TypeCode.Int32
+            | Dtype.Int64 -> TypeCode.Int64
+            | Dtype.Float32 -> TypeCode.Single
+            | Dtype.Float64 -> TypeCode.Double
+            | Dtype.Bool -> TypeCode.Boolean
+            | Dtype.BFloat16 -> TypeCode.Single
+            | Dtype.Float16 -> TypeCode.Single
+
+        override t.ToSingle(fmt) = t.toScalar().ToSingle(fmt)
+        override t.ToDouble(fmt) = t.toScalar().ToDouble(fmt)
+        override t.ToByte(fmt) = t.toScalar().ToByte(fmt)
+        override t.ToSByte(fmt) = t.toScalar().ToSByte(fmt)
+        override t.ToInt16(fmt) = t.toScalar().ToInt16(fmt)
+        override t.ToInt32(fmt) = t.toScalar().ToInt32(fmt)
+        override t.ToInt64(fmt) = t.toScalar().ToInt64(fmt)
+        override t.ToBoolean(fmt) = t.toScalar().ToBoolean(fmt)
+        override t.ToChar(fmt) = t.toScalar().ToChar(fmt)
+        override t.ToDateTime(fmt) = t.toScalar().ToDateTime(fmt)
+        override t.ToDecimal(fmt) = t.toScalar().ToDecimal(fmt)
+        override t.ToString(fmt) = t.toScalar().ToString(fmt)
+        override t.ToType(ty, fmt) = t.toScalar().ToType(ty, fmt)
+        override t.ToUInt16(fmt) = t.toScalar().ToUInt16(fmt)
+        override t.ToUInt32(fmt) = t.toScalar().ToUInt32(fmt)
+        override t.ToUInt64(fmt) = t.toScalar().ToUInt64(fmt)
+
+    /// Convert a scalar tensor to a float32 value
+    member t.toSingle() = t.toScalar().toSingle()
+
+    /// Convert a scalar tensor to a float64 value
+    member t.toDouble() = t.toScalar().toDouble()
+
+    /// Convert a scalar tensor to a byte value
+    member t.toByte() = t.toScalar().toByte()
+
+    /// Convert a scalar tensor to a signed byte value
+    member t.toSByte() = t.toScalar().toSByte()
+
+    /// Convert a scalar tensor to an int16 value
+    member t.toInt16() = t.toScalar().toInt16()
+
+    /// Convert a scalar tensor to an int32 value
+    member t.toInt32() = t.toScalar().toInt32()
+
+    /// Convert a scalar tensor to an int64 value
+    member t.toInt64() = t.toScalar().toInt64()
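Together with op_Explicit, the IConvertible implementation lets scalar tensors flow through standard .NET conversion APIs. A hypothetical sketch (assumes open System; conversion rounding follows the underlying scalar implementation):

    let s = dsharp.tensor(3.5)
    let d : double = Convert.ToDouble(s)   // routed through the IConvertible overrides above
    let f = float32 s                       // routed through op_Explicit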
+    /// Convert a scalar tensor to a boolean value
+    member t.toBool() = t.toScalar().toBool()
+
+    /// Returns the size in bytes of an individual element in this tensor. Depending on the dtype and backend configuration, this is not guaranteed to be correct and can behave differently in different runtime environments.
+    member t.elementSize =
+        let bitsPerElement =
+            match t.backend, t.dtype with
+            | Backend.Reference, Dtype.BFloat16 -> 32 // Backed by float32
+            | Backend.Reference, Dtype.Float16 -> 32 // Backed by float32
+            | Backend.Reference, Dtype.Float32 -> 32
+            | Backend.Reference, Dtype.Float64 -> 64
+            | Backend.Reference, Dtype.Int8 -> 8
+            | Backend.Reference, Dtype.Byte -> 8
+            | Backend.Reference, Dtype.Int16 -> 16
+            | Backend.Reference, Dtype.Int32 -> 32
+            | Backend.Reference, Dtype.Int64 -> 64
+            | Backend.Reference, Dtype.Bool -> 8 // Not reliable https://stackoverflow.com/a/28515361
+            | Backend.Torch, Dtype.BFloat16 -> 16
+            | Backend.Torch, Dtype.Float16 -> 16
+            | Backend.Torch, Dtype.Float32 -> 32
+            | Backend.Torch, Dtype.Float64 -> 64
+            | Backend.Torch, Dtype.Int8 -> 8
+            | Backend.Torch, Dtype.Byte -> 8
+            | Backend.Torch, Dtype.Int16 -> 16
+            | Backend.Torch, Dtype.Int32 -> 32
+            | Backend.Torch, Dtype.Int64 -> 64
+            | Backend.Torch, Dtype.Bool -> 8 // https://github.com/pytorch/pytorch/issues/41571
+            | _ -> failwithf "Unknown backend, dtype configuration to compute memory size"
+        bitsPerElement / 8
+
+    /// Returns the size in bytes of the total memory used by this tensor. Depending on the dtype and backend configuration, this is not guaranteed to be correct and can behave differently in different runtime environments.
+    member t.memorySize = (int64 t.nelement) * (int64 t.elementSize)
+
+    /// Indicates if two tensors have the same shape and all corresponding elements are equal within the
+    /// given tolerances.
+    member t.allclose(tensor:Tensor, ?relativeTolerance, ?absoluteTolerance) =
+        let relativeTolerance = defaultArg relativeTolerance 1e-5
+        let absoluteTolerance = defaultArg absoluteTolerance 1e-8
+        t.primalRaw.AllClose(tensor.primalRaw, relativeTolerance, absoluteTolerance)
+
+    /// Returns a new tensor filled with '0' values for the given shape, element type and configuration, defaulting to the
+    /// shape and configuration of the input tensor.
+    member a.zerosLike(?shape:seq<int>, ?device, ?dtype, ?backend) =
+        let shape = defaultArg shape (a.shape |> Array.toSeq)
+        TensorC(a.primalRaw.ZerosLike(shape |> Array.ofSeq, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new tensor filled with '1' values for the given shape, element type and configuration, defaulting to the
+    /// shape and configuration of the input tensor.
+    member a.onesLike(?shape:seq<int>, ?device, ?dtype, ?backend) =
+        let shape = defaultArg shape (a.shape |> Array.toSeq)
+        TensorC(a.primalRaw.OnesLike(shape |> Array.ofSeq, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new tensor filled with the given scalar value for the given shape, element type and configuration, defaulting to the
+    /// shape and configuration of the input tensor.
+    member a.fullLike(value:scalar, ?shape:seq<int>, ?device, ?dtype, ?backend) =
+        let shape = defaultArg shape (a.shape |> Array.toSeq)
+        TensorC(a.primalRaw.FullLike(shape |> Array.ofSeq, value, ?device=device, ?dtype=dtype, ?backend=backend))
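memorySize is simply nelement * elementSize, with elementSize read from the backend/dtype table above. A hypothetical sketch (shapes illustrative):

    let a = dsharp.zeros([1024; 1024])   // Float32 by default
    a.elementSize                         // 4 bytes per Float32 element
    a.memorySize                          // 1048576L * 4L = 4194304L bytes
    a.allclose(a, relativeTolerance=1e-5) // true: identical within rtol/atol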
+    member a.scalarLike(scalar:scalar, ?device, ?dtype, ?backend) =
+        a.fullLike(scalar, [], ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// Returns a new tensor with random values drawn from the uniform distribution [0,1) for the
+    /// given shape, element type and configuration, defaulting to the shape and configuration of the input tensor.
+    member a.randLike(?shape:seq<int>, ?device, ?dtype, ?backend) =
+        let shape = defaultArg shape (a.shape |> Array.toSeq)
+        TensorC(a.primalRaw.RandomLike((shape |> Array.ofSeq), ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new tensor with random values drawn from the standard normal distribution, for the
+    /// given shape, element type and configuration, defaulting to the shape and configuration of the input tensor.
+    member a.randnLike(?shape:seq<int>, ?device, ?dtype, ?backend) =
+        let shape = defaultArg shape (a.shape |> Array.toSeq)
+        TensorC(a.primalRaw.RandomNormalLike(shape |> Array.ofSeq, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new tensor with random integer values drawn from the given range, for the
+    /// given shape, element type and configuration, defaulting to the shape and configuration of the input tensor.
+    member a.randintLike(low:int, high:int, ?shape:seq<int>, ?device, ?dtype, ?backend) =
+        let shape = defaultArg shape (a.shape |> Array.toSeq)
+        TensorC(a.primalRaw.RandomIntLike(shape |> Array.ofSeq, low, high, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a scalar '0' tensor for the given element type and configuration, defaulting to
+    /// the element type and configuration of the input tensor.
+    member a.zeroLike(?device, ?dtype, ?backend) = TensorC(a.primalRaw.ZeroLike(?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a scalar '1' tensor for the given element type and configuration, defaulting to
+    /// the element type and configuration of the input tensor.
+    member a.oneLike(?device, ?dtype, ?backend) = TensorC(a.primalRaw.OneLike(?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a tensor in the manner of arange for the given element type and configuration, defaulting to
+    /// the element type and configuration of the input tensor.
+    member a.arangeLike(endVal:float, ?startVal:float, ?step:float, ?device, ?dtype, ?backend) =
+        let startVal = defaultArg startVal 0.
+        let step = defaultArg step 1.
+        let length = (endVal - startVal) / step |> ceil |> int
+        let v = Array.init length (fun i -> startVal + float(i) * step)
+        a.like(box v, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// Returns a tensor in the manner of arange for the given element type and configuration, defaulting to
+    /// the element type and configuration of the input tensor.
+    member a.arangeLike(endVal:int, ?startVal:int, ?step:int, ?device, ?dtype, ?backend) =
+        let endVal = endVal |> float
+        let startVal = defaultArg startVal 0 |> float
+        let step = defaultArg step 1 |> float
+        let dtype = defaultArg dtype Dtype.Int32
+        a.arangeLike(endVal=endVal, startVal=startVal, step=step, ?device=device, dtype=dtype, ?backend=backend)
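    // The arangeLike overloads above produce ceil((endVal - startVal) / step)
    // elements, so fractional steps round the count up (hypothetical values):
    //   t.arangeLike(endVal=5.0)           -> 0., 1., 2., 3., 4.   (5 elements)
    //   t.arangeLike(endVal=1.0, step=0.3) -> 0., 0.3, 0.6, 0.9    (ceil(3.33) = 4)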
+    /// Returns a tensor in the manner of linspace for the given element type and configuration, defaulting to
+    /// the element type and configuration of the input tensor.
+    member a.linspaceLike(startVal:float, endVal:float, steps:int, ?device, ?dtype, ?backend) =
+        let stepVal = (endVal - startVal) / (float (steps - 1))
+        let v = Array.init steps (fun i -> startVal + (float i) * stepVal)
+        a.like(box v, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// Returns a tensor in the manner of linspace for the given element type and configuration, defaulting to
+    /// the element type and configuration of the input tensor.
+    member a.linspaceLike(startVal:int, endVal:int, steps:int, ?device, ?dtype, ?backend) =
+        a.linspaceLike(startVal |> float, endVal |> float, steps, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// Returns a tensor in the manner of logspace for the given element type and configuration, defaulting to
+    /// the element type and configuration of the input tensor.
+    member a.logspaceLike(startVal:float, endVal:float, steps:int, ?baseVal:float, ?device, ?dtype, ?backend) =
+        let baseVal = defaultArg baseVal 10.
+        a.scalarLike(baseVal, ?device=device, ?dtype=dtype, ?backend=backend).pow(a.linspaceLike(startVal, endVal, steps, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a tensor in the manner of logspace for the given element type and configuration, defaulting to
+    /// the element type and configuration of the input tensor.
+    member a.logspaceLike(startVal:int, endVal:int, steps:int, ?baseVal:int, ?device, ?dtype, ?backend) =
+        let baseVal = defaultArg baseVal 10
+        a.logspaceLike(startVal |> float, endVal |> float, steps, baseVal |> float, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    ///
+    /// Returns a tensor from the .NET data in value for the given element type and configuration, defaulting to
+    /// the element type and configuration of the input tensor.
+    ///
+    member a.like(value, ?device, ?dtype, ?backend) = TensorC(a.primalRaw.CreateLike(value, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new tensor with underlying storage copied.
+    ///
+    /// This method discards differentiability and returns a constant tensor.
+    ///
+    member a.clone() = TensorC(a.primalRaw.Clone())
+
+    /// Returns a tensor in the manner of onehot for the given element type and configuration, defaulting to
+    /// the element type and configuration of the input tensor.
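    // Since logspaceLike above is baseVal raised to linspaceLike, the points are
    // logarithmically spaced (hypothetical values):
    //   t.logspaceLike(0., 2., 3)             -> 1., 10., 100.   (base 10)
    //   t.logspaceLike(0., 3., 4, baseVal=2.) -> 1., 2., 4., 8.  (base 2)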
+ member a.onehotLike(length:int, hot:int, ?device, ?dtype, ?backend) = + if hot < 0 || hot >= length then failwithf "Expecting 0 <= hot < length" + a.zerosLike([|length|], ?device=device, ?dtype=dtype, ?backend=backend).addSlice([|hot|], a.onesLike([|1|], ?device=device, ?dtype=dtype, ?backend=backend)) + + /// Computes element-wise \(a < b\), returning a boolean tensor containing a true at each location where the comparison is true + member a.lt(b:Tensor) = TensorC(a.primalRaw.LtTT(b.primalRaw)) + + /// Computes element-wise \(a > b\), returning a boolean tensor containing a true at each location where the comparison is true + member a.gt(b:Tensor) = TensorC(a.primalRaw.GtTT(b.primalRaw)) + + /// Computes element-wise \(a \leq b\), returning a boolean tensor containing a true at each location where the comparison is true + member a.le(b:Tensor) =TensorC(a.primalRaw.LeTT(b.primalRaw)) + + /// Computes element-wise \(a \geq b\), returning a boolean tensor containing a true at each location where the comparison is true + member a.ge(b:Tensor) = TensorC(a.primalRaw.GeTT(b.primalRaw)) + + /// Computes element-wise \(a = b\), returning a boolean tensor containing a true at each location where the comparison is true + member a.eq(b:Tensor) = TensorC(a.primalRaw.EqTT(b.primalRaw)) + + /// Computes element-wise \(a \neq b\), returning a boolean tensor containing a true at each location where the comparison is true + member a.ne(b:Tensor) = let e = a.eq(b) in e.lt(e.onesLike()) // Implement "not equal" relying on "equal" + + /// Returns a new tensor with boolean elements representing if each element is +/-INF or not. + member a.isinf() = TensorC(a.primalRaw.IsInfT()) + + /// Returns a new tensor with boolean elements representing if each element is NaN or not. Complex values are considered NaN when either their real and/or imaginary part is NaN. + member a.isnan() = TensorC(a.primalRaw.IsNaNT()) + + /// Gets if any value in the tensor is +/- INF. + member a.hasinf() = a.isinf().sum() > a.zeroLike(dtype=Dtype.Int64) + + /// Gets if any value in the tensor is NaN. + member a.hasnan() = a.isnan().sum() > a.zeroLike(dtype=Dtype.Int64) + + /// Gets if any value in the tensor is NaN or +/- INF. + member a.hasinfnan() = a.hasinf() || a.hasnan() + + /// Gets the index of a maximum value in the tensor. + member a.argmax() = + a.primalRaw.MaxIndexT() + + /// Returns the indexes of maximum values of the primal of the tensor, reducing the given dimension. + /// The resulting tensor does not participate in reverse or forward differentiation. It can be used as input to another operation such as dsharp.gather. + member a.argmax(dim:int, ?keepDim: bool) = + let keepDim = defaultArg keepDim false + Shape.checkCanMinMaxReduce dim keepDim a.shape |> ignore + a.primalRaw.MaxReduceT(dim, keepdim=keepDim) |> snd |> TensorC + + /// Gets the index of a minimum value in the tensor. + member a.argmin() = + a.primalRaw.MinIndexT() + + /// Returns the indexes of minimum values of the primal of the tensor, reducing the given dimension. + /// The resulting tensor does not participate in reverse or forward differentiation. It can be used as input to another operation such as dsharp.gather. + member a.argmin(dim: int, ?keepDim: bool) = + let keepDim = defaultArg keepDim false + Shape.checkCanMinMaxReduce dim keepDim a.shape |> ignore + a.primalRaw.MinReduceT(dim, keepdim=keepDim) |> snd |> TensorC + + /// Returns the maximum value along the given dimension of all elements in the input tensor. 
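    // The ne implementation above flips eq by comparing against ones, exploiting
    // false < true on boolean tensors (hypothetical values):
    //   a = [1.; 2.], b = [1.; 3.]
    //   let e = a.eq(b)    -> [true;  false]
    //   e.lt(e.onesLike()) -> [false; true ]   = a.ne(b)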
+ member a.max(dim:int, ?keepDim:bool) = + let keepdim = defaultArg keepDim false + let indices = a.argmax(dim=dim, keepDim=true) + let ret:Tensor = a.gather(dim, indices) + if keepdim then ret else ret.squeeze(dim) + + /// Returns the minimum value along the given dimension of all elements in the input tensor. + member a.min(dim:int, ?keepDim:bool) = + let keepdim = defaultArg keepDim false + let indices = a.argmin(dim=dim, keepDim=true) + let ret:Tensor = a.gather(dim, indices) + if keepdim then ret else ret.squeeze(dim) + + /// Returns the maximum value of all elements in the input tensor. + member a.max() = if a.dim = 0 then a else a[a.argmax()] + + /// Returns the minimum value of all elements in the input tensor. + member a.min() = if a.dim = 0 then a else a[a.argmin()] + + /// Returns the element-wise maximum of the elements in the two tensors. + member a.max(b:Tensor) = + if a.dtype <> b.dtype then + match Dtype.widen a.dtype b.dtype with + | None -> opNotSupported "max" a.dtype b.dtype + | Some tnew -> + let aCast = a.cast(tnew) + let bCast = b.cast(tnew) + aCast.max(bCast) + elif a.dtype = Dtype.Byte || a.dtype = Dtype.Bool then + let result:Tensor = a.cast(Dtype.Int16).max(b.cast(Dtype.Int16)) + result.cast(a.dtype) + else + let result:Tensor = ((a + b) + Tensor.Abs(b - a)) / 2 + if result.dtype <> a.dtype then result.cast(a.dtype) else result + + /// Returns the element-wise minimum of the elements in the two tensors. + member a.min(b:Tensor) = + if a.dtype <> b.dtype then + match Dtype.widen a.dtype b.dtype with + | None -> opNotSupported "min" a.dtype b.dtype + | Some tnew -> + let aCast = a.cast(tnew) + let bCast = b.cast(tnew) + aCast.min(bCast) + elif a.dtype = Dtype.Byte || a.dtype = Dtype.Bool then + let result:Tensor = a.cast(Dtype.Int16).min(b.cast(Dtype.Int16)) + result.cast(a.dtype) + else + let result:Tensor = ((a + b) - Tensor.Abs(a - b)) / 2 + if result.dtype <> a.dtype then result.cast(a.dtype) else result + + /// + /// Returns a tensor with the diagonal elements with respect to dim1 and dim2. + /// The argument offset controls which diagonal to consider. + /// + member a.diagonal(?offset:int, ?dim1:int, ?dim2:int) = + let a = a + // TODO: The following can be slow, especially for reverse mode differentiation of the diagonal of a large tensor. Consider a faster implementation. + if a.dim < 2 then failwithf "Tensor must be at least 2-dimensional" + let offset = defaultArg offset 0 + let dim1 = defaultArg dim1 0 + let dim2 = defaultArg dim2 1 + let mutable finished = false + let mutable d = [] + let mutable i = 0 + let mutable j = offset + while not finished do + if i >= a.shape[dim1] || j >= a.shape[dim2] then + finished <- true + elif j >= 0 then + // let bounds = array2D [[i0min; i0max; i0given]; [i1min; i1max; i1given]; [i2min; i2max; i2given]; [i3min; i3max; i3given]] + let bounds = Array2D.init (a.dim) 3 (fun ii jj -> + if ii = dim1 then + if jj < 2 then i else 1 + elif ii = dim2 then + if jj < 2 then j else 1 + else + if jj = 0 then 0 + elif jj = 1 then a.shape[ii]-1 + else 0 + ) + d <- [a.GetSlice(bounds)] |> List.append d + i <- i + 1 + j <- j + 1 + if d |> List.isEmpty then failwithf "Empty diagonal" + Tensor.stack(d) + + /// Returns the sum of the elements of the diagonal of the input 2-D matrix. + member a.trace() = let d:Tensor = a.diagonal() in d.sum() + + /// Returns a new view of the object tensor with singleton dimensions expanded to a larger size. + /// + /// Passing -1 as the size for a dimension means not changing the size of that dimension. 
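    // Worked check of the identity behind the element-wise max above,
    // max(a, b) = ((a + b) + |b - a|) / 2:
    //   a=3, b=5: ((3 + 5) + |5 - 3|) / 2 = (8 + 2) / 2 = 5
    //   a=5, b=3: ((5 + 3) + |3 - 5|) / 2 = (8 + 2) / 2 = 5
    // The Byte/Bool detour through Int16 keeps (a + b) from wrapping around in
    // 8-bit arithmetic.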
+ /// The tensor can be also expanded to a larger number of dimensions, and the new ones will be appended + /// at the front. For the new dimensions, the size cannot be set to -1. + /// + /// + /// Expanding a tensor does not allocate new memory, but only creates a new view on the existing tensor + /// where a dimension of size one is expanded to a larger size by setting the stride to 0. Any dimension + /// of size 1 can be expanded to an arbitrary value without allocating new memory. + /// + /// + member a.expand(newShape:seq) = + let newShape = newShape|>Shape.create + if a.shape = newShape then a + else + let newShape = Shape.completeExpand a.shape newShape // Handles -1 semantics + Shape.checkCanExpand a.shape newShape + match a with + | TensorC(ap) -> TensorC(ap.Expand(newShape)) + + /// Expand this tensor to the same size as the other. + member a.expandAs(b:Tensor) = a.expand(b.shape) + + /// Convert tensor to an image tensor with shape Channels x Height x Width + member t.toImage(?pixelMin:double, ?pixelMax:double, ?normalize:bool, ?gridCols:int) = + let pixelMin = defaultArg pixelMin 0. + let pixelMax = defaultArg pixelMax 1. + let normalize = defaultArg normalize false + if t.dim < 1 || t.dim > 4 then failwithf "Expecting the tensor 1 <= dim (%A) <= 4, received shape %A" t.dim t.shape + + if t.dim = 4 then // we make an image grid + let mutable numItems = t.shape[0] + let cols = defaultArg gridCols (int(ceil(sqrt(float(numItems))))) + if cols < 1 || cols > numItems then failwithf "Expecting 1 <= gridCols (%A) <= %A" cols numItems + let mutable rows = 0 + let mutable items = numItems + while items > 0 do + rows <- rows + 1 + items <- items - cols + let c, h, w = t.shape[1], t.shape[2], t.shape[3] + let mutable tgrid = t.zerosLike([h*rows; w*cols; c]) + // transform [n, c, h, w] to [n, h, w, c] + let t:Tensor = t.transpose(1, 3) + let t = t.transpose(2, 1) + let mutable i = 0 + for row=0 to rows-1 do + for col=0 to cols-1 do + if i < numItems then + tgrid <- tgrid.addSlice([row*h; col*w; 0], t[i]) + i <- i + 1 + // transform [h, w, c] to [c, h, w] + tgrid <- tgrid.transpose(0, 2) + tgrid <- tgrid.transpose(1, 2) + tgrid.toImage(pixelMin=pixelMin, pixelMax=pixelMax, normalize=normalize) + else + let mutable pixels = t + if t.dim = 1 then + pixels <- pixels.view([1; 1; t.nelement]) + pixels <- pixels.expand([3; -1; -1]) + elif t.dim = 2 then + pixels <- pixels.view([1; t.shape[0]; t.shape[1]]) + pixels <- pixels.expand([3; -1; -1]) + else + if t.shape[0] = 1 then + pixels <- pixels.expand([3; -1; -1]) + elif t.shape[0] <> 3 then + failwithf "Expecting the number of channels (%A) to be 1 or 3" t.shape[0] + if pixelMin < 0. || pixelMin > 1. then failwithf "Expecting 0 <= pixelMin (%A) <= 1" pixelMin + if pixelMax < 0. || pixelMax > 1. then failwithf "Expecting 0 <= pixelMax (%A) <= 1" pixelMax + let pixelRange = pixelMax - pixelMin + if pixelRange <= 0. 
then failwithf "Expecting pixelMin (%A) < pixelMax (%A)" pixelMin pixelMax + if normalize then + pixels <- pixels.normalize() + pixels <- pixelMin + pixels.mul(pixelRange) + pixels + + /// Convert tensor to a grayscale image tensor and return a string representation approximating grayscale values + member t.toImageString(?pixelMin:double, ?pixelMax:double, ?normalize:bool, ?gridCols:int, ?asciiPalette:string) = + let asciiPalette = defaultArg asciiPalette """ .'`,^:";~-_+<>i!lI?/\|()1{}[]rcvunxzjftLCJUYXZO0Qoahkbdpqwm*WMB8&%$#@""" + let pixels:Tensor = t.toImage(?pixelMin=pixelMin, ?pixelMax=pixelMax, ?normalize=normalize, ?gridCols=gridCols).mean(0) // make it grayscale + let numToAscii (numZeroToOne:float) = + let c = int (numZeroToOne * float(asciiPalette.Length)) - 1 + let c = min (asciiPalette.Length - 1) (max 0 c) + asciiPalette[c] + let h, w = pixels.shape[0], pixels.shape[1] + let sb = System.Text.StringBuilder() + for y=0 to h-1 do + for x=0 to w-1 do + sb.Append(numToAscii (float(pixels[y, x]))) |> ignore + sb.AppendLine() |> ignore + sb.ToString() + + member t.GetSlice(bounds:int[,]) = + let t = t + if t.dim = 0 then failwith "Cannot slice a scalar Tensor" + let fullBounds = t.shapeFullBounds |> Array2D.copy + bounds |> Array2D.iteri (fun i j v -> + if j=1 && v >= t.shape[i] then failwithf "Index outside the bounds of Tensor shape %A" t.shape + fullBounds[i, j] <- v) + if fullBounds = t.shapeFullBounds then t // We don't need to slice as the result of the slicing would be the same with this existing tensor + else + match t with + | TensorC(ap) -> TensorC(ap.GetSlice(fullBounds)) + + /// Get the item at the given index as a scalar tensor. + member t.Item + with get([] index:int[]) = + if t.dim = 0 then failwith "Cannot index a scalar Tensor" + if index.Length > t.dim then failwithf "Expecting an index with <=%i dimensions" t.dim + let bounds = Array2D.init index.Length 3 (fun i j -> if j=2 then 1 else index[i]) + t.GetSlice(bounds) + + /// + /// Creates a new tensor from the raw tensor. + /// + /// The given raw tensor. + static member ofRawTensor(rawTensor: RawTensor) = TensorC rawTensor + + /// + /// Creates a new tensor from the given data, using the given element type and configuration. + /// + /// The .NET object used to form the initial values for the tensor. + /// The desired device of returned tensor. Default: if None, uses Device.Default. + /// The desired element type of returned tensor. Default: if None, uses Dtype.Default. + /// The desired backend of returned tensor. Default: if None, uses Backend.Default. + /// The fastest creation technique is a one dimensional array matching the desired dtype. Then use 'view' to reshape. + static member create(value:obj, ?device:Device, ?dtype:Dtype, ?backend:Backend) = + // Fast paths to create directly from 1D array matching the dtype + match value, defaultArg dtype Dtype.Default with + | (:? (int32[]) as arr), Dtype.Int32 -> TensorC(RawTensor.CreateFromFlatArray(arr, shape=[| arr.Length |], ?device=device, ?dtype=dtype, ?backend=backend)) + | (:? (single[]) as arr), Dtype.Float32 -> TensorC(RawTensor.CreateFromFlatArray(arr, shape=[| arr.Length |], ?device=device, ?dtype=dtype, ?backend=backend)) + | (:? (double[]) as arr), Dtype.Float64 -> TensorC(RawTensor.CreateFromFlatArray(arr, shape=[| arr.Length |], ?device=device, ?dtype=dtype, ?backend=backend)) + | (:? (int16[]) as arr), Dtype.Int16 -> TensorC(RawTensor.CreateFromFlatArray(arr, shape=[| arr.Length |], ?device=device, ?dtype=dtype, ?backend=backend)) + | (:? 
(int64[]) as arr), Dtype.Int64 -> TensorC(RawTensor.CreateFromFlatArray(arr, shape=[| arr.Length |], ?device=device, ?dtype=dtype, ?backend=backend)) + // Extra type match check is needed to distinguish between arrays holding byte and int8, see https://github.com/dotnet/fsharp/issues/10202 + | (:? (byte[]) as arr), Dtype.Byte when DataConverter.typesMatch arr -> TensorC(RawTensor.CreateFromFlatArray(arr, shape=[| arr.Length |], ?device=device, ?dtype=dtype, ?backend=backend)) + | (:? (int8[]) as arr), Dtype.Int8 when DataConverter.typesMatch arr -> TensorC(RawTensor.CreateFromFlatArray(arr, shape=[| arr.Length |], ?device=device, ?dtype=dtype, ?backend=backend)) + | _ -> + // Empty tensor (no data, shape: [0]) + match value with + | :? (seq) as v when Seq.isEmpty v -> + let result = TensorC(RawTensor.CreateFromFlatArray(Array.zeroCreate 0, shape=[|0|], ?device=device, dtype=Dtype.Float32, ?backend=backend)) + let dtype2 = defaultArg dtype Dtype.Default + result.cast(dtype=dtype2) + | _ -> + // Create a new Tensor from a structure holding scalar Tensors. Maintains differentiability. + let res = value |> DataConverter.tryFlatArrayAndShape + match res with + | Some (tensors, shape) -> + let allScalar = tensors |> Array.forall (fun t -> t.dim = 0) + if not allScalar then failwithf "Combining tensors in an array is only supported where all tensors in the array are scalar (zero-dimensional). Check other operations like stack, cat to combine tensors." + Tensor.stack(tensors).view(shape) + | None -> + // General constant tensor + TensorC(RawTensor.Create(value, ?device=device, ?dtype=dtype, ?backend=backend)) + + /// Returns a 2-D tensor with ones on the diagonal and zeros elsewhere. + static member eye(rows:int, ?cols:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) = + let cols = defaultArg cols rows + if rows <= 0 || cols <= 0 then Tensor.create([], ?device=device, ?dtype=dtype, ?backend=backend) + else + let vals = Array2D.init rows cols (fun i j -> if i = j then 1 else 0) + Tensor.create(vals, ?device=device, ?dtype=dtype, ?backend=backend) + + /// Concatenates sequence of tensors along a new dimension. + /// All tensors need to be of the same shape. + /// sequence of tensors to concatenate + /// dimension to insert. Has to be between 0 and the number of dimensions of concatenated tensors (inclusive) + static member stack(tensors:seq, ?dim:int) = + let dim = defaultArg dim 0 + let tensors = tensors |> Seq.toArray + let shapes = tensors |> Array.map (fun t -> t.shape) + Shape.checkCanStack shapes dim |> ignore + match Seq.head tensors with + | TensorC(ap) -> TensorC(ap.StackTs((tensors |> Array.map (fun t -> t.primalRaw)), dim)) + + /// Removes a tensor dimension. + /// The dimension to remove, defaults to 0. + /// Returns an array of all slices along a given dimension. + member a.unstack (?dim:int) = + let dim = defaultArg dim 0 + Shape.checkCanUnstack a.shape |> ignore + match a with + | TensorC(ap) -> ap.UnstackT(dim) |> Array.map TensorC + + /// Concatenates the given sequence of seq tensors in the given dimension. + /// All tensors must either have the same shape (except in the concatenating dimension) or be empty. + /// The tensors to concatenate. + /// The dimension over which the tensors are concatenated, defaults to 0. 
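    // A minimal stack/unstack roundtrip under the definitions above
    // (hypothetical values):
    //   let a = Tensor.create [| 1.; 2. |]
    //   let b = Tensor.create [| 3.; 4. |]
    //   Tensor.stack([a; b])            // shape [2; 2], new dim 0 inserted
    //   Tensor.stack([a; b]).unstack()  // recovers [| a; b |]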
+ static member cat(tensors:seq, ?dim: int) = + let dim = defaultArg dim 0 + let tensors = tensors |> Seq.toArray + let shapes = tensors |> Array.map (fun t -> t.shape) + Shape.checkCanCat shapes dim |> ignore + match Seq.head tensors with + | TensorC(ap) -> TensorC(ap.CatTs((tensors |> Array.map (fun t -> t.primalRaw)), dim)) + + /// Splits the tensor into chunks. Each chunk is a view of the original tensor. + /// List of sizes for each chunk + /// The dimension along which to split the tensor, defaults to 0. + member a.split (sizes: seq, ?dim: int) = + let dim = defaultArg dim 0 + let sizes = sizes |> Seq.toArray + match a with + | TensorC(ap) -> ap.SplitT(sizes, dim=dim) |> Array.map TensorC + + /// Pipeline the tensor into a function. + static member inline (-->) (t:Tensor, f:Tensor -> ^a) = f t + + static member inline internal OpUnary(a, fRaw:RawTensor->RawTensor) = + match a with + | TensorC(ap) -> TensorC(fRaw(ap)) + + static member inline internal OpBinary(a, b, fRaw: RawTensor * RawTensor -> RawTensor) = + match a, b with + | TensorC(ap), TensorC(bp) -> TensorC(fRaw(ap, bp)) + + /// Each element of the tensor is added to each corresponding element of the tensor . The resulting tensor is returned. + /// The shapes of the two tensors must be broadcastable. + static member (+) (a:Tensor, b:Tensor) : Tensor = + if a.dtype <> b.dtype then + match Dtype.widen a.dtype b.dtype with + | None -> opNotSupported "+" a.dtype b.dtype + | Some tnew -> + let aCast = a.cast(tnew) + let bCast = b.cast(tnew) + aCast + bCast + elif a.shape = b.shape then + let inline fRaw(a:RawTensor,b) = a.AddTT(b) + Tensor.OpBinary(a, b, fRaw) + else + let newShape = Shape.broadcast2 a.shape b.shape + let aExpanded = a.expand(newShape) + let bExpanded = b.expand(newShape) + aExpanded + bExpanded + + /// Each element of the tensor is added to the scalar . The resulting tensor is returned. + static member (+) (a:Tensor, b: scalar) = + match tryWidenScalar a.dtype b with + | ValueSome tnew -> + let aCast = a.cast(tnew) + let bCast = b.cast(tnew) + aCast + bCast + | ValueNone -> + let inline fRaw(a:RawTensor) = a.AddTT0(b) + Tensor.OpUnary(a, fRaw) + + /// The scalar is added to each element of the tensor . The resulting tensor is returned. + static member (+) (a: scalar, b:Tensor) : Tensor = b + a + + /// Each element of the object tensor is added to each corresponding element of the tensor . The resulting tensor is returned. + /// The shapes of the two tensors must be broadcastable. + member a.add(b:Tensor) : Tensor = a + b + + /// Each element of the object tensor is added to the scalar . The resulting tensor is returned. + member a.add(b:scalar) : Tensor = a + b + + /// Subtracts each element of the tensor from the corresponding element of the tensor . The resulting tensor is returned. + /// The shapes of the two tensors must be broadcastable. + static member (-) (a:Tensor, b:Tensor) = + if a.dtype <> b.dtype then + match Dtype.widen a.dtype b.dtype with + | None -> opNotSupported "-" a.dtype b.dtype + | Some tnew -> + let aCast = a.cast(tnew) + let bCast = b.cast(tnew) + aCast - bCast + elif a.shape = b.shape then + let inline fRaw(a:RawTensor,b) = a.SubTT(b) + Tensor.OpBinary(a, b, fRaw) + else + let newShape = Shape.broadcast2 a.shape b.shape + let aExpanded = a.expand(newShape) + let bExpanded = b.expand(newShape) + aExpanded - bExpanded + + /// Subtracts the scalar from the corresponding element of the tensor . The resulting tensor is returned. 
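    // The arithmetic operators here normalize in two steps: widen dtypes if they
    // differ, then broadcast shapes and recurse, so the raw ops (AddTT etc.) only
    // ever see equal dtypes and shapes. For example (a sketch):
    //   shapes [2; 1] + [3] -> both operands expanded to [2; 3], then AddTT
    //   runs elementwise.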
+    static member (-) (a:Tensor, b:scalar) =
+        match tryWidenScalar a.dtype b with
+        | ValueSome tnew ->
+            let aCast = a.cast(tnew)
+            let bCast = b.cast(tnew)
+            aCast - bCast
+        | ValueNone ->
+            let inline fRaw(a:RawTensor) = a.SubTT0(b)
+            Tensor.OpUnary(a, fRaw)
+
+    /// Subtracts each element of the tensor b from the scalar a. The resulting tensor is returned.
+    static member (-) (a:scalar, b:Tensor) : Tensor =
+        match tryWidenScalar b.dtype a with
+        | ValueSome tnew ->
+            let aCast = a.cast(tnew)
+            let bCast = b.cast(tnew)
+            aCast - bCast
+        | ValueNone ->
+            let inline fRaw(b:RawTensor) = b.SubFromT0T(a)
+            Tensor.OpUnary(b, fRaw)
+
+    /// Subtracts each element of the tensor b from the corresponding element of the object tensor. The resulting tensor is returned.
+    /// The shapes of the two tensors must be broadcastable.
+    member a.sub(b:Tensor) = a - b
+
+    /// Subtracts the scalar b from each element of the object tensor. The resulting tensor is returned.
+    member a.sub(b:scalar) = a - b
+
+    /// Multiplies each element of the tensor a by the corresponding element of the tensor b. The resulting tensor is returned.
+    /// The shapes of the two tensors must be broadcastable.
+    static member (*) (a:Tensor, b:Tensor) =
+        if a.dtype <> b.dtype then
+            match Dtype.widen a.dtype b.dtype with
+            | None -> opNotSupported "*" a.dtype b.dtype
+            | Some tnew ->
+                let aCast = a.cast(tnew)
+                let bCast = b.cast(tnew)
+                aCast * bCast
+        elif a.shape = b.shape then
+            let inline fRaw(a:RawTensor,b) = a.MulTT(b)
+            Tensor.OpBinary(a, b, fRaw)
+        else
+            let newShape = Shape.broadcast2 a.shape b.shape
+            let aExpanded = a.expand(newShape)
+            let bExpanded = b.expand(newShape)
+            aExpanded * bExpanded
+
+    /// Multiplies each element of the tensor a by the scalar b. The resulting tensor is returned.
+    static member (*) (a:Tensor, b:scalar) =
+        match tryWidenScalar a.dtype b with
+        | ValueSome tnew ->
+            let aCast = a.cast(tnew)
+            let bCast = b.cast(tnew)
+            aCast * bCast
+        | ValueNone ->
+            let inline fRaw(a:RawTensor) = a.MulTT0(b)
+            Tensor.OpUnary(a, fRaw)
+
+    /// Multiplies the scalar a by each element of the tensor b. The resulting tensor is returned.
+    static member (*) (a:scalar, b:Tensor) = b * a
+
+    /// Multiplies each element of the object tensor by the corresponding element of the tensor b. The resulting tensor is returned.
+    /// The shapes of the two tensors must be broadcastable.
+    member a.mul(b:Tensor) = a * b
+
+    /// Multiplies each element of the object tensor by the scalar b. The resulting tensor is returned.
+    member a.mul(b: scalar) = a * b
+
+    /// Divides each element of the tensor a by the corresponding element of the tensor b. The resulting tensor is returned.
+    /// The shapes of the two tensors must be broadcastable.
+    static member (/) (a:Tensor, b:Tensor) =
+        if a.dtype <> b.dtype then
+            match Dtype.widen a.dtype b.dtype with
+            | None -> opNotSupported "/" a.dtype b.dtype
+            | Some tnew ->
+                let aCast = a.cast(tnew)
+                let bCast = b.cast(tnew)
+                aCast / bCast
+        elif a.shape = b.shape then
+            let outtype = Dtype.divisionType a.dtype b.dtype
+            let a = a.cast(outtype)
+            let b = b.cast(outtype)
+
+            let inline fRaw(a:RawTensor,b) = a.DivTT(b)
+            Tensor.OpBinary(a, b, fRaw)
+        else
+            let newShape = Shape.broadcast2 a.shape b.shape
+            let aExpanded = a.expand(newShape)
+            let bExpanded = b.expand(newShape)
+            aExpanded / bExpanded
+
+    /// Divides each element of the tensor a by the scalar b. The resulting tensor is returned.
+    static member (/) (a:Tensor, b:scalar) =
+        let outtype = widenScalarForDivision a.dtype b.dtype
+        let a = a.cast(outtype)
+        let b = b.cast(outtype)
+
+        let inline fRaw(a:RawTensor) = a.DivTT0(b)
+        Tensor.OpUnary(a, fRaw)
+
+    /// Divides the scalar a by each element of the tensor b. The resulting tensor is returned.
+    static member (/) (a:scalar, b:Tensor) =
+        let outtype = widenScalarForDivision b.dtype a.dtype
+        let a = a.cast(outtype)
+        let b = b.cast(outtype)
+
+        let inline fRaw(b:RawTensor) = b.DivFromT0T(a)
+        Tensor.OpUnary(b, fRaw)
+
+    /// Divides each element of the object tensor by the corresponding element of the tensor b. The resulting tensor is returned.
+    /// The shapes of the two tensors must be broadcastable.
+    member a.div(b:Tensor) = a / b
+
+    /// Divides each element of the object tensor by the scalar b. The resulting tensor is returned.
+    member a.div(b:scalar) = a / b
+
+    static member internal powImpl (a:Tensor, b:Tensor) =
+        if a.dtype <> b.dtype then
+            match Dtype.widen a.dtype b.dtype with
+            | None -> opNotSupported "Pow" a.dtype b.dtype
+            | Some tnew ->
+                let aCast = a.cast(tnew)
+                let bCast = b.cast(tnew)
+                Tensor.Pow (aCast, bCast)
+        elif a.shape = b.shape then
+            let inline fRaw(a:RawTensor,b) = a.PowTT(b)
+            Tensor.OpBinary(a, b, fRaw)
+        else
+            let newShape = Shape.broadcast2 a.shape b.shape
+            let aExpanded = a.expand(newShape)
+            let bExpanded = b.expand(newShape)
+            Tensor.Pow(aExpanded, bExpanded)
+
+    static member internal powImpl (a:Tensor, b:scalar) =
+        match tryWidenScalar a.dtype b with
+        | ValueSome tnew ->
+            let aCast = a.cast(tnew)
+            let bCast = b.cast(tnew)
+            Tensor.powImpl(aCast, bCast)
+        | ValueNone ->
+            let inline fRaw(a:RawTensor) = a.PowTT0(b)
+            Tensor.OpUnary(a, fRaw)
+
+    static member internal powImpl (a:scalar, b:Tensor) =
+        match tryWidenScalar b.dtype a with
+        | ValueSome tnew ->
+            let aCast = a.cast(tnew)
+            let bCast = b.cast(tnew)
+            Tensor.powImpl(aCast, bCast)
+        | ValueNone ->
+            let inline fRaw(b:RawTensor) = b.PowFromT0T(a)
+            Tensor.OpUnary(b, fRaw)
+
+    /// Raises each element of the tensor a to the power of the corresponding element of the tensor b. The resulting tensor is returned.
+    /// The shapes of the two tensors must be broadcastable.
+    static member Pow (a:Tensor, b:Tensor) = Tensor.powImpl(a, b)
+
+    /// Raises each element of the tensor a to the power of the scalar b. The resulting tensor is returned.
+    static member Pow (a:Tensor, b: scalar) = Tensor.powImpl(a, b)
+
+    /// Raises each element of the tensor a to the power of the scalar b. The resulting tensor is returned.
+    static member Pow (a:Tensor, b:float) = Tensor.powImpl(a, (b :> scalar))
+
+    /// Raises each element of the tensor a to the power of the scalar b. The resulting tensor is returned.
+    static member Pow (a:Tensor, b:int) = Tensor.powImpl(a, (b :> scalar))
+
+    /// Raises the scalar a to the power of each element of the tensor b. The resulting tensor is returned.
+    static member Pow (a:scalar, b:Tensor) = Tensor.powImpl(a, b)
+
+    /// Raises the scalar a to the power of each element of the tensor b. The resulting tensor is returned.
+    static member Pow (a:float, b:Tensor) = Tensor.powImpl((a :> scalar), b)
+
+    /// Raises the scalar a to the power of each element of the tensor b. The resulting tensor is returned.
+    static member Pow (a:int, b:Tensor) = Tensor.powImpl((a :> scalar), b)
+
+    /// Raises each element of the self tensor to the power of each corresponding element of the tensor b. The resulting tensor is returned.
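    // Unlike (+), division above routes through Dtype.divisionType and
    // widenScalarForDivision, so integral inputs are promoted before dividing
    // (a sketch; the promoted dtype depends on the configured defaults):
    //   an Int32 tensor [1; 2; 3] divided by 2 -> a floating result [0.5; 1.0; 1.5]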
+    /// The shapes of the two tensors must be broadcastable.
+    member a.pow(b:Tensor) = Tensor.powImpl(a, b)
+
+    /// Raises each element of the self tensor to the power of the scalar b. The resulting tensor is returned.
+    member a.pow(b: scalar) = Tensor.powImpl(a, b)
+
+    /// Matrix product of two tensors.
+    ///
+    /// The behavior depends on the dimensionality of the tensors as follows:
+    ///
+    /// If both tensors are 1-dimensional, the dot product (scalar) is returned.
+    ///
+    /// If both arguments are 2-dimensional, the matrix-matrix product is returned.
+    ///
+    /// If the first argument is 1-dimensional and the second argument is 2-dimensional, a 1 is prepended to its dimension for the purpose of the matrix multiply. After the matrix multiply, the prepended dimension is removed.
+    ///
+    /// If the first argument is 2-dimensional and the second argument is 1-dimensional, the matrix-vector product is returned.
+    ///
+    /// If both arguments are at least 1-dimensional and at least one argument is N-dimensional (where N > 2), then a
+    /// batched matrix multiply is returned. If the first argument is 1-dimensional, a 1 is prepended to its dimension for the
+    /// purpose of the batched matrix multiply and removed after. If the second argument is 1-dimensional, a 1 is appended to
+    /// its dimension for the purpose of the batched matrix multiply and removed after. The non-matrix (i.e. batch) dimensions
+    /// are broadcast (and thus must be broadcastable). For example, if input is a \(j \times 1 \times n \times m\)
+    /// tensor and other is a \(k \times m \times p\) tensor, out will be a \(j \times k \times n \times p\)
+    /// tensor.
+    member a.matmul (b:Tensor) : Tensor =
+        if a.dim = 1 && b.dim = 1 then a.dot(b)
+        // Increase to at least 2x2
+        elif a.dim = 1 && b.dim > 1 then a.unsqueeze(0).matmul(b).squeeze(b.dim-2)
+        elif a.dim > 1 && b.dim = 1 then a.matmul(b.unsqueeze(1)).squeeze(a.dim-1)
+        else
+            let (aBatchPart, aMatrixPart), (bBatchPart, bMatrixPart) = Shape.checkCanMatmul a.shape b.shape
+            if aBatchPart = bBatchPart then
+                let inline fRaw(a:RawTensor,b) = a.MatMulTT(b)
+                Tensor.OpBinary(a, b, fRaw)
+            else
+                let newBatchPart = Shape.broadcast2 aBatchPart bBatchPart
+                let aNewShape = Array.append newBatchPart aMatrixPart
+                let bNewShape = Array.append newBatchPart bMatrixPart
+                let aExpanded = a.expand(aNewShape)
+                let bExpanded = b.expand(bNewShape)
+                aExpanded.matmul(bExpanded)
+
+    /// Computes the dot product (inner product) of two vectors (1d-tensors).
+    /// The vector to multiply this tensor by (1d-tensor).
+    /// This function does not broadcast and expects this tensor to be a vector (1d-tensor).
+    /// The tensors must have the same number of elements.
+    ///
+    member a.dot(b:Tensor) =
+        Shape.checkCanDot a.shape b.shape
+        let a:Tensor = a.view([1;a.nelement])
+        let b:Tensor = b.view([b.nelement;1])
+        a.matmul(b).view([])
+
+    /// Returns a new tensor with the negative of the elements of a.
+    static member (~-) (a:Tensor) =
+        let inline fRaw(a:RawTensor) = a.NegT()
+        Tensor.OpUnary(a, fRaw)
+
+    /// Returns a new tensor with the negative of the elements of the object tensor.
+    member a.neg() = -a
+
+    /// Returns the sum of all elements in the input tensor.
+    /// The desired data type of returned tensor.
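    // dot above is a shape dance around matmul: view a as a 1-by-n row and b as
    // an n-by-1 column, multiply, then view the [1; 1] result as a scalar:
    //   a = [1.; 2.; 3.], b = [4.; 5.; 6.]
    //   [1; 3] matmul [3; 1] -> [[32.]] -> view([]) -> tensor(32.)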
+ member a.sum(?dtype: Dtype) = + let inline fRaw(a:RawTensor) = a.SumT(?resultType=dtype) + Tensor.OpUnary(a, fRaw) + + /// Returns the sum of each row of the input tensor in the given dimension dim. If dim is a list of dimensions, reduce over all of them. + /// If keepdim is true, the output tensor is of the same size as input except in the dimension dim where it is of size 1. Otherwise, dim is squeezed, resulting in the output tensor having 1 fewer dimension. + /// The dimension to reduce. + /// Whether the output tensor has dim retained or not. + /// The desired data type of returned tensor. + member a.sum(dim:int, ?keepDim:bool, ?dtype: Dtype) = + let keepDim = defaultArg keepDim false + let dim = Shape.completeDim a.dim dim // Handles -1 semantics + let res = + if dim = 0 && a.dim = 0 then a + else + if dim >= a.dim || dim < 0 then failwithf "Expecting 0 < dim (%A) < %A" dim a.dim + let inline fRaw(a:RawTensor) = a.SumTDim(dim=dim, ?resultType=dtype) + Tensor.OpUnary(a, fRaw) + let res2 = if keepDim then res.unsqueeze(dim) else res + res2.castAfterSummation(?dtype=dtype) + + /// Sum this tensor to size , which must be broadcastable to this tensor size. + member a.sumToSize(newShape:int[], ?dtype: Dtype) = + let oldShape = a.shape + if oldShape = newShape then + a.cast(defaultArg dtype a.dtype.SummationType) + elif newShape.Length = 0 then + a.sum(?dtype=dtype) + else + Shape.checkCanExpand newShape oldShape + let trim = oldShape.Length - newShape.Length + let mutable result = a.cast(a.dtype.SummationType) + // collapse the eliminated dimensions + for _dim in 0 .. trim-1 do + result <- result.sum(0, keepDim=false) + // reduce the squeezed dimensions + for dim in 0 .. newShape.Length-1 do + if oldShape[trim+dim] <> newShape[dim] then + result <- result.sum(dim, keepDim=true) + result.castAfterSummation(?dtype=dtype) + + /// Returns the mean value of all elements in the input tensor + member a.mean() = a.sum() / a.nelement + + /// Returns the mean value of each row of the input tensor in the given dimension dim. + /// If keepdim is True, the output tensor is of the same size as input except in the dimension dim where it is of size 1. Otherwise, dim is squeezed, resulting in the output tensor having 1 fewer dimension. + /// The dimension to reduce. + /// Whether the output tensor has dim retained or not. + member a.mean(dim:int, ?keepDim:bool) = + let dim = Shape.completeDim a.dim dim // Handles -1 semantics + if dim = 0 && a.dim = 0 then a + else + let sm = a.sum(dim, ?keepDim=keepDim) + let dv = sm / a.shape[dim] + dv + + /// Returns the variance of all elements in the input tensor. + /// If unbiased is False, then the variance will be calculated via the biased estimator. Otherwise, Bessel’s correction will be used. + /// Whether to use the unbiased estimation or not. + member a.var(?unbiased:bool) = + // This is the two-pass algorithm, see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + let unbiased = defaultArg unbiased true // Use Bessel's correction if unbiased=true + let n = if unbiased then a.nelement - 1 else a.nelement + let a' = a - a.mean() in (a' * a').sum() / n + + /// Returns the variance of each row of the input tensor in the given dimension dim. + /// + /// If keepdim is True, the output tensor is of the same size as input except in the dimension dim where it is of size 1. Otherwise, dim is squeezed, resulting in the output tensor having 1 fewer dimension(s). + /// If unbiased is False, then the variance will be calculated via the biased estimator. 
Otherwise, Bessel’s correction will be used. + /// + /// The dimension to reduce. + /// Whether the output tensor has dim retained or not. + /// Whether to use the unbiased estimation or not. + member a.var(dim:int, ?keepDim:bool, ?unbiased:bool) = + // This is the two-pass algorithm, see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + let unbiased = defaultArg unbiased true // Use Bessel's correction if unbiased=true + let dim = Shape.completeDim a.dim dim // Handles -1 semantics + let n = if unbiased then a.shape[dim] - 1 else a.shape[dim] + let a' = a - a.mean(dim=dim, keepDim=true) in (a' * a').sum(dim=dim, ?keepDim=keepDim) / n + + /// Returns the standard deviation of each row of the input tensor in the given dimension dim. + /// + /// If keepdim is True, the output tensor is of the same size as input except in the dimension dim where it is of size 1. Otherwise, dim is squeezed, resulting in the output tensor having 1 fewer dimension(s). + /// If unbiased is False, then the standard deviation will be calculated via the biased estimator. Otherwise, Bessel’s correction will be used. + /// + /// The dimension to reduce. + /// Whether the output tensor has dim retained or not. + /// Whether to use the unbiased estimation or not. + member a.std(dim, ?keepDim, ?unbiased) = a.var(dim, ?keepDim=keepDim, ?unbiased=unbiased) |> Tensor.Sqrt + + /// Returns the standard deviation of all elements in the input tensor. + /// If unbiased is False, then the standard deviation will be calculated via the biased estimator. Otherwise, Bessel’s correction will be used. + /// Whether to use the unbiased estimation or not. + member a.std(?unbiased) = a.var(?unbiased=unbiased) |> Tensor.Sqrt + + /// + /// Estimates the covariance matrix of the given tensor. The tensor's first + /// dimension should index variables and the second dimension should + /// index observations for each variable. + /// + /// + /// If no weights are given, the covariance between variables \(x\) and \(y\) is + /// \[cov(x,y)= \frac{\sum^{N}_{i = 1}(x_{i} - \mu_x)(y_{i} - \mu_y)}{N~-~\text{correction}}\] + /// where \(\mu_x\) and \(\mu_y\) are the sample means. + /// + /// If there are fweights or aweights then the covariance is + /// \[cov(x,y)=\frac{\sum^{N}_{i = 1}w_i(x_{i} - \mu_x^*)(y_{i} - \mu_y^*)}{\text{normalization factor}}\] + /// where \(w\) is either fweights or aweights if one weight type is provided. + /// If both weight types are provided \(w=\text{fweights}\times\text{aweights}\). + /// \(\mu_x^* = \frac{\sum^{N}_{i = 1}w_ix_{i} }{\sum^{N}_{i = 1}w_i}\) + /// is the weighted mean of variables. + /// The normalization factor is \(\sum^{N}_{i=1} w_i\) if only fweights are provided or if aweights are provided and correction=0. + /// Otherwise if aweights \(aw\) are provided the normalization factor is + /// \(\sum^N_{i=1} w_i - \text{correction}\times\frac{\sum^N_{i=1} w_i aw_i}{\sum^N_{i=1} w_i}\) + /// + /// Difference between the sample size and the sample degrees of freedom. Defaults to 1 (Bessel's correction). + /// Frequency weights represent the number of times each observation was observed. + /// Should be given as a tensor of integers. Defaults to no weights. + /// Relative importance weights, larger weights for observations that + /// should have a larger effect on the estimate. + /// Should be given as a tensor of floating point numbers. Defaults to no weights. + /// Returns a square tensor representing the covariance matrix. 
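    // Worked check of the two-pass variance above (unbiased, hypothetical values):
    //   data = [1.; 2.; 3.], mean = 2
    //   sum of squared deviations = 1 + 0 + 1 = 2, n - 1 = 2 -> var = 1.0
    //   std = sqrt(var) = 1.0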
+    /// Given a tensor with \(N\) variables \(X=[x_1,x_2,\ldots,x_N]\) the
+    /// \(C_{i,j}\) entry on the covariance matrix is the covariance between
+    /// \(x_i\) and \(x_j\).
+    ///
+    ///
+    /// let x = dsharp.tensor([0.0;3.4;5.0])
+    /// let y = dsharp.tensor([1.0;2.3;-3.0])
+    /// let xy = dsharp.stack([x;y])
+    /// xy.cov()
+    ///
+    /// Evaluates to
+    ///
+    /// tensor([[ 6.5200, -4.0100],
+    ///         [-4.0100,  7.6300]])
+    ///
+    member a.cov(?correction:int64, ?fweights:Tensor, ?aweights:Tensor) =
+        if a.dim > 2 then
+            failwith $"Expected input to have two or fewer dimensions but input.dim is {a.dim}"
+        if a.dtype = Dtype.Bool then failwith $"bool dtype is not supported for input"
+        let mutable input = if a.dim < 2 then a.view([1;-1]) else a
+        let correction = defaultArg correction (int64 1)
+        let nObservations = input[0].nelement
+        let checkWeightDims name (w: Tensor) =
+            if w.dim > 1 then
+                failwith $"{name} should be scalar or 1D. {name}.dim is {w.dim}."
+            if w.nelement <> nObservations then
+                let error =
+                    $"The number of columns in the input tensor should be the same as the number of elements in {name}. "
+                    + $"There are {nObservations} columns in input and {w.nelement} elements in {name}."
+                failwith error
+            if w.nelement > 0 && w.min().le(w.zeroLike()).toBool() then failwith $"{name} cannot be negative"
+        let fweights =
+            match fweights with
+            | None -> None
+            | Some fw ->
+                checkWeightDims "fweights" fw
+                match fw.dtype with
+                | Dtype.Integral -> Some fw
+                | _ -> failwith $"fweights.dtype should be integral but it is {fw.dtype}."
+        let aweights =
+            match aweights with
+            | None -> None
+            | Some aw ->
+                checkWeightDims "aweights" aw
+                match aw.dtype with
+                | Dtype.FloatingPoint -> Some aw
+                | _ -> failwith $"aweights.dtype should be floating point but it is {aw.dtype}."
+        let w =
+            match fweights, aweights with
+            | None, None -> None
+            | Some fw, None -> Some fw
+            | None, Some aw -> Some aw
+            | Some fw, Some aw -> Some (fw * aw)
+        let wSum =
+            match w with
+            | None -> Tensor.create(nObservations, device=input.device, dtype=input.dtype, backend=input.backend)
+            | Some w -> w.sum()
+        if w.IsSome && wSum.eq(wSum.zeroLike()).toBool() then
+            failwith "weights cannot be normalized because they sum to zero"
+        let avg =
+            match w with
+            | None -> input.mean(dim=1)
+            | Some w -> (input * w).sum(dim=1) / wSum
+        let normFactor =
+            let nf =
+                match w, aweights, correction <> int64 0 with
+                | Some w, Some aweights, true ->
+                    wSum - correction * (w * aweights).sum() / wSum
+                | _ -> wSum - correction
+            if nf.le(nf.zeroLike()).toBool() then
+                printfn $"Warning: degrees of freedom <= 0"
+                nf.zeroLike()
+            else nf
+        input <- input - avg.unsqueeze(1)
+        let cov =
+            match w with
+            | None -> input.matmul(input.transpose())
+            | Some w -> input.matmul((input * w).transpose())
+        cov.div(normFactor).squeeze()
+
+    ///
+    /// Estimates the Pearson correlation coefficient matrix for the given tensor. The tensor's first
+    /// dimension should index variables and the second dimension should
+    /// index observations for each variable.
+    ///
+    ///
+    /// The correlation coefficient matrix \(R\) is computed from the covariance
+    /// matrix returned by cov.
+    /// Returns a square tensor representing the correlation coefficient matrix.
+    /// Given a tensor with \(N\) variables \(X=[x_1,x_2,\ldots,x_N]\) the
+    /// \(R_{i,j}\) entry on the correlation matrix is the correlation between
+    /// \(x_i\) and \(x_j\).
+    ///
+    ///
+    /// The correlation between variables \(x\) and \(y\) is
+    /// \[cor(x,y)= \frac{\sum^{N}_{i = 1}(x_{i} - \mu_x)(y_{i} - \mu_y)}{\sigma_x \sigma_y (N ~-~1)}\]
+    /// where \(\mu_x\) and \(\mu_y\) are the sample means and \(\sigma_x\) and \(\sigma_y\) are
+    /// the sample standard deviations.
+    ///
+    ///
+    /// let x = dsharp.tensor([-0.2678; -0.0908; -0.3766; 0.2780])
+    /// let y = dsharp.tensor([-0.5812; 0.1535; 0.2387; 0.2350])
+    /// let xy = dsharp.stack([x;y])
+    /// xy.corrcoef()
+    ///
+    /// Evaluates to
+    ///
+    /// tensor([[1.0000, 0.3582],
+    ///         [0.3582, 1.0000]])
+    ///
+    member a.corrcoef() =
+        if a.dim > 2 then failwith $"Expected input to have two or fewer dimensions but tensor.dim is {a.dim}"
+        let mutable c = a.cov()
+        if c.dim = 0 then
+            c / c
+        else
+            let stddev:Tensor = c.diagonal().sqrt()
+            c <- c / stddev.view([-1;1])
+            c <- c / stddev.view([1;-1])
+            c.clamp(-1,1)
+
+    /// Returns a tensor where each row contains numSamples indices sampled from the multinomial probability distribution located in the corresponding row of tensor input.
+    /// The number of samples to draw.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    /// Indicates whether the probabilities should first be normalized by their sum.
+    member probs.multinomial(numSamples:int, ?normalize:bool, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        // TODO: the following may be implemented by RawTensor at a later point
+        if probs.dim < 1 || probs.dim > 2 then failwithf "Expecting 1d or 2d probs, received shape %A" probs.shape
+        let device = defaultArg device probs.device
+        let dtype = defaultArg dtype Dtype.Int32
+        let backend = defaultArg backend probs.backend
+        let normalize = defaultArg normalize false
+        let mutable probs = probs
+        if normalize then probs <- probs / probs.sum(-1, keepDim=true)
+        if probs.dim = 1 then
+            let p =
+                match probs.dtype with
+                | Dtype.Float16
+                | Dtype.BFloat16
+                | Dtype.Float32 -> probs.toArray() :?> float32[] |> Array.map Convert.ToDouble
+                | Dtype.Float64 -> probs.toArray() :?> float[]
+                | _ -> failwithf "Expecting probs to be floating point, received %A" probs.dtype
+            Tensor.create(Random.Multinomial(p, numSamples), device=device, dtype=dtype, backend=backend)
+        else
+            let p =
+                match probs.dtype with
+                | Dtype.BFloat16
+                | Dtype.Float16
+                | Dtype.Float32 -> probs.toArray() :?> float32[,] |> Array2D.map Convert.ToDouble
+                | Dtype.Float64 -> probs.toArray() :?> float[,]
+                | _ -> failwithf "Expecting probs to be floating point, received %A" probs.dtype
+            Tensor.create(Random.Multinomial(p, numSamples), device=device, dtype=dtype, backend=backend)
+
+    /// Draws binary random numbers (0 or 1) from a Bernoulli distribution
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    member probs.bernoulli(?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        // TODO: the following may be implemented by RawTensor at a later point
+        if not probs.dtype.IsFloatingPoint then failwithf "Expecting probs to be floating point, received %A" probs.dtype
+        let device = defaultArg device probs.device
+        let dtype = defaultArg dtype probs.dtype
+        let backend = defaultArg backend probs.backend
+        if probs.dim = 0 then
+            let b = Random.Bernoulli (float probs)
+            Tensor.create(b, device=device, dtype=dtype, backend=backend).view(probs.shape)
+        else
+            let p:Tensor = probs.float().flatten()
+            let b = p.toArray() :?> float[] |> Array.map Random.Bernoulli
+            Tensor.create(b, device=device, dtype=dtype, backend=backend).view(probs.shape)
+
+    /// Randomly zeroes some of the elements of the input tensor with probability p using samples from a Bernoulli distribution
+    /// The probability of an element to be zeroed. Default: 0.5.
+    member a.dropout(?p:double) =
+        let p = defaultArg p 0.5
+        Shape.checkCanDropout p
+        if p = 0. then
+            a
+        elif p = 1. then
+            a * a.zerosLike()
+        else
+            let mask = a.fullLike(1.-p).bernoulli()
+            a * mask
+
+    /// Randomly zero out entire channels (a channel is a 2D feature map, e.g., the \(j\)-th channel of the \(i\)-th sample in the batched input is a 2D tensor \(\text{input}[i, j]\)). Each channel will be zeroed out independently on every forward call with probability p using samples from a Bernoulli distribution
+    /// The probability of an element to be zeroed. Default: 0.5.
+    member a.dropout2d(?p:double) =
+        let p = defaultArg p 0.5
+        Shape.checkCanDropout2d a.shape p
+        if p = 0. then
+            a
+        elif p = 1. then
+            a * a.zerosLike()
+        else
+            let mask = a.fullLike(1.-p, Array.append a.shape[0..1] [|1;1|]).bernoulli()
+            a * mask
+
+    /// Randomly zero out entire channels (a channel is a 3D feature map, e.g., the \(j\)-th channel of the \(i\)-th sample in the batched input is a 3D tensor \(\text{input}[i, j]\)). Each channel will be zeroed out independently on every forward call with probability p using samples from a Bernoulli distribution.
+    /// The probability of an element to be zeroed. Default: 0.5.
+    member a.dropout3d(?p:double) =
+        let p = defaultArg p 0.5
+        Shape.checkCanDropout3d a.shape p
+        if p = 0. then
+            a
+        elif p = 1. then
+            a * a.zerosLike()
+        else
+            let mask = a.fullLike(1.-p, Array.append a.shape[0..1] [|1;1;1|]).bernoulli()
+            a * mask
+
+    /// Returns a tensor that is a transposed version of input. The given dimensions dim0 and dim1 are swapped.
+    /// The first dimension to be transposed.
+    /// The second dimension to be transposed.
+    member a.transpose(dim0:int, dim1:int) =
+        let dim0 = Shape.completeDim a.dim dim0  // Handles -1 semantics
+        let dim1 = Shape.completeDim a.dim dim1  // Handles -1 semantics
+        Shape.checkCanTranspose a.shape dim0 dim1
+        if dim0 = dim1 then
+            a
+        else
+            let inline fRaw(a:RawTensor) = a.TransposeT(dim0, dim1)
+            Tensor.OpUnary(a, fRaw)
+
+    /// Returns the original tensor with its dimensions permuted.
+    /// The desired ordering of dimensions.
+    member a.permute(permutation:seq<int>) =
+        let permutation = Seq.toArrayQuick permutation
+        let inversePermutation, _ = Shape.checkCanPermute a.shape permutation
+        if permutation |> Array.foralli (fun i j -> i = j) then
+            a
+        else
+            let inline fRaw(a:RawTensor) = a.PermuteT(permutation)
+            Tensor.OpUnary(a, fRaw)
+
+    /// Returns a tensor that is a transposed version of input with dimensions 0 and 1 swapped.
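    // dropout above zeroes elements independently via a Bernoulli(1 - p) mask
    // (hypothetical values):
    //   p = 0.5: the mask could come out as [1.; 0.; 0.; 1.], and a.dropout()
    //   is a * mask, so roughly half the entries become 0. dropout2d/3d build
    //   the mask with shape [n; c; 1; 1] (or [n; c; 1; 1; 1]), i.e. one draw
    //   per channel, broadcast over the whole feature map.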
+ member a.transpose() = + Shape.checkCanTranspose2d a.dim + let inline fRaw(a:RawTensor) = a.TransposeT2() + Tensor.OpUnary(a, fRaw) + + /// Returns a tensor with all the dimensions of input of size 1 removed. + /// If the tensor has a batch dimension of size 1, then squeeze(input) will also remove the batch dimension, which can lead to unexpected errors. + /// If given, the input will be squeezed only in this dimension. + member a.squeeze(?dim:int) = + let dim = defaultArg dim -1 + let inline fRaw(a:RawTensor) = a.SqueezeT(dim) + Tensor.OpUnary(a, fRaw) + + /// Returns a new tensor with a dimension of size one inserted at the specified position + /// The index at which to insert the singleton dimension. + member a.unsqueeze(dim:int) : Tensor = + let dim = Shape.completeDimUnsqueeze a.dim dim + let inline fRaw(a:RawTensor) = a.UnsqueezeT(dim) + Tensor.OpUnary(a, fRaw) + + /// Returns a new tensor with dimensions of size one appended to the end until the number of dimensions is the same as the other tensor. + /// The other tensor. + member a.unsqueezeAs(other:Tensor) = + if a.dim >= other.dim then a + else + let newShape = Array.create other.dim 1 + System.Array.Copy(a.shape, newShape, a.shape.Length) + a.view(newShape) + + /// Reverse the order of a n-D tensor along given axis in dims + /// The axis to flip on. + member a.flip(dims:seq) = + let dims = dims |> Array.ofSeq + Shape.checkCanFlip a.dim dims + let inline fRaw(a:RawTensor) = a.FlipT(dims) + Tensor.OpUnary(a, fRaw) + + /// Dilate the tensor in using the given dilations in each corresponding dimension. + /// The dilations to use. + member a.dilate(dilations:seq) = + let dilations = dilations |> Array.ofSeq + Shape.checkCanDilate a.dim dilations + let inline fRaw(a:RawTensor) = a.DilateT(dilations) + Tensor.OpUnary(a, fRaw) + + /// Reverse the dilation of the tensor in using the given dilations in each corresponding dimension. + /// The dilations to use. + member a.undilate(dilations:seq) = + let dilations = dilations |> Array.ofSeq + let inline fRaw(a:RawTensor) = a.UndilateT(dilations) + Tensor.OpUnary(a, fRaw) + + /// Repeat elements of a tensor + /// The dimension along which to repeat values. + /// The number of repetitions for each element. + member a.repeat(dim:int, times:int) = + // Note: the repeat op was used in the days before broadcasting was implemented + // Most of its uses are now covered by broadcast and expand. But the operation + // is well defined and correct so we can keep it. + Shape.checkCanRepeat a.shape dim + let newShape = a.shape |> Array.copy + newShape[dim] <- times + let mutable ret = a.zerosLike(newShape) + let location = Array.create a.dim 0 + for i=0 to times-1 do + location[dim] <- i + ret <- ret.addSlice(location, a) + ret + + /// Gathers values along an axis specified by dim. + /// The axis along which to index. + /// The the indices of elements to gather. + member a.gather(dim:int, indices:Tensor) = + let dim = Shape.completeDim a.dim dim // Handles -1 semantics + Shape.checkCanGather a.shape dim indices.shape indices.dtype + let inline fRaw(a:RawTensor) = a.GatherT(dim, indices.primalRaw) + Tensor.OpUnary(a, fRaw) + + /// Scatter values along an axis specified by dim. + /// The axis along which to index. + /// The the indices of elements to gather. + /// The destination shape. 
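    // repeat above tiles along a size-1 dimension (implied by the addSlice
    // filling, which writes the whole tensor at each offset). A sketch:
    //   a : shape [1; 2] = [[1.; 2.]]
    //   a.repeat(0, 3) -> shape [3; 2] = [[1.; 2.]; [1.; 2.]; [1.; 2.]]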
+    member a.scatter(dim:int, indices:Tensor, destinationShape:seq<int>) =
+        let destinationShape = destinationShape|>Shape.create
+        let dim = Shape.completeDim a.dim dim  // Handles -1 semantics
+        Shape.checkCanScatter a.shape dim indices.shape indices.dtype destinationShape
+        let inline fRaw(a:RawTensor) = a.ScatterT(dim, indices.primalRaw, destinationShape)
+        Tensor.OpUnary(a, fRaw)
+
+    /// Returns a new tensor with the same data as the self tensor but of a different shape.
+    ///
+    /// The returned tensor shares the same data and must have the same number of elements, but may have a different size.
+    /// For a tensor to be viewed, the new view size must be compatible with its original size and stride, i.e., each new view dimension must either be a subspace of an original dimension,
+    /// or only span across original dimensions \(d, d+1, \dots, d+k\) that satisfy the following contiguity-like condition:
+    /// \(\forall i = d, \dots, d+k-1,\) \[\text{stride}[i] = \text{stride}[i+1] \times \text{size}[i+1]\]
+    ///
+    /// The desired shape of returned tensor.
+    member a.view(shape:seq<int>) =
+        let shape = shape |> Shape.create |> Shape.complete a.nelement  // Handles -1 semantics
+        if a.shape = shape then a  // Do nothing if the shapes are the same
+        else
+            Shape.checkCanView a.shape shape
+            let inline fRaw(a:RawTensor) = a.ViewT(shape)
+            Tensor.OpUnary(a, fRaw)
+
+    /// Returns a new tensor with the same data as the object tensor but of a different shape.
+    ///
+    /// The returned tensor shares the same data and must have the same number of elements, but may have a different size.
+    /// For a tensor to be viewed, the new view size must be compatible with its original size and stride, i.e., each new view dimension must either be a subspace of an original dimension,
+    /// or only span across original dimensions \(d, d+1, \dots, d+k\) that satisfy the following contiguity-like condition:
+    /// \(\forall i = d, \dots, d+k-1,\) \[\text{stride}[i] = \text{stride}[i+1] \times \text{size}[i+1]\]
+    ///
+    /// the desired shape
+    member t.view(shape:int) = t.view([|shape|])
+
+    /// View this tensor as the same size as other.
+    ///
+    /// The returned tensor shares the same data and must have the same number of elements, but may have a different size.
+    /// For a tensor to be viewed, the new view size must be compatible with its original size and stride, i.e., each new view dimension must either be a subspace of an original dimension,
+    /// or only span across original dimensions \(d, d+1, \dots, d+k\) that satisfy the following contiguity-like condition:
+    /// \(\forall i = d, \dots, d+k-1,\) \[\text{stride}[i] = \text{stride}[i+1] \times \text{size}[i+1]\]
+    ///
+    /// The result tensor has the same size as other.
+    member a.viewAs(other:Tensor) = a.view(other.shape)
+
+    /// Flattens a contiguous range of dims in a tensor.
+    /// The first dim to flatten.
+    /// The last dim to flatten.
+    member a.flatten(?startDim:int, ?endDim:int) =
+        if a.dim < 2 then
+            a
+        else
+            let startDim = defaultArg startDim 0
+            let endDim = defaultArg endDim (a.dim - 1)
+            Shape.checkCanFlatten a.shape startDim endDim
+            a.view(a.shape |> Shape.flatten startDim endDim)
+
+    /// Unflattens a tensor dimension by expanding it to the given shape.
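    // Shape.complete resolves a single -1 in view from the element count, so for
    // a 6-element tensor (hypothetical values):
    //   t.view([2; -1]) -> shape [2; 3]
    //   t.view(-1)      -> shape [6]   (via the int overload above)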
+    /// Unflattens a tensor dimension by expanding it to the given shape.
+    /// The dimension to unflatten.
+    /// New shape of the unflattened dimension.
+    member a.unflatten(dim:int, unflattenedShape:seq<int>) =
+        let dim = Shape.completeDim a.dim dim
+        if Shape.nelement (unflattenedShape |> Array.ofSeq) <> a.shape[dim] then failwithf "Expecting unflattenedShape (%A) to have the same number of elements as tensor's shape (%A) at given dim (%A)" unflattenedShape a.shape dim
+        let newShape = a.shape |> Array.removeAt dim |> Array.insertManyAt dim unflattenedShape
+        a.view(newShape)
+
+    member internal a.clampWithMask(?low:scalar, ?high:scalar) =
+        let a = a
+        let lowTensor, highTensor =
+            match low, high with
+            | Some l, Some h -> a.like(l), a.like(h)
+            | Some l, None -> a.like(l), a.like(System.Double.PositiveInfinity) // Having PositiveInfinity as upper limit is critical here, using a.max() does not work for some edge cases
+            | None, Some h -> a.like(System.Double.NegativeInfinity), a.like(h) // Having NegativeInfinity as lower limit is critical here, using a.min() does not work for some edge cases
+            | None, None -> failwithf "Expecting at least one of low and high"
+        let mask() = // one-zero mask where the clamped values are zero and the rest are one
+            let ll = lowTensor.expand(a.shape)
+            let hh = highTensor.expand(a.shape)
+            1 - (a.lt(ll) + a.gt(hh)).cast(a.dtype)
+        match a with
+        | TensorC(ap) -> let result, mask = ap.ClampT(lowTensor.primalRaw, highTensor.primalRaw), mask() in TensorC(result), mask
+
+    /// Clamps all elements in input into the range [low..high] and returns the resulting tensor.
+    /// The lower-bound of the range to be clamped to.
+    /// The upper-bound of the range to be clamped to.
+    member a.clamp(?low:scalar, ?high:scalar) = a.clampWithMask(?low=low, ?high=high) |> fst
+
+    /// Returns a new tensor with the signs of the elements of input.
+    /// The tensor will have the same element type as the input tensor.
+    member a.sign() =
+        let inline fRaw(a:RawTensor) = a.SignT()
+        Tensor.OpUnary(a, fRaw)
+    // static member Sign(a:Tensor) = a.sign() // not supported because the FSharp.Core sign operator returns int
+
+    /// Returns a new tensor with the floor of the elements of input, the largest integer less than or equal to each element.
+    /// The tensor will have the same element type as the input tensor.
+    member a.floor() =
+        let inline fRaw(a:RawTensor) = a.FloorT()
+        Tensor.OpUnary(a, fRaw)
+
+    /// A method to enable the use of the F# function floor.
+    static member Floor(a:Tensor) = a.floor() // needed for FSharp.Core floor operator overload
+
+    /// Returns a new tensor with the ceil of the elements of input, the smallest integer greater than or equal to each element.
+    /// The tensor will have the same element type as the input tensor.
+    member a.ceil() =
+        let inline fRaw(a:RawTensor) = a.CeilT()
+        Tensor.OpUnary(a, fRaw)
+
+    /// A method to enable the use of the F# function ceil.
+    static member Ceiling(a:Tensor) = a.ceil() // needed for FSharp.Core ceil operator overload
+
+    /// Returns a new tensor with each of the elements of input rounded to the closest integer.
+    /// The tensor will have the same element type as the input tensor.
+    member a.round() =
+        let inline fRaw(a:RawTensor) = a.RoundT()
+        Tensor.OpUnary(a, fRaw)
+
+    /// A method to enable the use of the F# function round.
+    static member Round(a:Tensor) = a.round() // needed for FSharp.Core round operator overload
+
+    /// Computes the element-wise absolute value of the given input tensor.
+ member a.abs() = + let inline fRaw(a:RawTensor) = a.AbsT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function abs. + static member Abs(a:Tensor) : Tensor = a.abs() // needed for FSharp.Core abs operator overload + + /// Applies the rectified linear unit function element-wise. + member a.relu() = + let inline fRaw(a:RawTensor) = a.ReluT() + Tensor.OpUnary(a, fRaw) + + /// Applies the leaky rectified linear unit function element-wise + /// \[\text{leakyRelu}(x) = \max(0, x) + \text{negativeSlope} * \min(0, x)\] + /// Controls the angle of the negative slope. Default: 0.01. + member a.leakyRelu(?negativeSlope:float) = + let negativeSlope = defaultArg negativeSlope 0.01 + let zeros = a.zerosLike() in zeros.max(a) + negativeSlope * zeros.min(a) + + /// Applies the sigmoid element-wise function + /// \[\text{sigmoid}(x) = \frac{1}{1 + \exp(-x)}\] + member a.sigmoid() = + let inline fRaw(a:RawTensor) = a.SigmoidT() + Tensor.OpUnary(a, fRaw) + + /// Applies the exp function element-wise. + member a.exp() = + let inline fRaw(a:RawTensor) = a.ExpT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function exp. + static member Exp(a:Tensor) = a.exp() // needed for FSharp.Core exp operator overload + + /// Returns a new tensor with the natural logarithm of the elements of input. + /// \[y_{i} = \log_{e} (x_{i})\] + member a.log() = + let inline fRaw(a:RawTensor) = a.LogT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function log. + static member Log(a:Tensor) = a.log() // needed for FSharp.Core log operator overload + + /// Returns the logarithm of the tensor after clamping the tensor so that all its elements are greater than epsilon. This is to avoid a -inf result for elements equal to zero. + member a.safelog(?epsilon:float) = + let epsilon = defaultArg epsilon 1e-12 + a.clamp(low=epsilon).log() + + /// Applies the softplus function element-wise. + /// \[\text{softplus}(x) = \frac{1}{\beta} * \log(1 + \exp(\beta * x))\] + member a.softplus() = + let inline fRaw(a:RawTensor) = a.SoftplusT() + Tensor.OpUnary(a, fRaw) + + /// Returns a new tensor with the logarithm to the base 10 of the elements of input. + /// \[y_{i} = \log_{10} (x_{i})\] + member a.log10() = + let inline fRaw(a:RawTensor) = a.Log10T() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function log10. + static member Log10(a:Tensor) = a.log10() // needed for FSharp.Core log10 operator overload + + /// Returns a new tensor with the square-root of the elements of input. + member a.sqrt() = + let inline fRaw(a:RawTensor) = a.SqrtT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function sqrt. + static member Sqrt(a:Tensor) = a.sqrt() // needed for FSharp.Core sqrt operator overload + + /// Returns a new tensor with the sine of the elements of input + member a.sin() = + let inline fRaw(a:RawTensor) = a.SinT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function sin. + static member Sin(a:Tensor) = a.sin() // needed for FSharp.Core sin operator overload + + /// Returns a new tensor with the cosine of the elements of input + member a.cos() = + let inline fRaw(a:RawTensor) = a.CosT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function cos. 
+ static member Cos(a:Tensor) = a.cos() // needed for FSharp.Core cos operator overload + + /// Returns a new tensor with the tangent of the elements of input + member a.tan() = + let inline fRaw(a:RawTensor) = a.TanT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function tan. + static member Tan(a:Tensor) = a.tan() // needed for FSharp.Core tan operator overload + + /// Returns a new tensor with the hyperbolic sine of the elements of input. + member a.sinh() = + let inline fRaw(a:RawTensor) = a.SinhT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function sinh. + static member Sinh(a:Tensor) = a.sinh() // needed for FSharp.Core sinh operator overload + + /// Returns a new tensor with the hyperbolic cosine of the elements of input. + member a.cosh() = + let inline fRaw(a:RawTensor) = a.CoshT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function cosh. + static member Cosh(t:Tensor) = t.cosh() // needed for FSharp.Core cosh operator overload + + /// Returns a new tensor with the hyperbolic tangent of the elements of input. + member a.tanh() = + let inline fRaw(a:RawTensor) = a.TanhT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function tanh. + static member Tanh(t:Tensor) = t.tanh() // needed for FSharp.Core tanh operator overload + + /// Returns a new tensor with the arcsine of the elements of input. + member a.asin() = + let inline fRaw(a:RawTensor) = a.AsinT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function asin. + static member Asin(t:Tensor) = t.asin() // needed for FSharp.Core asin operator overload + + /// Returns a new tensor with the arccosine of the elements of input. + member a.acos() = + let inline fRaw(a:RawTensor) = a.AcosT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function acos. + static member Acos(t:Tensor) = t.acos() // needed for FSharp.Core acos operator overload + + /// Returns a new tensor with the arctangent of the elements of input. + member a.atan() = + let inline fRaw(a:RawTensor) = a.AtanT() + Tensor.OpUnary(a, fRaw) + + /// A method to enable the use of the F# function atan. + static member Atan(t:Tensor) = t.atan() // needed for FSharp.Core atan operator overload + + /// Add the given tensor as a slice at the given location. + member a.addSlice(location:seq, b:Tensor) = + let location = location |> Seq.toArray + Shape.checkCanAddSlice a.shape location b.shape + if a.shape = b.shape && location |> Array.forall ((=) 0) then a + b // No need to do the slice addition below + else + let inline fRaw(a:RawTensor,b) = a.AddTTSlice(location, b) + Tensor.OpBinary(a, b, fRaw) + + /// Applies a softmax function. + /// Softmax is defined as: \text{softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}. + /// A dimension along which softmax will be computed. + member a.softmax(dim:int) = + let dim = Shape.completeDim a.dim dim // Handles -1 semantics + let e = (a - a.max().noDiff()).exp() + let esum = e.sum(dim, keepDim=true) + e / esum + + /// Applies a softmax followed by a logarithm. + /// A dimension along which softmax will be computed. + member a.logsoftmax(dim:int) = + let dim = Shape.completeDim a.dim dim // Handles -1 semantics + a - a.logsumexp(dim, keepDim=true) + + /// Applies a logsumexp. + /// The dimension to reduce. + /// Whether the output tensor has dim retained or not. 
+    member a.logsumexp(dim:int, ?keepDim:bool) =
+        let dim = Shape.completeDim a.dim dim // Handles -1 semantics
+        let keepDim = defaultArg keepDim false
+        let amax = a.max().noDiff()
+        let e = (a - amax).exp()
+        let res = amax + e.sum(dim).add(System.Single.Epsilon).log()
+        if keepDim then res.unsqueeze(dim) else res
+
+    /// Creates a criterion that measures the mean squared error (squared L2 norm) between each element in the input and the target.
+    /// The target tensor.
+    /// Optionally specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': the sum of the output will be divided by the number of elements in the output, 'sum': the output will be summed. Note: size_average and reduce are in the process of being deprecated, and in the meantime, specifying either of those two args will override reduction. Default: 'mean'.
+    member input.mseLoss(target:Tensor, ?reduction:string) =
+        if input.shape <> target.shape then failwithf "Expecting input.shape (%A) and target.shape (%A) to be the same" input.shape target.shape
+        let reduction = defaultArg reduction "mean"
+        if not (reduction = "none" || reduction = "mean" || reduction = "sum") then failwithf "Expecting reduction (%A) to be one of (none, mean, sum)" reduction
+        let z = input - target
+        let l = z * z
+        if reduction = "none" then
+            l
+        elif reduction = "mean" then
+            l.mean()
+        else // reduction = "sum"
+            l.sum()
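+
+    // Illustrative usage sketch (not part of the original patch):
+    //   let pred   = dsharp.tensor [1.0; 2.0; 3.0]
+    //   let target = dsharp.tensor [1.0; 0.0; 5.0]
+    //   pred.mseLoss(target)                   // mean of [0.; 4.; 4.] = 2.6667
+    //   pred.mseLoss(target, reduction="sum")  // 8.0
+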
then failwithf "Expecting target values to be between 0 and 1, received %A" target + if input.dim < 1 then let ret:Tensor = input.view(-1).bceLoss(target.view(-1), ?weight=weight, ?reduction=reduction) in if ret.dim = 0 then ret else ret[0] + else + let n = input.shape[0] + let weight = defaultArg weight (input.onesLike(shape=[|n|])) + if weight.shape[0] <> n then failwithf "Expecting weight to be a vector of size %A, but received %A" n weight.shape[0] + let reduction = defaultArg reduction "mean" + if not (reduction = "none" || reduction = "mean" || reduction = "sum") then failwithf "Expecting reduction (%A) to be one of (none, mean, sum)" reduction + let epsilon = 1e-12 + let clampLog = -100 + let l = -weight.unsqueezeAs(input)*(target * input.safelog(epsilon).clamp(low=clampLog) + (1.-target) * (1.-input).safelog(epsilon).clamp(low=clampLog)) + if reduction = "none" then + l + elif reduction = "mean" then + l.mean() + else // reduction = "sum" + l.sum() + + /// This criterion combines logsoftmax and nllLoss in a single function + /// The target tensor. + /// A optional manual rescaling weight given to the loss of each batch element. + /// Optionally specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': the sum of the output will be divided by the number of elements in the output, 'sum': the output will be summed. Note: size_average and reduce are in the process of being deprecated, and in the meantime, specifying either of those two args will override reduction. Default: 'mean'. + member input.crossEntropyLoss(target:Tensor, ?weight:Tensor, ?reduction:string) = + input.logsoftmax(dim=1).nllLoss(target, ?weight=weight, ?reduction=reduction) + + /// The negative log likelihood loss. + /// The target tensor. + /// A optional manual rescaling weight given to the loss of each batch element. + /// Optionally specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': the sum of the output will be divided by the number of elements in the output, 'sum': the output will be summed. Note: size_average and reduce are in the process of being deprecated, and in the meantime, specifying either of those two args will override reduction. Default: 'mean'. + member input.nllLoss(target:Tensor, ?weight:Tensor, ?reduction:string) = + let n, classes, d = + if input.dim < 2 + then failwithf "Expecting either: input with shape (N,C) and target with shape (N); or input with shape (N,C,d1,d2,...,dk) and target with shape (N,d1,d2,...,dk). Received input.shape %A and target.shape %A" input.shape target.shape + elif input.dim = 2 then + let n, c = input.shape[0], input.shape[1] + if target.shape <> [|n|] then failwithf "Expecting either: input with shape (N,C) and target with shape (N); or input with shape (N,C,d1,d2,...,dk) and target with shape (N,d1,d2,...,dk). Received input.shape %A and target.shape %A" input.shape target.shape + n, c, [||] + else + let n, c, d = input.shape[0], input.shape[1], input.shape[2..] + if target.shape[0] <> n then failwithf "Expecting either: input with shape (N,C) and target with shape (N); or input with shape (N,C,d1,d2,...,dk) and target with shape (N,d1,d2,...,dk). Received input.shape %A and target.shape %A" input.shape target.shape + if d <> target.shape[1..] then failwithf "Expecting either: input with shape (N,C) and target with shape (N); or input with shape (N,C,d1,d2,...,dk) and target with shape (N,d1,d2,...,dk). 
Received input.shape %A and target.shape %A" input.shape target.shape + n, c, d + let target = target.int() + let weightSpecified, weight = + match weight with + | Some w -> + if w.dim <> 1 || w.shape[0] <> classes then failwithf "Expecting weight with shape (C). Received weight.shape %A" w.shape + let vv = Array.create input.dim 1 + vv[1] <- classes + true, w.view(vv).expandAs(input).gather(1, target.unsqueeze(1)).squeeze(1) + | None -> false, input.zeroLike() + let reduction = defaultArg reduction "mean" + if not (reduction = "none" || reduction = "mean" || reduction = "sum") then failwithf "Expecting reduction (%A) to be one of (none, mean, sum)" reduction + let mutable l = input.gather(1, target.unsqueeze(1)).squeeze(1).neg() + if weightSpecified then + l <- l * weight + if reduction = "none" then + l + elif reduction = "mean" then + if weightSpecified then l.sum()/weight.sum() else l.mean() + else // reduction = "sum" + l.sum() + + /// Add zero padding to each side of a tensor + /// The implicit paddings on corresponding sides of the input. + member a.pad(paddings:seq) = + let paddings = paddings |> Array.ofSeq + Shape.checkCanPad a.shape paddings + if paddings |> Array.sum = 0 then + a + else + let shape = Array.copy a.shape + for i in 0..shape.Length-1 do + shape[i] <- shape[i] + paddings[i] * 2 + let ret = a.zerosLike(shape) + ret.addSlice(paddings, a) + + /// Applies a 1D max pooling over an input signal composed of several input planes, returning the max indices along with the outputs. + /// The size of the window to take a max over. + /// The stride of the window. Default value is kernelSize. + /// The implicit zero padding to be added on both sides. + member a.maxpool1di(kernelSize:int, ?stride:int, ?padding:int) = + let stride = defaultArg stride kernelSize + let padding = defaultArg padding 0 + Shape.checkCanMaxpool1d a.dtype a.shape kernelSize stride padding |> ignore + match a with + | TensorC(ap) -> let result, indices = ap.MaxPool1D(kernelSize, stride, padding) in TensorC(result), TensorC(indices) + + /// Applies a 1D max pooling over an input signal composed of several input planes. + /// The size of the window to take a max over. + /// The stride of the window. Default value is kernelSize. + /// The implicit zero padding to be added on both sides. + member a.maxpool1d(kernelSize:int, ?stride:int, ?padding:int) = a.maxpool1di(kernelSize, ?stride=stride, ?padding=padding) |> fst + + /// Computes a partial inverse of maxpool1di + /// The indices selected by maxpool1di. + /// The size of the window to take a max over. + /// The stride of the window. Default value is kernelSize. + /// The implicit zero padding to be added on both sides. + /// The targeted output size. + member a.maxunpool1d(indices:Tensor, kernelSize:int, ?stride:int, ?padding:int, ?outputSize:seq) = + let stride = defaultArg stride kernelSize + let padding = defaultArg padding 0 + let outputSize = + match outputSize with + | Some o -> let o = o |> Array.ofSeq in if o.Length <> 3 then failwithf "Expecting outputSize to be 3-dimensional" else o + | None -> + let inputSize = a.shape[2] + [|indices.shape[0]; indices.shape[1]; ((inputSize-1) * stride - 2*padding + kernelSize)|] + Shape.checkCanMaxunpool1d a.dtype a.shape indices.dtype indices.shape outputSize |> ignore + let inline fRaw(a:RawTensor) = a.MaxUnpool1D(indices.primalRaw, outputSize) + Tensor.OpUnary(a, fRaw) + + /// Applies a 2D max pooling over an input signal composed of several input planes, returning the max indices along with the outputs. 
+ /// The size of the window to take a max over. + /// The stride of the window. Default value is kernelSize. + /// The implicit zero padding to be added on both sides. + /// The sizes of the window to take a max over. + /// The strides of the window. Default value is kernelSize. + /// The implicit zero paddings to be added on corresponding sides. + member a.maxpool2di(?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq, ?strides:seq, ?paddings:seq) = + let kernelSizes, strides, paddings = Shape.resolve2dMaxPoolSizes kernelSize kernelSizes stride strides padding paddings + Shape.checkCanMaxpool2d a.dtype a.shape kernelSizes strides paddings |> ignore + match a with + | TensorC(ap) -> let result, indices = ap.MaxPool2D(kernelSizes, strides, paddings) in TensorC(result), TensorC(indices) + + /// Applies a 2D max pooling over an input signal composed of several input planes. + /// The size of the window to take a max over. + /// The stride of the window. Default value is kernelSize. + /// The implicit zero padding to be added on both sides. + /// The sizes of the window to take a max over. + /// The strides of the window. Default value is kernelSize. + /// The implicit zero paddings to be added on corresponding sides. + member a.maxpool2d(?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq, ?strides:seq, ?paddings:seq) = a.maxpool2di(?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings) |> fst + + /// Computes a partial inverse of maxpool2di + /// The indices selected by maxpool2di. + /// The size of the window to take a max over. + /// The stride of the window. Default value is kernelSize. + /// The implicit zero padding to be added on both sides. + /// The sizes of the window to take a max over. + /// The strides of the window. Default value is kernelSizes. + /// The implicit zero paddings to be added on corresponding sides. + /// The targeted output size. + member a.maxunpool2d(indices:Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq, ?strides:seq, ?paddings:seq, ?outputSize:seq) = + let kernelSizes, strides, paddings = Shape.resolve2dMaxPoolSizes kernelSize kernelSizes stride strides padding paddings + let outputSize = + match outputSize with + | Some o -> let o = o |> Array.ofSeq in if o.Length <> 4 then failwithf "Expecting outputSize to be 4-dimensional" else o + | None -> + let inputHeight = a.shape[2] + let inputWidth = a.shape[3] + [|indices.shape[0]; indices.shape[1]; ((inputHeight-1) * strides[0] - 2*paddings[0] + kernelSizes[0]); ((inputWidth-1) * strides[1] - 2*paddings[1] + kernelSizes[1])|] + Shape.checkCanMaxunpool2d a.dtype a.shape indices.dtype indices.shape outputSize |> ignore + let inline fRaw(a:RawTensor) = a.MaxUnpool2D(indices.primalRaw, outputSize) + Tensor.OpUnary(a, fRaw) + + /// Applies a 3D max pooling over an input signal composed of several input planes, returning the max indices along with the outputs. + /// The size of the window to take a max over. + /// The stride of the window. Default value is kernelSize. + /// The implicit zero padding to be added on both sides. + /// The sizes of the window to take a max over. + /// The strides of the window. Default value is kernelSize. + /// The implicit zero paddings to be added on corresponding sides. 
+ member a.maxpool3di(?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq, ?strides:seq, ?paddings:seq) = + let kernelSizes, strides, paddings = Shape.resolve3dMaxPoolSizes kernelSize kernelSizes stride strides padding paddings + Shape.checkCanMaxpool3d a.dtype a.shape kernelSizes strides paddings |> ignore + match a with + | TensorC(ap) -> let result, indices = ap.MaxPool3D(kernelSizes, strides, paddings) in TensorC(result), TensorC(indices) + + /// Applies a 3D max pooling over an input signal composed of several input planes. + /// The size of the window to take a max over. + /// The stride of the window. Default value is kernelSize. + /// The implicit zero padding to be added on both sides. + /// The sizes of the window to take a max over. + /// The strides of the window. Default value is kernelSizes. + /// The implicit zero paddings to be added on corresponding sides. + member a.maxpool3d(?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq, ?strides:seq, ?paddings:seq) = a.maxpool3di(?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings) |> fst + + /// Computes a partial inverse of maxpool3di + /// The indices selected by maxpool3di. + /// The size of the window to take a max over. + /// The stride of the window. Default value is kernelSize. + /// The implicit zero padding to be added on both sides. + /// The sizes of the window to take a max over. + /// The strides of the window. Default value is kernelSizes. + /// The implicit zero paddings to be added on corresponding sides. + /// The targeted output size. + member a.maxunpool3d(indices:Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq, ?strides:seq, ?paddings:seq, ?outputSize:seq) = + let kernelSizes, strides, paddings = Shape.resolve3dMaxPoolSizes kernelSize kernelSizes stride strides padding paddings + let outputSize = + match outputSize with + | Some o -> let o = o |> Array.ofSeq in if o.Length <> 5 then failwithf "Expecting outputSize to be 5-dimensional" else o + | None -> + let inputDepth = a.shape[2] + let inputHeight = a.shape[3] + let inputWidth = a.shape[4] + [|indices.shape[0]; indices.shape[1]; ((inputDepth-1) * strides[0] - 2*paddings[0] + kernelSizes[0]); ((inputHeight-1) * strides[1] - 2*paddings[1] + kernelSizes[1]); ((inputWidth-1) * strides[2] - 2*paddings[2] + kernelSizes[2])|] + Shape.checkCanMaxunpool3d a.dtype a.shape indices.dtype indices.shape outputSize |> ignore + let inline fRaw(a:RawTensor) = a.MaxUnpool3D(indices.primalRaw, outputSize) + Tensor.OpUnary(a, fRaw) + + /// Applies a 1D convolution over an input signal composed of several input planes + /// The filters. + /// The stride of the convolving kernel. + /// The implicit paddings on both sides of the input. + /// The spacing between kernel elements. 
+ member a.conv1d(filters:Tensor, ?stride:int, ?padding:int, ?dilation:int) = + let b = filters + let stride = defaultArg stride 1 + let padding = defaultArg padding 0 + let dilation = defaultArg dilation 1 + Shape.checkCanConv1d a.deviceType b.deviceType a.dtype b.dtype a.shape b.shape stride padding dilation |> ignore + let mutable b = b + if dilation > 1 then + b <- b.dilate([|1;1;dilation|]) + let inline fRaw(a:RawTensor,b) = a.Conv1D(b, stride, padding) + Tensor.OpBinary(a, b, fRaw) + + // a: input, NxCxI (batchSize x inputChannels x inputLength) + // b: filters, KxCxF (outputChannels x inputChannels x kernelLength) + // t: output, NxKxL (batchSize x outputChannels x outputLength) + static member internal conv1dReverseDiff(a: Tensor, b:Tensor, fderivative:Tensor, aConst:bool, bConst:bool, stride:int, padding:int) = + let a = if aConst then a else a.primal + let b = if bConst then b else b.primal + let batchSize = fderivative.shape[0] + let outputChannels = fderivative.shape[1] + // let outputLength = fderivative.shape[2] + let inputChannels = a.shape[1] + let inputLength = a.shape[2] + let kernelLength = b.shape[2] + let mutable fderivative = fderivative + if stride > 1 then + fderivative <- fderivative.dilate([|1;1;stride|]) + let mutable aderivative = a.zeroLike() + let mutable bderivative = b.zeroLike() + if not aConst then + // propagate to a + let bFlipped = b.flip([|2|]) + let mutable ad = fderivative.conv1d(bFlipped.transpose(0, 1), padding=kernelLength-1) + if padding > 0 then + let adBounds = array2D [[0; batchSize-1; 0]; [0; inputChannels-1; 0]; [padding; padding + inputLength - 1; 0]] + ad <- ad.GetSlice(adBounds) + ad <- ad.view([|batchSize; inputChannels; inputLength|]) + aderivative <- a.zerosLike().addSlice([|0; 0; 0|], ad) + if not bConst then + // propagate to b + let aa = a.transpose(0, 1) + let fd = fderivative.transpose(0, 1) + let bd = aa.conv1d(fd, padding=padding).transpose(0, 1) + let bdBounds = array2D [[0;outputChannels-1;0]; [0;inputChannels-1;0]; [0;kernelLength-1;0]] + bderivative <- bd.GetSlice(bdBounds) + aderivative, bderivative + + /// Applies a 1D transposed convolution operator over an input signal composed of several input planes, sometimes also called 'deconvolution'. + /// The filters. + /// The stride of the convolving kernel. + /// The implicit padding on both sides of the input. + /// The spacing between kernel elements. + /// The additional size added to one side of each dimension in the output shape. + member a.convTranspose1d(filters:Tensor, ?stride:int, ?padding:int, ?dilation:int, ?outputPadding:int) = + let b = filters + let stride = defaultArg stride 1 + let padding = defaultArg padding 0 + let dilation = defaultArg dilation 1 + let outputPadding = defaultArg outputPadding 0 + + let _, _, _, _, _, outputShape = + Shape.checkCanConvTranspose1d a.deviceType b.deviceType a.dtype b.dtype a.shape b.shape stride padding dilation outputPadding + let mutable b = b + if dilation > 1 then + b <- b.dilate([|1; 1; dilation|]) + let fderivative = a + let a = a.zerosLike(outputShape) + // Use convolution reverse mode to implement transposed convolution + let (aderivative:Tensor), _ = Tensor.conv1dReverseDiff(a, b, fderivative, aConst=false, bConst=true, stride=stride, padding=padding) + aderivative + + /// Applies a 2D convolution over an input signal composed of several input planes + /// The filters. + /// The stride of the convolving kernel. + /// The implicit padding on corresponding sides of the input. + /// The spacing between kernel elements. 
+ /// The strides of the convolving kernel. + /// The implicit paddings on corresponding sides of the input. + /// The spacings between kernel elements. + member a.conv2d(filters:Tensor, ?stride:int, ?padding:int, ?dilation:int, ?strides:seq, ?paddings:seq, ?dilations:seq) = + let b = filters + let strides, paddings, dilations = Shape.resolve2dConvSizes stride strides padding paddings dilation dilations + Shape.checkCanConv2d a.deviceType b.deviceType a.dtype b.dtype a.shape b.shape strides paddings dilations |> ignore + let mutable b = b + if dilations[0] > 1 || dilations[1] > 1 then + b <- b.dilate([|1; 1; dilations[0]; dilations[1]|]) + let inline fRaw(a:RawTensor,b) = a.Conv2D(b, strides, paddings) + Tensor.OpBinary(a, b, fRaw) + + // a: input, NxCxHxW (batchSize x inputChannels x inputHeight x inputWidth) + // b: filters, KxCxFxG (outputChannels x inputChannels x kernelHeight x kernelWidth) + // t: output, NxKxLxM (batchSize x outputChannels x outputHeight x outputWidth) + static member internal conv2dReverseDiff(a: Tensor, b:Tensor, fderivative:Tensor, aConst:bool, bConst:bool, strides:int[], paddings:int[]) = + let a = if aConst then a else a.primal + let b = if bConst then b else b.primal + let batchSize = fderivative.shape[0] + let outputChannels = fderivative.shape[1] + // let outputHeight = fderivative.shape[2] + // let outputWidth = fderivative.shape[3] + let inputChannels = a.shape[1] + let inputHeight = a.shape[2] + let inputWidth = a.shape[3] + let kernelHeight = b.shape[2] + let kernelWidth = b.shape[3] + let mutable fderivative = fderivative + if strides[0] > 1 || strides[1] > 1 then + fderivative <- fderivative.dilate([|1;1;strides[0];strides[1]|]) + let mutable aderivative = a.zeroLike() + let mutable bderivative = b.zeroLike() + if not aConst then + // propagate to a + let bFlipped = b.flip([|2;3|]) + let mutable ad = fderivative.conv2d(bFlipped.transpose(0, 1), paddings=[|kernelHeight-1; kernelWidth-1|]) + if paddings[0] > 0 || paddings[1] > 0 then + let adBounds = array2D [[0; batchSize-1; 0]; + [0; inputChannels-1; 0]; + [paddings[0]; paddings[0] + inputHeight - 1; 0]; + [paddings[1]; paddings[1] + inputWidth - 1; 0]] + ad <- ad.GetSlice(adBounds) + ad <- ad.view([|batchSize; inputChannels; inputHeight; inputWidth|]) + aderivative <- a.zerosLike().addSlice([|0; 0; 0; 0|], ad) + if not bConst then + // propagate to b + let aa = a.transpose(0, 1) + let fd = fderivative.transpose(0, 1) + let bd = aa.conv2d(fd, paddings=paddings).transpose(0, 1) + let bdBounds = array2D [[0;outputChannels-1;0]; [0;inputChannels-1;0]; [0;kernelHeight-1;0]; [0;kernelWidth-1;0]] + bderivative <- bd.GetSlice(bdBounds) + aderivative, bderivative + + /// Applies a 2D transposed convolution operator over an input signal composed of several input planes, sometimes also called 'deconvolution'. + /// The filters. + /// The stride of the convolving kernel. + /// The implicit padding on both sides of the input. + /// The spacing between kernel elements. + /// The strides of the convolving kernel. + /// The implicit paddings on corresponding sides of the input. + /// The spacings between kernel elements. + /// The additional size added to one side of each dimension in the output shape. + /// The additional sizes added to one side of each dimension in the output shape. 
+ member a.convTranspose2d(filters:Tensor, ?stride:int, ?padding:int, ?dilation:int, ?outputPadding:int, ?strides:seq, ?paddings:seq, ?dilations:seq, ?outputPaddings:seq) = + let b = filters + let strides, paddings, dilations = Shape.resolve2dConvSizes stride strides padding paddings dilation dilations + let outputPaddings = Shape.resolve2dConvOutputPadding outputPadding outputPaddings + let _, _, _, _, outputShape = + Shape.checkCanConvTranspose2d a.deviceType b.deviceType a.dtype b.dtype a.shape b.shape strides paddings dilations outputPaddings + let mutable b = b + if dilations[0] > 1 || dilations[1] > 1 then + b <- b.dilate([|1; 1; dilations[0]; dilations[1]|]) + let fderivative = a + let a = a.zerosLike(outputShape) + // Use convolution reverse mode to implement transposed convolution + let (aderivative:Tensor), _ = Tensor.conv2dReverseDiff(a, b, fderivative, aConst=false, bConst=true, strides=strides, paddings=paddings) + aderivative + + /// Applies a 3D convolution over an input signal composed of several input planes + /// The filters. + /// The stride of the convolving kernel. + /// The implicit padding on corresponding sides of the input. + /// The spacing between kernel elements. + /// The strides of the convolving kernel. + /// The implicit paddings on corresponding sides of the input. + /// The spacings between kernel elements. + member a.conv3d(filters:Tensor, ?stride:int, ?padding:int, ?dilation:int, ?strides:seq, ?paddings:seq, ?dilations:seq) = + let b = filters + let strides, paddings, dilations = Shape.resolve3dConvSizes stride strides padding paddings dilation dilations + Shape.checkCanConv3d a.deviceType b.deviceType a.dtype b.dtype a.shape b.shape strides paddings dilations |> ignore + let mutable b = b + if dilations[0] > 1 || dilations[1] > 1 || dilations[2] > 1 then + b <- b.dilate([|1; 1; dilations[0]; dilations[1]; dilations[2]|]) + let inline fRaw(a:RawTensor,b) = a.Conv3D(b, strides, paddings) + Tensor.OpBinary(a, b, fRaw) + + // a: input, NxCxDxHxW (batchSize x inputChannels x inputDepth x inputHeight x inputWidth) + // b: filters, KxCxExFxG (outputChannels x inputChannels x kernelDepth x kernelHeight x kernelWidth) + // t: output, NxKxLxMxN (batchSize x outputChannels x outputDepth x outputHeight x outputWidth) + static member internal conv3dReverseDiff(a: Tensor, b:Tensor, fderivative:Tensor, aConst:bool, bConst:bool, strides:int[], paddings:int[]) = + let a = if aConst then a else a.primal + let b = if bConst then b else b.primal + let batchSize = fderivative.shape[0] + let outputChannels = fderivative.shape[1] + // let outputDepth = fderivative.shape[2] + // let outputHeight = fderivative.shape[3] + // let outputWidth = fderivative.shape[4] + let inputChannels = a.shape[1] + let inputDepth = a.shape[2] + let inputHeight = a.shape[3] + let inputWidth = a.shape[4] + let kernelDepth = b.shape[2] + let kernelHeight = b.shape[3] + let kernelWidth = b.shape[4] + let mutable fderivative = fderivative + if strides[0] > 1 || strides[1] > 1 || strides[2] > 1 then + fderivative <- fderivative.dilate([|1;1;strides[0];strides[1];strides[2]|]) + let mutable aderivative = a.zeroLike() + let mutable bderivative = b.zeroLike() + if not aConst then + // propagate to a + let bFlipped = b.flip([|2;3;4|]) + let mutable ad = fderivative.conv3d(bFlipped.transpose(0, 1), paddings=[|kernelDepth-1; kernelHeight-1; kernelWidth-1|]) + if paddings[0] > 0 || paddings[1] > 0 || paddings[2] > 0 then + let adBounds = array2D [[0; batchSize-1; 0]; + [0; inputChannels-1; 0]; + [paddings[0]; 
paddings[0] + inputDepth - 1; 0];
+                                    [paddings[1]; paddings[1] + inputHeight - 1; 0];
+                                    [paddings[2]; paddings[2] + inputWidth - 1; 0]]
+            ad <- ad.GetSlice(adBounds)
+            ad <- ad.view([|batchSize; inputChannels; inputDepth; inputHeight; inputWidth|])
+            aderivative <- a.zerosLike().addSlice([|0; 0; 0; 0; 0|], ad)
+        if not bConst then
+            // propagate to b
+            let aa = a.transpose(0, 1)
+            let fd = fderivative.transpose(0, 1)
+            let bd = aa.conv3d(fd, paddings=paddings).transpose(0, 1)
+            let bdBounds = array2D [[0;outputChannels-1;0]; [0;inputChannels-1;0]; [0;kernelDepth-1;0]; [0;kernelHeight-1;0]; [0;kernelWidth-1;0]]
+            bderivative <- bd.GetSlice(bdBounds)
+        aderivative, bderivative
+
+    /// Applies a 3D transposed convolution operator over an input signal composed of several input planes, sometimes also called 'deconvolution'.
+    /// The filters.
+    /// The stride of the convolving kernel.
+    /// The implicit padding on both sides of the input.
+    /// The spacing between kernel elements.
+    /// The strides of the convolving kernel.
+    /// The implicit paddings on corresponding sides of the input.
+    /// The spacings between kernel elements.
+    /// The additional size added to one side of each dimension in the output shape.
+    /// The additional sizes added to one side of each dimension in the output shape.
+    member a.convTranspose3d(filters:Tensor, ?stride:int, ?padding:int, ?dilation:int, ?outputPadding:int, ?strides:seq<int>, ?paddings:seq<int>, ?dilations:seq<int>, ?outputPaddings:seq<int>) =
+        let b = filters
+        let strides, paddings, dilations = Shape.resolve3dConvSizes stride strides padding paddings dilation dilations
+        let outputPaddings = Shape.resolve3dConvOutputPadding outputPadding outputPaddings
+        let _, _, _, _, outputShape =
+            Shape.checkCanConvTranspose3d a.deviceType b.deviceType a.dtype b.dtype a.shape b.shape strides paddings dilations outputPaddings
+        let mutable b = b
+        if dilations[0] > 1 || dilations[1] > 1 || dilations[2] > 1 then
+            b <- b.dilate([|1; 1; dilations[0]; dilations[1]; dilations[2]|])
+        let fderivative = a
+        let a = a.zerosLike(outputShape)
+        // Use convolution reverse mode to implement transposed convolution
+        let (aderivative:Tensor), _ = Tensor.conv3dReverseDiff(a, b, fderivative, aConst=false, bConst=true, strides=strides, paddings=paddings)
+        aderivative
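+
+    // Illustrative shape sketch (not part of the original patch; assumes dsharp.randn
+    // from the full TensorMath API). A transposed convolution inverts the shape
+    // arithmetic of the corresponding convolution:
+    //   outputSize = (inputSize - 1) * stride - 2 * padding + kernelSize + outputPadding
+    //   let x = dsharp.randn([1; 2; 3])        // batch x channels x length
+    //   let w = dsharp.randn([2; 4; 3])        // inChannels x outChannels x kernelLength
+    //   x.convTranspose1d(w, stride=2).shape   // [|1; 4; 7|], since (3-1)*2 + 3 = 7
+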
diff --git a/src/TensorMath/TensorMath.Compose.fs b/src/TensorMath/TensorMath.Compose.fs
new file mode 100644
index 0000000..c83b06c
--- /dev/null
+++ b/src/TensorMath/TensorMath.Compose.fs
@@ -0,0 +1,287 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+module TensorMath.Compose
+
+// Pipelined operations for composing Tensor -> Tensor functions.
+// The rule for binary operations like add, sub, mul, etc. is simple:
+// in the returned function, the function's argument is always taken as the first operand of the binary operation.
+// For example:
+// static member add(b:Tensor) = fun (a:Tensor) -> a.add(b)
+// static member sub(b:Tensor) = fun (a:Tensor) -> a.sub(b)
+
+type dsharp with
+    /// TBD
+    static member inline tensor(?device:Device, ?dtype:Dtype, ?backend:Backend) = fun value -> Tensor.create(value=value, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    /// Returns a tensor where each row contains indices sampled from the multinomial probability distribution located in the corresponding row of tensor input.
+    /// Number of samples to draw
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    ///
+    /// Indices are ordered from left to right according to when each was sampled (first samples are placed in first column).
+    ///
+    /// If input is a vector, out is a vector of size numSamples.
+    ///
+    /// If input is a matrix with m rows, the result is a matrix of shape (m × numSamples).
+    ///
+    static member multinomial(numSamples:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) = fun (probs:Tensor) -> probs.multinomial(numSamples, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    static member bernoulli(?device:Device, ?dtype:Dtype, ?backend:Backend) = fun (probs:Tensor) -> probs.bernoulli(?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    static member dropout(?p:double) = fun (a:Tensor) -> a.dropout(?p=p)
+
+    /// TBD
+    static member dropout2d(?p:double) = fun (a:Tensor) -> a.dropout2d(?p=p)
+
+    /// TBD
+    static member dropout3d(?p:double) = fun (a:Tensor) -> a.dropout3d(?p=p)
+
+    /// TBD
+    static member zerosLike(shape:seq<int>, ?device, ?dtype, ?backend) = fun (a:Tensor) -> a.zerosLike(shape=shape, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    static member onesLike(shape:seq<int>, ?device, ?dtype, ?backend) = fun (a:Tensor) -> a.onesLike(shape=shape, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    static member fullLike(value:scalar, ?shape, ?device, ?dtype, ?backend) = fun (a:Tensor) -> a.fullLike(value, ?shape=shape, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    static member arangeLike(endVal:float, ?startVal:float, ?step:float, ?device:Device, ?dtype:Dtype, ?backend:Backend) = fun (a:Tensor) -> a.arangeLike(endVal=endVal, ?startVal=startVal, ?step=step, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    static member arangeLike(endVal:int, ?startVal:int, ?step:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) = fun (a:Tensor) -> a.arangeLike(endVal=endVal, ?startVal=startVal, ?step=step, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    static member linspaceLike(startVal:float, endVal:float, steps:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) = fun (a:Tensor) -> a.linspaceLike(startVal=startVal, endVal=endVal, steps=steps, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    static member linspaceLike(startVal:int, endVal:int, steps:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) = fun (a:Tensor) -> a.linspaceLike(startVal=startVal, endVal=endVal, steps=steps, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    static member logspaceLike(startVal:float, endVal:float, steps:int, ?baseVal:float, ?device:Device, ?dtype:Dtype, ?backend:Backend) = fun (a:Tensor) -> a.logspaceLike(startVal=startVal, endVal=endVal, steps=steps, ?baseVal=baseVal, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    static member logspaceLike(startVal:int, endVal:int, steps:int, ?baseVal:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) = fun (a:Tensor) -> a.logspaceLike(startVal=startVal, endVal=endVal, steps=steps, ?baseVal=baseVal, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    static member onehotLike(length:int, hot:int, ?device, ?dtype, ?backend) = fun (a:Tensor) -> a.onehotLike(length, hot, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// TBD
+    static 
member randLike(shape:seq, ?device, ?dtype, ?backend) = fun (a:Tensor) -> a.randLike(shape=shape, ?device=device, ?dtype=dtype, ?backend=backend) + + /// TBD + static member randnLike(shape:seq, ?device, ?dtype, ?backend) = fun (a:Tensor) -> a.randnLike(shape=shape, ?device=device, ?dtype=dtype, ?backend=backend) + + /// TBD + static member randintLike(low:int, high:int, ?shape:seq, ?device, ?dtype, ?backend) = fun (a:Tensor) -> a.randintLike(low=low, high=high, ?shape=shape, ?device=device, ?dtype=dtype, ?backend=backend) + + /// TBD + static member like(value:obj, ?device, ?dtype, ?backend) = fun (a:Tensor) -> a.like(value, ?device=device, ?dtype=dtype, ?backend=backend) + + /// TBD + static member lt(b:Tensor) = fun (a:Tensor) -> a.lt(b) + + /// TBD + static member gt(b:Tensor) = fun (a:Tensor) -> a.gt(b) + + /// TBD + static member le(b:Tensor) = fun (a:Tensor) -> a.le(b) + + /// TBD + static member ge(b:Tensor) = fun (a:Tensor) -> a.ge(b) + + /// TBD + static member eq(b:Tensor) = fun (a:Tensor) -> a.eq(b) + + /// TBD + static member clamp(?low:scalar, ?high:scalar) = fun (a:Tensor) -> a.clamp(?low=low, ?high=high) + + /// TBD + static member diagonal(offset:int, ?dim1:int, ?dim2:int) = fun (a:Tensor) -> a.diagonal(offset=offset, ?dim1=dim1, ?dim2=dim2) + + /// TBD + static member expand(shape:seq) = fun (a:Tensor) -> a.expand(shape) + + /// TBD + static member expandAs(b:Tensor) = fun (a:Tensor) -> a.expandAs(b) + + /// TBD + static member stack(dim:int) = fun (tensors:seq) -> Tensor.stack(tensors, dim=dim) + + /// TBD + static member unstack(dim:int) = fun (a:Tensor) -> a.unstack(dim=dim) + + /// TBD + static member cat(dim:int) = fun (tensors:seq) -> Tensor.cat(tensors, dim=dim) + + /// TBD + static member split(sizes:seq, ?dim:int) = fun (a:Tensor) -> a.split(sizes, ?dim=dim) + + /// TBD + static member add(b:Tensor) = fun (a:Tensor) -> a.add(b) + + /// TBD + static member sub(b:Tensor) = fun (a:Tensor) -> a.sub(b) + + /// TBD + static member mul(b:Tensor) = fun (a:Tensor) -> a.mul(b) + + /// TBD + static member div(b:Tensor) = fun (a:Tensor) -> a.div(b) + + /// TBD + static member pow(b:Tensor) = fun (a:Tensor) -> a.pow(b) + + /// TBD + static member matmul(b:Tensor) = fun (a:Tensor) -> a.matmul(b) + + /// TBD + static member dot(b:Tensor) = fun (a:Tensor) -> a.dot(b) + + /// TBD + static member sum(dim:int, ?keepDim:bool) = fun (a:Tensor) -> a.sum(dim, ?keepDim=keepDim) + + /// TBD + static member mean(dim:int, ?keepDim:bool) = fun (a:Tensor) -> a.mean(dim, ?keepDim=keepDim) + + /// TBD + static member var(dim:int, ?keepDim:bool, ?unbiased:bool) = fun (a:Tensor) -> a.var(dim, ?keepDim=keepDim, ?unbiased=unbiased) + + /// TBD + static member std(dim:int, ?keepDim:bool, ?unbiased:bool) = fun (a:Tensor) -> a.std(dim, ?keepDim=keepDim, ?unbiased=unbiased) + + /// TBD + static member cov(?correction:int64, ?fweights:Tensor, ?aweights:Tensor) = fun (a:Tensor) -> a.cov(?correction=correction,?fweights=fweights,?aweights=aweights) + + /// TBD + static member corrcoef() = fun (a:Tensor) -> a.corrcoef() + + /// TBD + static member gather(dim:int, indices:Tensor) = fun (a:Tensor) -> a.gather(dim, indices) + + /// TBD + static member scatter(dim:int, indices:Tensor, destinationShape:seq) = fun (a:Tensor) -> a.scatter(dim, indices, destinationShape) + + /// TBD + static member transpose(dim0:int, dim1:int) = fun (a:Tensor) -> a.transpose(dim0, dim1) + + /// TBD + static member squeeze(?dim:int) = fun (a:Tensor) -> a.squeeze(?dim=dim) + + /// TBD + static member unsqueeze(dim:int) = fun 
(a:Tensor) -> a.unsqueeze(dim) + + /// TBD + static member unsqueezeAs(other:Tensor) = fun (a:Tensor) -> a.unsqueezeAs(other) + + /// TBD + static member flip(dims:seq) = fun (a:Tensor) -> a.flip(dims) + + /// TBD + static member dilate(dilations:seq) = fun (a:Tensor) -> a.dilate(dilations) + + /// TBD + static member undilate(dilations:seq) = fun (a:Tensor) -> a.undilate(dilations) + + /// TBD + static member repeat(dim:int, times:int) = fun (a:Tensor) -> a.repeat(dim, times) + + /// TBD + static member slice(index:seq) = fun (a:Tensor) -> a[index |> Seq.toArray] + + /// TBD + static member view(shape:seq) = fun (a:Tensor) -> a.view(shape) + + /// TBD + static member view(shape:int) = fun (a:Tensor) -> a.view(shape) + + /// TBD + static member viewAs(b:Tensor) = fun (a:Tensor) -> a.viewAs(b) + + /// TBD + static member flatten(startDim:int, ?endDim:int) = fun (a:Tensor) -> a.flatten(startDim=startDim, ?endDim=endDim) + + /// TBD + static member unflatten(dim:int, unflattenedShape:seq) = fun (a:Tensor) -> a.unflatten(dim, unflattenedShape) + + /// TBD + static member leakyRelu(?negativeSlope:float) = fun (a:Tensor) -> a.leakyRelu(?negativeSlope=negativeSlope) + + /// TBD + static member softmax(dim:int) = fun (a:Tensor) -> a.softmax(dim) + + /// TBD + static member logsoftmax(dim:int) = fun (a:Tensor) -> a.logsoftmax(dim) + + /// TBD + static member logsumexp(dim:int, ?keepDim:bool) = fun (a:Tensor) -> a.logsumexp(dim, ?keepDim=keepDim) + + /// TBD + static member mseLoss(target:Tensor) = fun (input:Tensor) -> input.mseLoss(target) + + /// TBD + static member bceLoss(target:Tensor) = fun (input:Tensor) -> input.bceLoss(target) + + /// TBD + static member nllLoss(target:Tensor) = fun (input:Tensor) -> input.nllLoss(target) + + /// TBD + static member crossEntropyLoss(target:Tensor) = fun (input:Tensor) -> input.crossEntropyLoss(target) + + /// TBD + static member maxpool1d(kernelSize:int, ?stride:int, ?padding:int) = fun (a:Tensor) -> a.maxpool1d(kernelSize, ?stride=stride, ?padding=padding) + + /// TBD + static member maxpool2d(?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq, ?strides:seq, ?paddings:seq) = fun (a:Tensor) -> a.maxpool2d(?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings) + + /// TBD + static member maxpool3d(?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq, ?strides:seq, ?paddings:seq) = fun (a:Tensor) -> a.maxpool3d(?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings) + + /// TBD + static member maxunpool1d(indices:Tensor, kernelSize:int, ?stride:int, ?padding:int, ?outputSize:seq) = fun (a:Tensor) -> a.maxunpool1d(indices, kernelSize, ?stride=stride, ?padding=padding, ?outputSize=outputSize) + + /// TBD + static member maxunpool2d(indices:Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq, ?strides:seq, ?paddings:seq, ?outputSize:seq) = fun (a:Tensor) -> a.maxunpool2d(indices, ?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings, ?outputSize=outputSize) + + /// TBD + static member maxunpool3d(indices:Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq, ?strides:seq, ?paddings:seq, ?outputSize:seq) = fun (a:Tensor) -> a.maxunpool3d(indices, ?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings, ?outputSize=outputSize) + + /// TBD + static member 
conv1d(b:Tensor, ?stride:int, ?padding:int, ?dilation:int) = fun (a:Tensor) -> a.conv1d(b, ?stride=stride, ?padding=padding, ?dilation=dilation) + + /// TBD + static member conv2d(b:Tensor, ?stride:int, ?strides:seq, ?padding:int, ?paddings:seq, ?dilation:int, ?dilations:seq) = fun (a:Tensor) -> a.conv2d(b, ?stride=stride, ?strides=strides, ?padding=padding, ?paddings=paddings, ?dilation=dilation, ?dilations=dilations) + + /// TBD + static member conv3d(b:Tensor, ?stride:int, ?strides:seq, ?padding:int, ?paddings:seq, ?dilation:int, ?dilations:seq) = fun (a:Tensor) -> a.conv3d(b, ?stride=stride, ?strides=strides, ?padding=padding, ?paddings=paddings, ?dilation=dilation, ?dilations=dilations) + + /// TBD + static member convTranspose1d(b:Tensor, ?stride:int, ?padding:int, ?dilation:int, ?outputPadding:int) = fun (a:Tensor) -> a.convTranspose1d(b, ?stride=stride, ?padding=padding, ?dilation=dilation, ?outputPadding=outputPadding) + + /// TBD + static member convTranspose2d(b:Tensor, ?stride:int, ?padding:int, ?dilation:int, ?outputPadding:int, ?strides:seq, ?paddings:seq, ?dilations:seq, ?outputPaddings:seq) = fun (a:Tensor) -> a.convTranspose2d(b, ?stride=stride, ?padding=padding, ?dilation=dilation, ?outputPadding=outputPadding, ?strides=strides, ?paddings=paddings, ?dilations=dilations, ?outputPaddings=outputPaddings) + + /// TBD + static member convTranspose3d(b:Tensor, ?stride:int, ?padding:int, ?dilation:int, ?outputPadding:int, ?strides:seq, ?paddings:seq, ?dilations:seq, ?outputPaddings:seq) = fun (a:Tensor) -> a.convTranspose3d(b, ?stride=stride, ?padding=padding, ?dilation=dilation, ?outputPadding=outputPadding, ?strides=strides, ?paddings=paddings, ?dilations=dilations, ?outputPaddings=outputPaddings) + + /// TBD + static member pad(paddings:seq) = fun (a:Tensor) -> a.pad(paddings) + + /// TBD + static member toImage(?pixelMin:double, ?pixelMax:double, ?normalize:bool, ?gridCols:int) = fun (a:Tensor) -> a.toImage(?pixelMin=pixelMin, ?pixelMax=pixelMax, ?normalize=normalize, ?gridCols=gridCols) + + /// TBD + static member toImageString(?pixelMin:double, ?pixelMax:double, ?normalize:bool, ?gridCols:int, ?asciiPalette:string) = fun (a:Tensor) -> a.toImageString(?pixelMin=pixelMin, ?pixelMax=pixelMax, ?normalize=normalize, ?gridCols=gridCols, ?asciiPalette=asciiPalette) + + /// TBD + static member cast(dtype:Dtype) = fun (a:Tensor) -> a.cast(dtype) + + /// TBD + static member move(?device, ?dtype, ?backend) = fun (a:Tensor) -> a.move(?device=device, ?dtype=dtype, ?backend=backend) diff --git a/src/TensorMath/TensorMath.fs b/src/TensorMath/TensorMath.fs new file mode 100644 index 0000000..e155065 --- /dev/null +++ b/src/TensorMath/TensorMath.fs @@ -0,0 +1,1468 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. + +namespace TensorMath + +open TensorMath.Backends +open TensorMath.Util + +/// Tensor operations +type dsharp = + + /// + /// Creates a new tensor from the given data, using the given element type and configuration. + /// + /// + /// + /// let t1 = dsharp.tensor [ 1 .. 10 ] + /// let t2 = dsharp.tensor [ [ 1.0; 3.0; 4.0 ]; + /// [ 1.02; 3.04; 4.01 ] ] + /// + /// + /// + /// The data is converted from arrays, sequences, lists and tuples of primitive values to a tensor whose shape is inferred from the data. + /// + /// The .NET object used to form the initial values for the tensor. + /// The desired device of returned tensor. 
Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    /// The fastest creation technique is a one-dimensional array matching the desired dtype. Then use 'view' to reshape.
+    static member tensor(value:obj, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        Tensor.create(value=value, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// Seeds all backends with the given random seed, or a new seed based on the current time if no seed is specified.
+    static member seed(?seed:int) = BackendTensorStatics.Seed(?seed=seed)
+
+    /// Indicates if an object is a tensor
+    static member isTensor(value:obj) = value :? Tensor
+
+    /// Returns the version of the TensorMath.Core assembly.
+    static member version = typeof<Tensor>.Assembly.GetName().Version.ToString()
+
+    /// Saves the object to the given file using a bespoke binary format.
+    ///
+    /// The format used may change from version to version of TensorMath.
+    ///
+    static member save(value:obj, fileName) = saveBinary value fileName
+
+    /// Loads an object from the given file using a bespoke binary format.
+    ///
+    /// The format used may change from version to version of TensorMath.
+    ///
+    // TODO: this can be improved to traverse the loaded data structure to discover any contained Tensor objects
+    // and move all tensors to the config specified by a given set of device, dtype, backend arguments.
+    static member load(fileName) = loadBinary fileName
+
+    /// Returns a new uninitialized tensor filled with arbitrary values for the given shape, element type and configuration
+    /// The desired shape of returned tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member empty(shape:seq<int>, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.Empty(shape|>Seq.toArrayQuick, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new uninitialized tensor filled with arbitrary values for the given length, element type and configuration
+    /// The length of the returned tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member empty(length:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.Empty([|length|], ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new empty tensor holding no data, for the given element type and configuration
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member empty(?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        Tensor.create(value=[], ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// Get the scalar zero tensor for the given configuration
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+
+    /// Get the scalar zero tensor for the given configuration.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member zero(?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.Zero(?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new tensor filled with '0' values for the given shape, element type and configuration.
+    /// The desired shape of returned tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member zeros(shape:seq<int>, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.Zeros(shape|>Shape.create, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new tensor filled with '0' values for the given length, element type and configuration.
+    /// The length of the returned tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member zeros(length:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.Zeros([|length|], ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Get the scalar '1' tensor for the given configuration.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member one(?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.One(?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new tensor filled with '1' values for the given shape, element type and configuration.
+    /// The desired shape of returned tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member ones(shape:seq<int>, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.Ones(shape|>Shape.create, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new tensor of the given length filled with '1' values for the given element type and configuration.
+    /// The length of the returned tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member ones(length:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.Ones([|length|], ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new tensor filled with the given scalar value, for the given shape, element type and configuration.
+    /// The desired shape of returned tensor.
+    /// The scalar used to form the initial values for the tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member full(shape:seq<int>, value:scalar, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.Full(shape|>Shape.create, value, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a new tensor of the given length filled with the given scalar value, for the given element type and configuration.
+    /// The length of the returned tensor.
+    /// The scalar giving the initial values for the tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member full(length:int, value:scalar, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        dsharp.zero(?device=device, ?dtype=dtype, ?backend=backend).fullLike(value, [|length|])
+
+    /// Returns a new scalar tensor with the given value, for the given element type and configuration.
+    /// The scalar giving the initial values for the tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member scalar(value:scalar, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        dsharp.full(Shape.scalar, value, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    ///
+    /// Returns a 1-D tensor of size \(\left\lceil \frac{\text{end} - \text{start}}{\text{step}} \right\rceil\)
+    /// with values from the interval [start, end) taken with common difference step beginning from start.
+    ///
+    /// Non-integer steps may be subject to floating point rounding errors when comparing against end.
+    ///
+    /// The ending value for the set of points.
+    /// The starting value for the set of points. Default: 0.
+    /// The gap between each pair of adjacent points. Default: 1.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member arange(endVal:float, ?startVal:float, ?step:float, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        dsharp.zero(?device=device, ?dtype=dtype, ?backend=backend).arangeLike(endVal=endVal, ?startVal=startVal, ?step=step)
+
+    ///
+    /// Returns a 1-D tensor of size \(\left\lceil \frac{\text{end} - \text{start}}{\text{step}} \right\rceil\)
+    /// with values from the interval [start, end) taken with common difference step beginning from start.
+    ///
+    /// The ending value for the set of points.
+    /// The starting value for the set of points. Default: 0.
+    /// The gap between each pair of adjacent points. Default: 1.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member arange(endVal:int, ?startVal:int, ?step:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        dsharp.zero(?device=device, ?dtype=dtype, ?backend=backend).arangeLike(endVal=endVal, ?startVal=startVal, ?step=step)
+
+    ///
+    /// Returns a 1-D tensor of size steps whose values are evenly spaced from startVal to endVal.
The values are going to be: \( + /// (\text{startVal}, + /// \text{startVal} + \frac{\text{endVal} - \text{startVal}}{\text{steps} - 1}, + /// \ldots, + /// \text{startVal} + (\text{steps} - 2) * \frac{\text{endVal} - \text{startVal}}{\text{steps} - 1}, + /// \text{endVal}) + /// \) + /// + /// The starting value for the set of points. + /// The ending value for the set of points. + /// The size of the returned tensor. + /// The desired device of returned tensor. Default: if None, uses Device.Default. + /// The desired element type of returned tensor. Default: if None, uses Dtype.Default. + /// The desired backend of returned tensor. Default: if None, uses Backend.Default. + static member linspace(startVal:float, endVal:float, steps:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) = + dsharp.zero(?device=device, ?dtype=dtype, ?backend=backend).linspaceLike(startVal=startVal, endVal=endVal, steps=steps) + + /// + /// Returns a 1-D tensor of size whose values are evenly spaced from to . The values are going to be: \( + /// (\text{startVal}, + /// \text{startVal} + \frac{\text{endVal} - \text{startVal}}{\text{steps} - 1}, + /// \ldots, + /// \text{startVal} + (\text{steps} - 2) * \frac{\text{endVal} - \text{startVal}}{\text{steps} - 1}, + /// \text{endVal}) + /// \) + /// + /// The starting value for the set of points. + /// The ending value for the set of points. + /// The size of the returned tensor. + /// The desired device of returned tensor. Default: if None, uses Device.Default. + /// The desired element type of returned tensor. Default: if None, uses Dtype.Default. + /// The desired backend of returned tensor. Default: if None, uses Backend.Default. + static member linspace(startVal:int, endVal:int, steps:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) = + dsharp.zero(?device=device, ?dtype=dtype, ?backend=backend).linspaceLike(startVal=startVal, endVal=endVal, steps=steps) + + /// + /// Returns a 1-D tensor of size whose values are evenly spaced logarithmically from \(\text{baseVal}^{\text{startVal}}\) to \(\text{baseVal}^{\text{endVal}}\). The values are going to be: \( + /// (\text{baseVal}^{\text{startVal}}, + /// \text{baseVal}^{(\text{startVal} + \frac{\text{endVal} - \text{startVal}}{ \text{steps} - 1})}, + /// \ldots, + /// \text{baseVal}^{(\text{startVal} + (\text{steps} - 2) * \frac{\text{endVal} - \text{startVal}}{ \text{steps} - 1})}, + /// \text{baseVal}^{\text{endVal}}) + /// \) + /// + /// The starting value for the set of points. + /// The ending value for the set of points. + /// The size of the returned tensor. + /// The base of the logarithm. Default: 10.0. + /// The desired device of returned tensor. Default: if None, uses Device.Default. + /// The desired element type of returned tensor. Default: if None, uses Dtype.Default. + /// The desired backend of returned tensor. Default: if None, uses Backend.Default. + static member logspace(startVal:float, endVal:float, steps:int, ?baseVal:float, ?device:Device, ?dtype:Dtype, ?backend:Backend) = + dsharp.zero(?device=device, ?dtype=dtype, ?backend=backend).logspaceLike(startVal=startVal, endVal=endVal, steps=steps, ?baseVal=baseVal) + + /// + /// Returns a 1-D tensor of size whose values are evenly spaced logarithmically from \(\text{baseVal}^{\text{startVal}}\) to \(\text{baseVal}^{\text{endVal}}\). 
The values are going to be: \( + /// (\text{baseVal}^{\text{startVal}}, + /// \text{baseVal}^{(\text{startVal} + \frac{\text{endVal} - \text{startVal}}{ \text{steps} - 1})}, + /// \ldots, + /// \text{baseVal}^{(\text{startVal} + (\text{steps} - 2) * \frac{\text{endVal} - \text{startVal}}{ \text{steps} - 1})}, + /// \text{baseVal}^{\text{endVal}}) + /// \) + /// + /// The starting value for the set of points. + /// The ending value for the set of points. + /// The size of the returned tensor. + /// The base of the logarithm. Default: 10. + /// The desired device of returned tensor. Default: if None, uses Device.Default. + /// The desired element type of returned tensor. Default: if None, uses Dtype.Default. + /// The desired backend of returned tensor. Default: if None, uses Backend.Default. + static member logspace(startVal:int, endVal:int, steps:int, ?baseVal:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) = + dsharp.zero(?device=device, ?dtype=dtype, ?backend=backend).logspaceLike(startVal=startVal, endVal=endVal, steps=steps, ?baseVal=baseVal) + + /// Returns a 2-D tensor with ones on the diagonal and zeros elsewhere. + /// The number of rows + /// The number of columns with default being n + /// The desired device of returned tensor. Default: if None, uses Device.Default. + /// The desired element type of returned tensor. Default: if None, uses Dtype.Default. + /// The desired backend of returned tensor. Default: if None, uses Backend.Default. + static member eye(rows:int, ?cols:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) = + Tensor.eye(rows=rows, ?cols=cols, ?device=device, ?dtype=dtype, ?backend=backend) + + /// Returns a one-hot tensor, with one location set to 1, and all others 0. + /// The length of the returned tensor. + /// The location to set to 1. + /// The desired device of returned tensor. Default: if None, uses Device.Default. + /// The desired element type of returned tensor. Default: if None, uses Dtype.Default. + /// The desired backend of returned tensor. Default: if None, uses Backend.Default. + static member onehot(length:int, hot:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) = + dsharp.zero(?device=device, ?dtype=dtype, ?backend=backend).onehotLike(length, hot) + + /// Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1) + /// The desired shape of returned tensor. + /// The desired device of returned tensor. Default: if None, uses Device.Default. + /// The desired element type of returned tensor. Default: if None, uses Dtype.Default. + /// The desired backend of returned tensor. Default: if None, uses Backend.Default. + static member rand(shape:seq, ?device:Device, ?dtype:Dtype, ?backend:Backend) = + TensorC(RawTensor.Random(shape|>Shape.create, ?device=device, ?dtype=dtype, ?backend=backend)) + + /// Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1) + /// The length of the returned tensor. + /// The desired device of returned tensor. Default: if None, uses Device.Default. + /// The desired element type of returned tensor. Default: if None, uses Dtype.Default. + /// The desired backend of returned tensor. Default: if None, uses Backend.Default. 
+    static member rand(length:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.Random([|length|], ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a tensor filled with random numbers from a normal distribution with mean 0 and variance 1 (also called the standard normal distribution).
+    /// The desired shape of returned tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member randn(shape:seq<int>, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.RandomNormal(shape|>Shape.create, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a tensor filled with random numbers from a normal distribution with mean 0 and variance 1 (also called the standard normal distribution).
+    /// The length of the returned tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member randn(length:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.RandomNormal([|length|], ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a tensor filled with random integers generated uniformly between low (inclusive) and high (exclusive).
+    /// Lowest integer to be drawn from the distribution. Default: 0.
+    /// One above the highest integer to be drawn from the distribution.
+    /// The desired shape of returned tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member randint(low:int, high:int, shape:seq<int>, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.RandomInt(shape|>Shape.create, low, high, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a tensor filled with random integers generated uniformly between low (inclusive) and high (exclusive).
+    /// Lowest integer to be drawn from the distribution. Default: 0.
+    /// One above the highest integer to be drawn from the distribution.
+    /// The length of the returned tensor.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member randint(low:int, high:int, length:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        TensorC(RawTensor.RandomInt([|length|], low, high, ?device=device, ?dtype=dtype, ?backend=backend))
+
+    /// Returns a tensor where each row contains numSamples indices sampled from the multinomial probability distribution located in the corresponding row of tensor input.
+    /// The input tensor containing probabilities.
+    /// The number of samples to draw.
+    /// Indicates whether the probabilities should first be normalized by their sum.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member multinomial(probs:Tensor, numSamples:int, ?normalize:bool, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        probs.multinomial(numSamples, ?normalize=normalize, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// Draws binary random numbers (0 or 1) from a Bernoulli distribution.
+    /// The input tensor of probability values for the Bernoulli distribution.
+    /// The desired device of returned tensor. Default: if None, uses Device.Default.
+    /// The desired element type of returned tensor. Default: if None, uses Dtype.Default.
+    /// The desired backend of returned tensor. Default: if None, uses Backend.Default.
+    static member bernoulli(probs:Tensor, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        probs.bernoulli(?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// Randomly zeroes some of the elements of the input tensor with probability p using samples from a Bernoulli distribution.
+    /// The input tensor.
+    /// The probability of an element to be zeroed. Default: 0.5.
+    static member dropout(input:Tensor, ?p:double) = input.dropout(?p=p)
+
+    /// Randomly zeroes out entire channels (a channel is a 2D feature map, e.g., the j-th channel of the i-th sample in the batched input is the 2D tensor input[i, j]). Each channel is zeroed out independently on every forward call with probability p, using samples from a Bernoulli distribution.
+    /// The input tensor.
+    /// The probability of an element to be zeroed. Default: 0.5.
+    static member dropout2d(input:Tensor, ?p:double) = input.dropout2d(?p=p)
+
+    /// Randomly zeroes out entire channels (a channel is a 3D feature map, e.g., the j-th channel of the i-th sample in the batched input is the 3D tensor input[i, j]). Each channel is zeroed out independently on every forward call with probability p, using samples from a Bernoulli distribution.
+    /// The input tensor.
+    /// The probability of an element to be zeroed. Default: 0.5.
+    static member dropout3d(input:Tensor, ?p:double) = input.dropout3d(?p=p)
+
+    /// Returns a new tensor filled with '0' values with characteristics based on the input tensor.
+    /// The shape and characteristics of input will determine those of the output tensor.
+    /// The desired shape of returned tensor. Default: If None, the shape of the input tensor is used.
+    /// The desired device of returned tensor. Default: if None, the device of the input tensor is used.
+    /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used.
+    /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used.
+    static member zerosLike(input:Tensor, ?shape:seq<int>, ?device, ?dtype, ?backend) =
+        input.zerosLike(?shape=shape, ?device=device, ?dtype=dtype, ?backend=backend)
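A small sampling sketch for the members above; the shapes are exact, the values are random by construction, and the seed and p=0.5 are arbitrary illustrative choices:

    dsharp.seed(0)
    let r = dsharp.rand([2; 3])                        // uniform [0, 1), shape [2; 3]
    let n = dsharp.randn(1000)                         // standard normal draws
    let k = dsharp.randint(0, 10, [5])                 // integers in {0, ..., 9}
    let b = dsharp.bernoulli(dsharp.full([4], 0.5))    // four fair coin flips
    let d = dsharp.dropout(r, p=0.5)                   // each entry zeroed with probability 0.5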
+
+    /// Returns a new tensor filled with '1' values with characteristics based on the input tensor.
+    /// The shape and characteristics of input will determine those of the output tensor.
+    /// The desired shape of returned tensor. Default: If None, the shape of the input tensor is used.
+    /// The desired device of returned tensor. Default: if None, the device of the input tensor is used.
+    /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used.
+    /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used.
+    static member onesLike(input:Tensor, ?shape:seq<int>, ?device, ?dtype, ?backend) =
+        input.onesLike(?shape=shape, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    /// Returns a new tensor filled with the given scalar value with characteristics based on the input tensor.
+    /// The shape and characteristics of input will determine those of the output tensor.
+    /// The scalar giving the initial values for the tensor.
+    /// The desired shape of returned tensor. Default: If None, the shape of the input tensor is used.
+    /// The desired device of returned tensor. Default: if None, the device of the input tensor is used.
+    /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used.
+    /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used.
+    static member fullLike(input:Tensor, value:scalar, ?shape:seq<int>, ?device, ?dtype, ?backend) =
+        input.fullLike(value, ?shape=shape, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    ///
+    /// A version of dsharp.arange with characteristics based on the input tensor.
+    ///
+    /// The shape and characteristics of input will determine those of the output tensor.
+    /// The ending value for the set of points.
+    /// The starting value for the set of points. Default: 0.
+    /// The gap between each pair of adjacent points. Default: 1.
+    /// The desired device of returned tensor. Default: if None, the device of the input tensor is used.
+    /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used.
+    /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used.
+    static member arangeLike(input:Tensor, endVal:float, ?startVal:float, ?step:float, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        input.arangeLike(endVal=endVal, ?startVal=startVal, ?step=step, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    ///
+    /// A version of dsharp.arange with characteristics based on the input tensor.
+    ///
+    /// The shape and characteristics of input will determine those of the output tensor.
+    /// The ending value for the set of points.
+    /// The starting value for the set of points. Default: 0.
+    /// The gap between each pair of adjacent points. Default: 1.
+    /// The desired device of returned tensor. Default: if None, the device of the input tensor is used.
+    /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used.
+    /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used.
+    static member arangeLike(input:Tensor, endVal:int, ?startVal:int, ?step:int, ?device:Device, ?dtype:Dtype, ?backend:Backend) =
+        input.arangeLike(endVal=endVal, ?startVal=startVal, ?step=step, ?device=device, ?dtype=dtype, ?backend=backend)
+
+    ///
+    /// A version of dsharp.onehot with characteristics based on the input tensor.
+    ///
+    /// The shape and characteristics of input will determine those of the output tensor.
+    /// The length of the returned tensor.
+    /// The location to set to 1.
+    /// The desired device of returned tensor. Default: if None, the device of the input tensor is used.
+    /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used.
+    /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used.
+ static member onehotLike(input:Tensor, length:int, hot:int, ?device, ?dtype, ?backend) = + input.onehotLike(length, hot, ?device=device, ?dtype=dtype, ?backend=backend) + + /// Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1) with characteristics based on the input tensor + /// The shape and characteristics of input will determine those of the output tensor. + /// The desired shape of returned tensor. Default: If None, the shape of the input tensor is used. + /// The desired device of returned tensor. Default: if None, the device of the input tensor is used. + /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used. + /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used. + static member randLike(input:Tensor, ?shape:seq, ?device, ?dtype, ?backend) = + input.randLike(?shape=shape, ?device=device, ?dtype=dtype, ?backend=backend) + + /// Returns a tensor filled with random numbers from a normal distribution with mean 0 and variance 1 (also called the standard normal distribution) with characteristics based on the input tensor. + /// The shape and characteristics of input will determine those of the output tensor. + /// The desired shape of returned tensor. Default: If None, the shape of the input tensor is used. + /// The desired device of returned tensor. Default: if None, the device of the input tensor is used. + /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used. + /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used. + static member randnLike(input:Tensor, ?shape:seq, ?device, ?dtype, ?backend) = + input.randnLike(?shape=shape, ?device=device, ?dtype=dtype, ?backend=backend) + + /// Returns a tensor with the same shape as Tensor input filled with random integers generated uniformly between low (inclusive) and high (exclusive) with characteristics based on the input tensor. + /// The shape and characteristics of input will determine those of the output tensor. + /// Lowest integer to be drawn from the distribution. Default: 0.. + /// One above the highest integer to be drawn from the distribution. + /// The desired shape of returned tensor. Default: If None, the shape of the input tensor is used. + /// The desired device of returned tensor. Default: if None, the device of the input tensor is used. + /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used. + /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used. + static member randintLike(input:Tensor, low:int, high:int, ?shape:seq, ?device, ?dtype, ?backend) = + input.randintLike(low=low, high=high, ?shape=shape, ?device=device, ?dtype=dtype, ?backend=backend) + + /// Returns the '0' scalar tensor with characteristics based on the input tensor. + /// The shape and characteristics of input will determine those of the output tensor. + /// The desired device of returned tensor. Default: if None, the device of the input tensor is used. + /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used. + /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used. 
+ static member zeroLike(input:Tensor, ?device, ?dtype, ?backend) = + input.zeroLike(?device=device, ?dtype=dtype, ?backend=backend) + + /// Returns the '0' scalar tensor with characteristics based on the input tensor. + /// The shape and characteristics of input will determine those of the output tensor. + /// The desired device of returned tensor. Default: if None, the device of the input tensor is used. + /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used. + /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used. + static member oneLike(input:Tensor, ?device, ?dtype, ?backend) = + input.oneLike(?device=device, ?dtype=dtype, ?backend=backend) + + /// Returns the total number of elements in the input tensor. + /// The input tensor. + static member nelement(input:Tensor) = input.nelement + + /// Returns a new tensor based on the given .NET value with characteristics based on the input tensor. + /// The shape and characteristics of input will determine those of the output tensor. + /// The .NET object giving the the initial values for the tensor. + /// The desired device of returned tensor. Default: if None, the device of the input tensor is used. + /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used. + /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used. + static member like(input:Tensor, value:obj, ?device, ?dtype, ?backend) = + input.like(value, ?device=device, ?dtype=dtype, ?backend=backend) + + /// Returns a new tensor with the same characteristics and storage cloned. + /// The input tensor. + static member clone(input:Tensor) = input.clone() + + /// Returns a boolean tensor for the element-wise less-than comparison of the elements in the two tensors. + /// The shapes of input and other don’t need to match, but they must be broadcastable. + /// The first tensor. + /// The second tensor. + static member lt(a:Tensor, b:Tensor) = a.lt(b) + + /// Returns a boolean tensor for the element-wise greater-than comparison of the elements in the two tensors. + /// The shapes of input and other don’t need to match, but they must be broadcastable. + /// The first tensor. + /// The second tensor. + static member gt(a:Tensor, b:Tensor) = a.gt(b) + + /// Return a boolean tensor for the element-wise less-than-or-equal comparison of the elements in the two tensors. + /// The shapes of input and other don’t need to match, but they must be broadcastable. + /// The first tensor. + /// The second tensor. + static member le(a:Tensor, b:Tensor) = a.le(b) + + /// Returns a boolean tensor for the element-wise greater-than-or-equal comparison of the elements in the two tensors. + /// The shapes of input and other don’t need to match, but they must be broadcastable. + /// The first tensor. + /// The second tensor. + static member ge(a:Tensor, b:Tensor) = a.ge(b) + + /// Returns a boolean tensor for the element-wise equality comparison of the elements in the two tensors. + /// The shapes of input and other don’t need to match, but they must be broadcastable. + /// The first tensor. + /// The second tensor. + static member eq(a:Tensor, b:Tensor) = a.eq(b) + + /// Returns a boolean tensor for the element-wise non-equality comparison of the elements in the two tensors. + /// The shapes of input and other don’t need to match, but they must be broadcastable. + /// The first tensor. + /// The second tensor. 
+ static member ne(a:Tensor, b:Tensor) = a.ne(b) + + /// Returns a boolean tensor where each element indicates if the corresponding element in the input tensor is an infinity value. + /// The input tensor. + static member isinf(input:Tensor) = input.isinf() + + /// Returns a boolean tensor where each element indicates if the corresponding element in the input tensor is a NaN (not-a-number) value. + /// The input tensor. + static member isnan(input:Tensor) = input.isnan() + + /// Returns a boolean indicating if any element of the tensor is infinite. + /// The input tensor. + static member hasinf(input:Tensor) = input.hasinf() + + /// Returns a boolean indicating if any element of the tensor is a not-a-number (NaN) value. + /// The input tensor. + static member hasnan(input:Tensor) = input.hasnan() + + /// Returns the indices of the maximum value of all elements in the input tensor. + /// The input tensor. + static member argmax(input:Tensor) = input.argmax() + + /// Returns the indices of the maximum value of all elements in the input tensor. + /// The input tensor. + /// The dimension. + /// Whether the output tensor has dim retained or not. + static member argmax(input:Tensor, dim:int, ?keepDim:bool) = input.argmax(dim=dim, ?keepDim=keepDim) + + /// Returns the indices of the minimum value of all elements in the input tensor. + /// The input tensor. + static member argmin(input:Tensor) = input.argmin() + + /// Returns the indices of the minimum value of all elements in the input tensor. + /// The input tensor. + /// The dimension. + /// Whether the output tensor has dim retained or not. + static member argmin(input:Tensor, dim:int, ?keepDim:bool) = input.argmin(dim=dim, ?keepDim=keepDim) + + /// Returns the maximum value of all elements in the input tensor. + /// The input tensor. + static member max(input:Tensor) = input.max() + + /// Returns the minimum value of all elements in the input tensor. + /// The input tensor. + static member min(input:Tensor) = input.min() + + /// Each element of the tensor input is compared with the corresponding element of the tensor other and an element-wise maximum is taken. + /// The shapes of input and other don’t need to match, but they must be broadcastable. + /// The first tensor. + /// The second tensor. + static member max(a:Tensor, b:Tensor) = a.max(b) + + /// Each element of the tensor input is compared with the corresponding element of the tensor other and an element-wise minimum is taken. + /// The shapes of input and other don’t need to match, but they must be broadcastable. + /// The first tensor. + /// The second tensor. + static member min(a:Tensor, b:Tensor) = a.min(b) + + /// Returns the maximum value of all elements in the input tensor along the given dimension. + /// The tensor. + /// The dimension. + /// Whether the output tensor has dim retained or not. + static member max(a:Tensor, dim:int, ?keepDim:bool) = a.max(dim=dim, ?keepDim=keepDim) + + /// Returns the minimum value of all elements in the input tensor along the given dimension. + /// The tensor. + /// The dimension. + /// Whether the output tensor has dim retained or not. + static member min(a:Tensor, dim:int, ?keepDim:bool) = a.min(dim=dim, ?keepDim=keepDim) + + /// Clamp all elements in input into the range [ low..high] and return a resulting tensor + /// The input tensor. + /// The lower-bound of the range to be clamped to. + /// The upper-bound of the range to be clamped to. 
+    static member clamp(input:Tensor, ?low:scalar, ?high:scalar) = input.clamp(?low=low, ?high=high)
+
+    /// Normalizes a vector so all the values are between zero and one (min-max scaling to 0..1).
+    /// The input tensor.
+    static member normalize(input:Tensor) = input.normalize()
+
+    /// Returns the tensor after standardization (z-score normalization).
+    /// The input tensor.
+    static member standardize(input:Tensor) = input.standardize()
+
+    ///
+    /// Returns a tensor with the diagonal elements with respect to dim1 and dim2.
+    /// The argument offset controls which diagonal to consider.
+    ///
+    /// The input tensor. Must be at least 2-dimensional.
+    /// Which diagonal to consider. Default: 0.
+    /// The first dimension with respect to which to take diagonal. Default: 0.
+    /// The second dimension with respect to which to take diagonal. Default: 1.
+    static member diagonal(input:Tensor, ?offset:int, ?dim1:int, ?dim2:int) =
+        input.diagonal(?offset=offset, ?dim1=dim1, ?dim2=dim2)
+
+    /// Returns the sum of the elements of the diagonal of the input 2-D matrix.
+    /// The input tensor.
+    static member trace(input:Tensor) = input.trace()
+
+    /// Returns a new view of the input tensor with singleton dimensions expanded to a larger size.
+    /// The input tensor.
+    /// The desired shape of returned tensor.
+    static member expand(input:Tensor, shape:seq<int>) = input.expand(shape)
+
+    /// Expands the input tensor to the same size as the other tensor.
+    /// The input tensor.
+    /// The result tensor has the same size as other.
+    static member expandAs(input:Tensor, other:Tensor) = input.expandAs(other)
+
+    /// Concatenates a sequence of tensors along a new dimension.
+    /// All tensors need to be of the same size.
+    /// The sequence of tensors to concatenate.
+    /// The dimension to insert. Has to be between 0 and the number of dimensions of concatenated tensors (inclusive).
+    static member stack(tensors:seq<Tensor>, ?dim:int) = Tensor.stack(tensors, ?dim=dim)
+
+    /// Removes a tensor dimension.
+    /// The input tensor.
+    /// The dimension to remove.
+    static member unstack(input:Tensor, ?dim:int) = input.unstack(?dim=dim)
+
+    /// Concatenates the given sequence of tensors in the given dimension. All tensors must either have the same shape (except in the concatenating dimension) or be empty.
+    /// The sequence of tensors to concatenate.
+    /// The dimension over which the tensors are concatenated.
+    static member cat(tensors:seq<Tensor>, ?dim:int) = Tensor.cat(tensors, ?dim=dim)
+
+    /// Splits the tensor into chunks. The tensor will be split into sizes.Length chunks each with a corresponding size in the given dimension.
+    /// The tensor to split.
+    /// The size of a single chunk or list of sizes for each chunk.
+    /// The dimension along which to split the tensor.
+    static member split(input:Tensor, sizes:seq<int>, ?dim:int) = input.split(sizes, ?dim=dim)
+
+    /// Return the element-wise addition of the two tensors.
+    /// The first tensor.
+    /// The second tensor.
+    static member add(a:Tensor, b:Tensor) = a.add(b)
+
+    /// Return the element-wise subtraction of the two tensors.
+    /// The first tensor.
+    /// The second tensor.
+    static member sub(a:Tensor, b:Tensor) = a.sub(b)
+
+    /// Return the element-wise multiplication of the two tensors.
+    /// The first tensor.
+    /// The second tensor.
+    static member mul(a:Tensor, b:Tensor) = a.mul(b)
+
+    /// Return the element-wise division of the two tensors.
+    /// The first tensor.
+    /// The second tensor.
+ static member div(a:Tensor, b:Tensor) = a.div(b) + + /// Return the element-wise exponentiation of the two tensors. + /// The first tensor. + /// The second tensor. + static member pow(a:Tensor, b:Tensor) = a.pow(b) + + /// Matrix product of two tensors. + /// The first tensor. + /// The second tensor. + static member matmul(a:Tensor, b:Tensor) = a.matmul(b) + + /// Computes the dot product (inner product) of two tensors. + /// The first tensor. + /// The second tensor. + static member dot(a:Tensor, b:Tensor) = a.dot(b) + + /// Return the element-wise negation of the input tensor. + /// The input tensor. + static member neg(input:Tensor) = input.neg() + + /// Returns the sum of all elements in the input tensor + /// The input tensor. + static member sum(input:Tensor) = input.sum() + + /// Returns the sum of each row of the input tensor in the given dimension dim. If dim is a list of dimensions, reduce over all of them. + /// + /// If keepdim is true, the output tensor is of the same size as input except in the dimension(s) dim where it is of size 1. Otherwise, dim is squeezed, resulting in the output tensor having 1 (or len(dim)) fewer dimension(s). + /// + /// The input tensor. + /// The dimension to reduce. + /// Whether the output tensor has dim retained or not. + static member sum(input:Tensor, dim:int, ?keepDim:bool) = input.sum(dim, ?keepDim=keepDim) + + /// Returns the mean value of all elements in the input tensor. + /// The input tensor. + static member mean(input:Tensor) = input.mean() + + /// Returns the mean value of each row of the input tensor in the given dimension dim. If dim is a list of dimensions, reduce over all of them. + /// + /// If keepdim is true, the output tensor is of the same size as input except in the dimension(s) dim where it is of size 1. Otherwise, dim is squeezed, resulting in the output tensor having 1 (or len(dim)) fewer dimension(s). + /// + /// The input tensor. + /// The dimension to reduce. + /// Whether the output tensor has dim retained or not. + static member mean(input:Tensor, dim:int, ?keepDim:bool) = input.mean(dim, ?keepDim=keepDim) + + /// Returns the variance of all elements in the input tensor. + /// + /// If unbiased is False, then the variance will be calculated via the biased estimator. Otherwise, Bessel’s correction will be used. + /// + /// The input tensor. + /// Whether to use the unbiased estimation or not. + static member var(input:Tensor, ?unbiased:bool) = input.var(?unbiased=unbiased) + + /// Returns the variance of each row of the input tensor in the given dimension dim. If dim is a list of dimensions, reduce over all of them. + /// + /// If keepdim is true, the output tensor is of the same size as input except in the dimension(s) dim where it is of size 1. Otherwise, dim is squeezed, resulting in the output tensor having 1 (or len(dim)) fewer dimension(s). + /// If unbiased is False, then the variance will be calculated via the biased estimator. Otherwise, Bessel’s correction will be used. + /// + /// The input tensor. + /// The dimension to reduce. + /// Whether the output tensor has dim retained or not. + /// Whether to use the unbiased estimation or not. + static member var(input:Tensor, dim:int, ?keepDim:bool, ?unbiased:bool) = input.var(dim, ?keepDim=keepDim, ?unbiased=unbiased) + + /// Returns the standard deviation of all elements in the input tensor. + /// + /// If unbiased is False, then the standard deviation will be calculated via the biased estimator. Otherwise, Bessel’s correction will be used. 
+ /// + /// The input tensor. + /// Whether to use the unbiased estimation or not. + static member std(input:Tensor, ?unbiased:bool) = input.std(?unbiased=unbiased) + + /// Returns the standard deviation of each row of the input tensor in the given dimension dim. If dim is a list of dimensions, reduce over all of them. + /// + /// If keepdim is true, the output tensor is of the same size as input except in the dimension(s) dim where it is of size 1. Otherwise, dim is squeezed, resulting in the output tensor having 1 (or len(dim)) fewer dimension(s). + /// If unbiased is False, then the standard deviation will be calculated via the biased estimator. Otherwise, Bessel’s correction will be used. + /// + /// The input tensor. + /// The dimension to reduce. + /// Whether the output tensor has dim retained or not. + /// Whether to use the unbiased estimation or not. + static member std(input:Tensor, dim:int, ?keepDim:bool, ?unbiased:bool) = input.std(dim, ?keepDim=keepDim, ?unbiased=unbiased) + + /// + /// Estimates the covariance matrix of the given tensor. The tensor's first + /// dimension should index variables and the second dimension should + /// index observations for each variable. + /// + /// + /// If no weights are given, the covariance between variables \(x\) and \(y\) is + /// \[cov(x,y)= \frac{\sum^{N}_{i = 1}(x_{i} - \mu_x)(y_{i} - \mu_y)}{N~-~\text{correction}}\] + /// where \(\mu_x\) and \(\mu_y\) are the sample means. + /// + /// If there are fweights or aweights then the covariance is + /// \[cov(x,y)=\frac{\sum^{N}_{i = 1}w_i(x_{i} - \mu_x^*)(y_{i} - \mu_y^*)}{\text{normalization factor}}\] + /// where \(w\) is either fweights or aweights if one weight type is provided. + /// If both weight types are provided \(w=\text{fweights}\times\text{aweights}\). + /// \(\mu_x^* = \frac{\sum^{N}_{i = 1}w_ix_{i} }{\sum^{N}_{i = 1}w_i}\) + /// is the weighted mean of variables. + /// The normalization factor is \(\sum^{N}_{i=1} w_i\) if only fweights are provided or if aweights are provided and correction=0. + /// Otherwise if aweights \(aw\) are provided the normalization factor is + /// \(\sum^N_{i=1} w_i - \text{correction}\times\frac{\sum^N_{i=1} w_i aw_i}{\sum^N_{i=1} w_i}\) + /// + /// The input tensor. + /// Difference between the sample size and the sample degrees of freedom. Defaults to 1 (Bessel's correction). + /// Frequency weights represent the number of times each observation was observed. + /// Should be given as a tensor of integers. Defaults to no weights. + /// Relative importance weights, larger weights for observations that + /// should have a larger effect on the estimate. + /// Should be given as a tensor of floating point numbers. Defaults to no weights. + /// Returns a square tensor representing the covariance matrix. + /// Given a tensor with \(N\) variables \(X=[x_1,x_2,\ldots,x_N]\) the + /// \(C_{i,j}\) entry on the covariance matrix is the covariance between + /// \(x_i\) and \(x_j\). + /// + /// + /// + /// let x = dsharp.tensor([0.0;3.4;5.0]) + /// let y = dsharp.tensor([1.0;2.3;-3.0]) + /// let xy = dsharp.stack([x;y]) + /// xy.cov() + /// + /// Evaluates to + /// + /// tensor([[ 6.5200, -4.0100], + /// [-4.0100, 7.6300]]) + /// + /// + static member cov(input:Tensor, ?correction:int64, ?fweights:Tensor, ?aweights:Tensor) = + input.cov(?correction=correction, ?fweights=fweights, ?aweights=aweights) + + /// + /// Estimates the Pearson correlation coefficient matrix for the given tensor. 
The tensor's first
+    /// dimension should index variables and the second dimension should
+    /// index observations for each variable.
+    ///
+    /// The correlation coefficient matrix \(R\) is computed from the covariance matrix.
+    /// Returns a square tensor representing the correlation coefficient matrix.
+    /// Given a tensor with \(N\) variables \(X=[x_1,x_2,\ldots,x_N]\) the
+    /// \(R_{i,j}\) entry on the correlation matrix is the correlation between
+    /// \(x_i\) and \(x_j\).
+    ///
+    /// The correlation between variables \(x\) and \(y\) is
+    /// \[cor(x,y)= \frac{\sum^{N}_{i = 1}(x_{i} - \mu_x)(y_{i} - \mu_y)}{\sigma_x \sigma_y (N ~-~1)}\]
+    /// where \(\mu_x\) and \(\mu_y\) are the sample means and \(\sigma_x\) and \(\sigma_y\) are
+    /// the sample standard deviations.
+    ///
+    /// The input tensor.
+    ///
+    ///   let x = dsharp.tensor([-0.2678; -0.0908; -0.3766; 0.2780])
+    ///   let y = dsharp.tensor([-0.5812; 0.1535; 0.2387; 0.2350])
+    ///   let xy = dsharp.stack([x;y])
+    ///   dsharp.corrcoef(xy)
+    ///
+    /// Evaluates to
+    ///
+    ///   tensor([[1.0000, 0.3582],
+    ///           [0.3582, 1.0000]])
+    ///
+    static member corrcoef(input: Tensor) = input.corrcoef()
+
+    /// Gathers values along an axis specified by dim.
+    /// The input tensor.
+    /// The axis along which to index.
+    /// The indices of elements to gather.
+    static member gather(input:Tensor, dim:int, indices:Tensor) = input.gather(dim, indices)
+
+    /// Scatters values along an axis specified by dim.
+    /// The input tensor.
+    /// The axis along which to index.
+    /// The indices of elements to scatter.
+    /// The destination shape.
+    static member scatter(input:Tensor, dim:int, indices:Tensor, destinationShape:seq<int>) = input.scatter(dim, indices, destinationShape)
+
+    /// Returns the original tensor with its dimensions permuted.
+    /// The input tensor.
+    /// The desired ordering of dimensions.
+    static member permute(input:Tensor, permutation:seq<int>) = input.permute(permutation)
+
+    /// Returns a tensor that is a transposed version of input. The given dimensions dim0 and dim1 are swapped.
+    /// The input tensor.
+    /// The first dimension to be transposed.
+    /// The second dimension to be transposed.
+    static member transpose(input:Tensor, dim0:int, dim1:int) = input.transpose(dim0, dim1)
+
+    /// Returns a tensor that is a transposed version of input with dimensions 0 and 1 swapped.
+    /// The input tensor.
+    static member transpose(input:Tensor) = input.transpose()
+
+    /// Returns a tensor with all the dimensions of input of size 1 removed.
+    /// If the tensor has a batch dimension of size 1, then squeeze(input) will also remove the batch dimension, which can lead to unexpected errors.
+    /// The input tensor.
+    /// If given, the input will be squeezed only in this dimension.
+    static member squeeze(input:Tensor, ?dim:int) = input.squeeze(?dim=dim)
+
+    /// Returns a new tensor with a dimension of size one inserted at the specified position.
+    /// The input tensor.
+    /// The index at which to insert the singleton dimension.
+    static member unsqueeze(input:Tensor, dim:int) = input.unsqueeze(dim)
+
+    /// Returns a new tensor with dimensions of size one appended to the end until the number of dimensions is the same as the other tensor.
+    /// The input tensor.
+    /// The other tensor.
+    static member unsqueezeAs(input:Tensor, other:Tensor) = input.unsqueezeAs(other)
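The shape-manipulating members above compose naturally; a sketch with exact shapes (the gather index tensor uses integer literals so an integer dtype is inferred):

    let a = dsharp.tensor [ 1.0; 2.0 ]
    let b = dsharp.tensor [ 3.0; 4.0 ]
    let s = dsharp.stack([a; b])                             // new leading dimension: shape [2; 2]
    let c = dsharp.cat([a; b])                               // along an existing dimension: shape [4]
    let g = dsharp.gather(s, 1, dsharp.tensor [ [0]; [1] ])  // picks s.[0,0]=1.0 and s.[1,1]=4.0, shape [2; 1]
    let t = dsharp.transpose(s)                              // dims 0 and 1 swapped
    let q = dsharp.squeeze(g)                                // shape [2; 1] -> [2]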
+
+    /// Reverses the order of an n-D tensor along the given axes in dims.
+    /// The input tensor.
+    /// The axes to flip on.
+    static member flip(input:Tensor, dims:seq<int>) = input.flip(dims)
+
+    /// Dilates the tensor using the given dilations in each corresponding dimension.
+    /// The input tensor.
+    /// The dilations to use.
+    static member dilate(input:Tensor, dilations:seq<int>) = input.dilate(dilations)
+
+    /// Reverses the dilation of the tensor using the given dilations in each corresponding dimension.
+    /// The input tensor.
+    /// The dilations to use.
+    static member undilate(input:Tensor, dilations:seq<int>) = input.undilate(dilations)
+
+    /// Repeats elements of a tensor.
+    /// The input tensor.
+    /// The dimension along which to repeat values.
+    /// The number of repetitions for each element.
+    static member repeat(input:Tensor, dim:int, times:int) = input.repeat(dim, times)
+
+    /// Gets a slice of a tensor.
+    /// The input tensor.
+    /// Index describing the slice.
+    static member slice(input:Tensor, index:seq<int>) = input[index |> Seq.toArray]
+
+    /// Returns a new tensor with the same data as the self tensor but of a different shape.
+    /// The returned tensor shares the same data and must have the same number of elements, but may have a different size.
+    /// For a tensor to be viewed, the new view size must be compatible with its original size and stride, i.e., each new view dimension must either be a subspace of an original dimension,
+    /// or only span across original dimensions \(d, d+1, \dots, d+k\) that satisfy the following contiguity-like condition:
+    /// \(\forall i = d, \dots, d+k-1,\) \[\text{stride}[i] = \text{stride}[i+1] \times \text{size}[i+1]\]
+    ///
+    /// The input tensor.
+    /// The desired shape of returned tensor.
+    static member view(input:Tensor, shape:seq<int>) = input.view(shape)
+
+    /// Returns a new tensor with the same data as the self tensor but of a different shape.
+    /// The returned tensor shares the same data and must have the same number of elements, but may have a different size.
+    /// For a tensor to be viewed, the new view size must be compatible with its original size and stride, i.e., each new view dimension must either be a subspace of an original dimension,
+    /// or only span across original dimensions \(d, d+1, \dots, d+k\) that satisfy the following contiguity-like condition:
+    /// \(\forall i = d, \dots, d+k-1,\) \[\text{stride}[i] = \text{stride}[i+1] \times \text{size}[i+1]\]
+    ///
+    /// The input tensor.
+    /// The desired shape of returned tensor.
+    static member view(input:Tensor, shape:int) = input.view(shape)
+
+    /// View this tensor as the same size as other.
+    /// The returned tensor shares the same data and must have the same number of elements, but may have a different size.
+    /// For a tensor to be viewed, the new view size must be compatible with its original size and stride, i.e., each new view dimension must either be a subspace of an original dimension,
+    /// or only span across original dimensions \(d, d+1, \dots, d+k\) that satisfy the following contiguity-like condition:
+    /// \(\forall i = d, \dots, d+k-1,\) \[\text{stride}[i] = \text{stride}[i+1] \times \text{size}[i+1]\]
+    ///
+    /// The input tensor.
+    /// The result tensor has the same size as other.
+    static member viewAs(input:Tensor, other:Tensor) = input.viewAs(other)
+
+    /// Flattens a contiguous range of dims in a tensor.
+    /// The input tensor.
+    /// The first dim to flatten.
+    /// The last dim to flatten.
+    static member flatten(input:Tensor, ?startDim:int, ?endDim:int) = input.flatten(?startDim=startDim, ?endDim=endDim)
+
+    /// Unflattens a tensor dimension by expanding it to the given shape.
+    /// The input tensor.
+    /// The dimension to unflatten.
+    /// New shape of the unflattened dimension.
+    static member unflatten(input:Tensor, dim:int, unflattenedShape:seq<int>) = input.unflatten(dim, unflattenedShape)
+
+    /// Returns a new tensor with the signs of the elements of input.
+    /// The tensor will have the same element type as the input tensor.
+    /// The input tensor.
+    static member sign(input:Tensor) = input.sign()
+
+    /// Returns a new tensor with the floor of the elements of input, the largest integer less than or equal to each element.
+    /// The tensor will have the same element type as the input tensor.
+    /// The input tensor.
+    static member floor(input:Tensor) = input.floor()
+
+    /// Returns a new tensor with the ceil of the elements of input, the smallest integer greater than or equal to each element.
+    /// The tensor will have the same element type as the input tensor.
+    /// The input tensor.
+    static member ceil(input:Tensor) = input.ceil()
+
+    /// Returns a new tensor with each of the elements of input rounded to the closest integer.
+    /// The tensor will have the same element type as the input tensor.
+    /// The input tensor.
+    static member round(input:Tensor) = input.round()
+
+    /// Computes the element-wise absolute value of the given input tensor.
+    /// The tensor will have the same element type as the input tensor.
+    /// The input tensor.
+    static member abs(input:Tensor) = input.abs()
+
+    /// Applies the rectified linear unit function element-wise.
+    /// The input tensor.
+    static member relu(input:Tensor) = input.relu()
+
+    /// Applies the leaky rectified linear unit function element-wise.
+    /// \[\text{LeakyReLU}(x) = \max(0, x) + \text{negative\_slope} * \min(0, x)\]
+    /// The input tensor.
+    /// Controls the angle of the negative slope. Default: 0.01.
+    static member leakyRelu(input:Tensor, ?negativeSlope:float) = input.leakyRelu(?negativeSlope=negativeSlope)
+
+    /// Applies the sigmoid function element-wise.
+    /// \[\text{Sigmoid}(x) = \frac{1}{1 + \exp(-x)}\]
+    /// The input tensor.
+    static member sigmoid(input:Tensor) = input.sigmoid()
+
+    /// Applies the softplus function element-wise.
+    /// \[\text{Softplus}(x) = \frac{1}{\beta} * \log(1 + \exp(\beta * x))\]
+    /// The input tensor.
+    static member softplus(input:Tensor) = input.softplus()
+
+    /// Applies the exp function element-wise.
+    /// The input tensor.
+    static member exp(input:Tensor) = input.exp()
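Since view only re-describes existing storage, a quick round-trip sketch of the members above; the values are exact because arange is deterministic:

    let v = dsharp.arange(6.)              // tensor [0.; 1.; 2.; 3.; 4.; 5.]
    let m = dsharp.view(v, [2; 3])         // same storage seen as 2x3
    let f = dsharp.flatten(m)              // back to shape [6]
    let r = dsharp.relu(dsharp.tensor [ -1.0; 2.0 ])   // tensor [0.; 2.]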
+
+    /// Returns a new tensor with the natural logarithm of the elements of input.
+    /// \[y_{i} = \log_{e} (x_{i})\]
+    /// The input tensor.
+    static member log(input:Tensor) = input.log()
+
+    /// Returns the logarithm of the tensor after clamping the tensor so that all its elements are greater than epsilon. This is to avoid a -inf result for elements equal to zero.
+    /// The input tensor.
+    /// The smallest value a tensor element can take before the logarithm is applied. Default: 1e-12.
+    static member safelog(input:Tensor, ?epsilon:float) = input.safelog(?epsilon=epsilon)
+
+    /// Returns a new tensor with the logarithm to the base 10 of the elements of input.
+    /// \[y_{i} = \log_{10} (x_{i})\]
+    /// The input tensor.
+    static member log10(input:Tensor) = input.log10()
+
+    /// Returns a new tensor with the square-root of the elements of input.
+    /// The input tensor.
+    static member sqrt(input:Tensor) = input.sqrt()
+
+    /// Returns a new tensor with the sine of the elements of input.
+    /// The input tensor.
+    static member sin(input:Tensor) = input.sin()
+
+    /// Returns a new tensor with the cosine of the elements of input.
+    /// The input tensor.
+    static member cos(input:Tensor) = input.cos()
+
+    /// Returns a new tensor with the tangent of the elements of input.
+    /// The input tensor.
+    static member tan(input:Tensor) = input.tan()
+
+    /// Returns a new tensor with the hyperbolic sine of the elements of input.
+    /// The input tensor.
+    static member sinh(input:Tensor) = input.sinh()
+
+    /// Returns a new tensor with the hyperbolic cosine of the elements of input.
+    /// The input tensor.
+    static member cosh(input:Tensor) = input.cosh()
+
+    /// Returns a new tensor with the hyperbolic tangent of the elements of input.
+    /// The input tensor.
+    static member tanh(input:Tensor) = input.tanh()
+
+    /// Returns a new tensor with the arcsine of the elements of input.
+    /// The input tensor.
+    static member asin(input:Tensor) = input.asin()
+
+    /// Returns a new tensor with the arccosine of the elements of input.
+    /// The input tensor.
+    static member acos(input:Tensor) = input.acos()
+
+    /// Returns a new tensor with the arctangent of the elements of input.
+    /// The input tensor.
+    static member atan(input:Tensor) = input.atan()
+
+    /// Applies a softmax function.
+    /// Softmax is defined as: \(\text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}\).
+    /// The input tensor.
+    /// A dimension along which softmax will be computed.
+    static member softmax(input:Tensor, dim:int) = input.softmax(dim)
+
+    /// Applies a softmax followed by a logarithm.
+    /// The input tensor.
+    /// A dimension along which softmax will be computed.
+    static member logsoftmax(input:Tensor, dim:int) = input.logsoftmax(dim)
+
+    /// Returns the log of summed exponentials of each row of the input tensor in the given dimension dim.
+    /// The input tensor.
+    /// The dimension to reduce.
+    /// Whether the output tensor has dim retained or not.
+    static member logsumexp(input:Tensor, dim:int, ?keepDim:bool) = input.logsumexp(dim, ?keepDim=keepDim)
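The three members above are tied together by the identity logsoftmax(x) = x - logsumexp(x) along the same dimension; a small sketch:

    let x  = dsharp.tensor [ 1.0; 2.0; 3.0 ]
    let p  = dsharp.softmax(x, 0)       // probabilities, sum to 1
    let lp = dsharp.logsoftmax(x, 0)    // elementwise log of p
    let z  = dsharp.logsumexp(x, 0)     // log (sum_j exp x_j), the normalizer
    // lp equals x - z elementwise, which is why logsoftmax stays stable in log space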
+ static member mseLoss(input:Tensor, target:Tensor, ?reduction:string) =
+ input.mseLoss(target, ?reduction=reduction)
+
+ /// Creates a criterion that measures the Binary Cross Entropy between the target and the output
+ /// The input tensor.
+ /// The target tensor.
+ /// A manual rescaling weight given to the loss of each batch element.
+ /// Optionally specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': the sum of the output will be divided by the number of elements in the output, 'sum': the output will be summed. Note: size_average and reduce are in the process of being deprecated, and in the meantime, specifying either of those two args will override reduction. Default: 'mean'.
+ static member bceLoss(input:Tensor, target:Tensor, ?weight:Tensor, ?reduction:string) =
+ input.bceLoss(target, ?weight=weight, ?reduction=reduction)
+
+ /// The negative log likelihood loss.
+ /// The input tensor.
+ /// The target tensor.
+ /// An optional manual rescaling weight given to the loss of each batch element.
+ /// Optionally specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': the sum of the output will be divided by the number of elements in the output, 'sum': the output will be summed. Note: size_average and reduce are in the process of being deprecated, and in the meantime, specifying either of those two args will override reduction. Default: 'mean'.
+ static member nllLoss(input:Tensor, target:Tensor, ?weight:Tensor, ?reduction:string) =
+ input.nllLoss(target, ?weight=weight, ?reduction=reduction)
+
+ /// This criterion combines logsoftmax and nllLoss in a single function
+ /// The input tensor.
+ /// The target tensor.
+ /// An optional manual rescaling weight given to the loss of each batch element.
+ /// Optionally specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, 'mean': the sum of the output will be divided by the number of elements in the output, 'sum': the output will be summed. Note: size_average and reduce are in the process of being deprecated, and in the meantime, specifying either of those two args will override reduction. Default: 'mean'.
+ static member crossEntropyLoss(input:Tensor, target:Tensor, ?weight:Tensor, ?reduction:string) =
+ input.crossEntropyLoss(target, ?weight=weight, ?reduction=reduction)
+
+ /// Applies a 1D max pooling over an input signal composed of several input planes.
+ /// The input tensor.
+ /// The size of the window to take a max over.
+ /// The stride of the window. Default value is kernelSize.
+ /// The implicit zero padding to be added on both sides.
+ static member maxpool1d(input:Tensor, kernelSize:int, ?stride:int, ?padding:int) =
+ input.maxpool1d(kernelSize, ?stride=stride, ?padding=padding)
+
+ /// Applies a 1D max pooling over an input signal composed of several input planes, returning the max indices along with the outputs.
+ /// The input tensor.
+ /// The size of the window to take a max over.
+ /// The stride of the window. Default value is kernelSize.
+ /// The implicit zero padding to be added on both sides.
+ static member maxpool1di(input:Tensor, kernelSize:int, ?stride:int, ?padding:int) =
+ input.maxpool1di(kernelSize, ?stride=stride, ?padding=padding)
+
+ /// Applies a 2D max pooling over an input signal composed of several input planes.
+ /// The input tensor.
+ /// The size of the window to take a max over.
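Editor's illustration (not part of the patch): a minimal sketch of the reduction parameter above, assuming `open TensorMath`; the arithmetic in the comments follows from the 'mean' and 'sum' definitions.

```fsharp
let input  = dsharp.tensor [0.1; 0.8; 0.4]
let target = dsharp.tensor [0.0; 1.0; 0.5]
let meanLoss = dsharp.mseLoss(input, target)                  // default reduction "mean": 0.06/3 = 0.02
let sumLoss  = dsharp.mseLoss(input, target, reduction="sum") // 0.06, i.e. 3x the mean here
```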
+ /// The stride of the window. Default value is kernelSize.
+ /// The implicit zero padding to be added on both sides.
+ /// The sizes of the window to take a max over.
+ /// The strides of the window. Default value is kernelSize.
+ /// The implicit zero paddings to be added on corresponding sides.
+ static member maxpool2d(input:Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>) =
+ input.maxpool2d(?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings)
+
+ /// Applies a 2D max pooling over an input signal composed of several input planes, returning the max indices along with the outputs.
+ /// The input tensor.
+ /// The size of the window to take a max over.
+ /// The stride of the window. Default value is kernelSize.
+ /// The implicit zero padding to be added on both sides.
+ /// The sizes of the window to take a max over.
+ /// The strides of the window. Default value is kernelSize.
+ /// The implicit zero paddings to be added on corresponding sides.
+ static member maxpool2di(input:Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>) =
+ input.maxpool2di(?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings)
+
+ /// Applies a 3D max pooling over an input signal composed of several input planes.
+ /// The input tensor.
+ /// The size of the window to take a max over.
+ /// The stride of the window. Default value is kernelSize.
+ /// The implicit zero padding to be added on both sides.
+ /// The sizes of the window to take a max over.
+ /// The strides of the window. Default value is kernelSizes.
+ /// The implicit zero paddings to be added on corresponding sides.
+ static member maxpool3d(input:Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>) =
+ input.maxpool3d(?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings)
+
+ /// Applies a 3D max pooling over an input signal composed of several input planes, returning the max indices along with the outputs.
+ /// The input tensor.
+ /// The size of the window to take a max over.
+ /// The stride of the window. Default value is kernelSize.
+ /// The implicit zero padding to be added on both sides.
+ /// The sizes of the window to take a max over.
+ /// The strides of the window. Default value is kernelSize.
+ /// The implicit zero paddings to be added on corresponding sides.
+ static member maxpool3di(input:Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>) =
+ input.maxpool3di(?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings)
+
+ /// Computes a partial inverse of maxpool1di
+ /// The input tensor.
+ /// The indices selected by maxpool1di.
+ /// The size of the window to take a max over.
+ /// The stride of the window. Default value is kernelSize.
+ /// The implicit zero padding to be added on both sides.
+ /// The targeted output size.
+ static member maxunpool1d(input:Tensor, indices:Tensor, kernelSize:int, ?stride:int, ?padding:int, ?outputSize:seq<int>) =
+ input.maxunpool1d(indices, kernelSize, ?stride=stride, ?padding=padding, ?outputSize=outputSize)
+
+ /// Computes a partial inverse of maxpool2di
+ /// The input tensor.
+ /// The indices selected by maxpool2di.
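Editor's illustration (not part of the patch): the scalar and per-dimension parameters above are mutually exclusive ways to specify the window; a minimal sketch assuming `open TensorMath` and the `randn` constructor exported elsewhere in this API.

```fsharp
let img = dsharp.randn [1; 1; 4; 4]  // batch x channels x height x width
let p1 = dsharp.maxpool2d(img, kernelSize=2)                       // square window -> shape [1; 1; 2; 2]
let p2 = dsharp.maxpool2d(img, kernelSizes=[2; 2], strides=[1; 1]) // per-dim spec -> shape [1; 1; 3; 3]
```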
+ /// The size of the window to take a max over.
+ /// The stride of the window. Default value is kernelSize.
+ /// The implicit zero padding to be added on both sides.
+ /// The sizes of the window to take a max over.
+ /// The strides of the window. Default value is kernelSizes.
+ /// The implicit zero paddings to be added on corresponding sides.
+ /// The targeted output size.
+ static member maxunpool2d(input:Tensor, indices:Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>, ?outputSize:seq<int>) =
+ input.maxunpool2d(indices, ?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings, ?outputSize=outputSize)
+
+ /// Computes a partial inverse of maxpool3di
+ /// The input tensor.
+ /// The indices selected by maxpool3di.
+ /// The size of the window to take a max over.
+ /// The stride of the window. Default value is kernelSize.
+ /// The implicit zero padding to be added on both sides.
+ /// The sizes of the window to take a max over.
+ /// The strides of the window. Default value is kernelSizes.
+ /// The implicit zero paddings to be added on corresponding sides.
+ /// The targeted output size.
+ static member maxunpool3d(input:Tensor, indices:Tensor, ?kernelSize:int, ?stride:int, ?padding:int, ?kernelSizes:seq<int>, ?strides:seq<int>, ?paddings:seq<int>, ?outputSize:seq<int>) =
+ input.maxunpool3d(indices, ?kernelSize=kernelSize, ?stride=stride, ?padding=padding, ?kernelSizes=kernelSizes, ?strides=strides, ?paddings=paddings, ?outputSize=outputSize)
+
+ /// Applies a 1D convolution over an input signal composed of several input planes
+ /// The input tensor.
+ /// The filters.
+ /// The stride of the convolving kernel.
+ /// The implicit paddings on both sides of the input.
+ /// The spacing between kernel elements.
+ static member conv1d(input:Tensor, filters:Tensor, ?stride:int, ?padding:int, ?dilation:int) =
+ input.conv1d(filters, ?stride=stride, ?padding=padding, ?dilation=dilation)
+
+ /// Applies a 2D convolution over an input signal composed of several input planes
+ /// The input tensor.
+ /// The filters.
+ /// The stride of the convolving kernel.
+ /// The implicit padding on corresponding sides of the input.
+ /// The spacing between kernel elements.
+ /// The strides of the convolving kernel.
+ /// The implicit paddings on corresponding sides of the input.
+ /// The spacings between kernel elements.
+ static member conv2d(input:Tensor, filters:Tensor, ?stride:int, ?strides:seq<int>, ?padding:int, ?paddings:seq<int>, ?dilation:int, ?dilations:seq<int>) =
+ input.conv2d(filters, ?stride=stride, ?strides=strides, ?padding=padding, ?paddings=paddings, ?dilation=dilation, ?dilations=dilations)
+
+ /// Applies a 3D convolution over an input signal composed of several input planes
+ /// The input tensor.
+ /// The filters.
+ /// The stride of the convolving kernel.
+ /// The implicit padding on corresponding sides of the input.
+ /// The spacing between kernel elements.
+ /// The strides of the convolving kernel.
+ /// The implicit paddings on corresponding sides of the input.
+ /// The spacings between kernel elements.
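Editor's illustration (not part of the patch): pooling-with-indices and unpooling compose into a partial round trip; a minimal sketch assuming `open TensorMath`, and assuming `maxpool1di` returns the pair in (outputs, indices) order.

```fsharp
let x = dsharp.tensor [[[1.0; 3.0; 2.0; 4.0]]]        // shape [1; 1; 4]
let pooled, indices = dsharp.maxpool1di(x, 2)         // assumed (outputs, indices) order
let restored = dsharp.maxunpool1d(pooled, indices, 2) // maxima back in place, zeros elsewhere
```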
+ static member conv3d(input:Tensor, filters:Tensor, ?stride:int, ?strides:seq<int>, ?padding:int, ?paddings:seq<int>, ?dilation:int, ?dilations:seq<int>) =
+ input.conv3d(filters, ?stride=stride, ?strides=strides, ?padding=padding, ?paddings=paddings, ?dilation=dilation, ?dilations=dilations)
+
+ /// Applies a 1D transposed convolution operator over an input signal composed of several input planes, sometimes also called 'deconvolution'.
+ /// The input tensor.
+ /// The filters.
+ /// The stride of the convolving kernel.
+ /// The implicit padding on both sides of the input.
+ /// The spacing between kernel elements.
+ /// The additional size added to one side of each dimension in the output shape.
+ static member convTranspose1d(input:Tensor, filters:Tensor, ?stride:int, ?padding:int, ?dilation:int, ?outputPadding:int) =
+ input.convTranspose1d(filters, ?stride=stride, ?padding=padding, ?dilation=dilation, ?outputPadding=outputPadding)
+
+ /// Applies a 2D transposed convolution operator over an input signal composed of several input planes, sometimes also called 'deconvolution'.
+ /// The input tensor.
+ /// The filters.
+ /// The stride of the convolving kernel.
+ /// The implicit padding on both sides of the input.
+ /// The spacing between kernel elements.
+ /// The strides of the convolving kernel.
+ /// The implicit paddings on corresponding sides of the input.
+ /// The spacings between kernel elements.
+ /// The additional size added to one side of each dimension in the output shape.
+ /// The additional sizes added to one side of each dimension in the output shape.
+ static member convTranspose2d(input:Tensor, filters:Tensor, ?stride:int, ?padding:int, ?dilation:int, ?outputPadding:int, ?strides:seq<int>, ?paddings:seq<int>, ?dilations:seq<int>, ?outputPaddings:seq<int>) =
+ input.convTranspose2d(filters, ?stride=stride, ?padding=padding, ?dilation=dilation, ?outputPadding=outputPadding, ?strides=strides, ?paddings=paddings, ?dilations=dilations, ?outputPaddings=outputPaddings)
+
+ /// Applies a 3D transposed convolution operator over an input signal composed of several input planes, sometimes also called 'deconvolution'.
+ /// The input tensor.
+ /// The filters.
+ /// The stride of the convolving kernel.
+ /// The implicit padding on both sides of the input.
+ /// The spacing between kernel elements.
+ /// The strides of the convolving kernel.
+ /// The implicit paddings on corresponding sides of the input.
+ /// The spacings between kernel elements.
+ /// The additional size added to one side of each dimension in the output shape.
+ /// The additional sizes added to one side of each dimension in the output shape.
+ static member convTranspose3d(input:Tensor, filters:Tensor, ?stride:int, ?padding:int, ?dilation:int, ?outputPadding:int, ?strides:seq<int>, ?paddings:seq<int>, ?dilations:seq<int>, ?outputPaddings:seq<int>) =
+ input.convTranspose3d(filters, ?stride=stride, ?padding=padding, ?dilation=dilation, ?outputPadding=outputPadding, ?strides=strides, ?paddings=paddings, ?dilations=dilations, ?outputPaddings=outputPaddings)
+
+ /// Add zero padding to each side of a tensor
+ /// The input tensor.
+ /// The implicit paddings on corresponding sides of the input.
+ static member pad(input:Tensor, paddings:seq<int>) = input.pad(paddings)
+
+ /// Convert tensor to an image tensor with shape Channels x Height x Width
+ /// If the input tensor has 4 dimensions, then make a single image grid.
+ /// The input tensor.
+ /// The minimum pixel value.
+ /// The maximum pixel value.
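Editor's illustration (not part of the patch): a minimal conv1d shape sketch, assuming `open TensorMath` and PyTorch-style cross-correlation semantics for the kernel.

```fsharp
let x = dsharp.tensor [[[0.0; 1.0; 2.0; 3.0; 4.0]]] // [batch=1; channels=1; length=5]
let w = dsharp.tensor [[[1.0; 0.0; -1.0]]]          // [outCh=1; inCh=1; kernel=3]
let y = dsharp.conv1d(x, w)                         // shape [1; 1; 3]; each output is x[i] - x[i+2] = -2
```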
+ /// If True, shift the image to the range (0, 1), by the min and max values specified by range.
+ /// Number of columns of images in the grid.
+ static member toImage(input:Tensor, ?pixelMin:double, ?pixelMax:double, ?normalize:bool, ?gridCols:int) =
+ input.toImage(?pixelMin=pixelMin, ?pixelMax=pixelMax, ?normalize=normalize, ?gridCols=gridCols)
+
+ /// Convert tensor to a grayscale image tensor and return a string representation approximating grayscale values
+ /// The input tensor.
+ /// The minimum pixel value.
+ /// The maximum pixel value.
+ /// If True, shift the image to the range (0, 1), by the min and max values specified by range.
+ /// Number of columns of images in the grid.
+ /// The ASCII palette to use.
+ static member toImageString(input:Tensor, ?pixelMin:double, ?pixelMax:double, ?normalize:bool, ?gridCols:int, ?asciiPalette:string) =
+ input.toImageString(?pixelMin=pixelMin, ?pixelMax=pixelMax, ?normalize=normalize, ?gridCols=gridCols, ?asciiPalette=asciiPalette)
+
+ /// Convert the tensor to one with the given element type.
+ /// If the element type is unchanged the input tensor will be returned.
+ /// The input tensor.
+ /// The desired element type of returned tensor.
+ static member cast(input:Tensor, dtype:Dtype) = input.cast(dtype)
+
+ /// Move the tensor to a different device, backend and/or change its element type.
+ /// If the characteristics are unchanged the input tensor will be returned.
+ /// The input tensor.
+ /// The desired device of returned tensor. Default: if None, the device of the input tensor is used.
+ /// The desired element type of returned tensor. Default: if None, the element type of the input tensor is used.
+ /// The desired backend of returned tensor. Default: if None, the backend of the input tensor is used.
+ static member move(input:Tensor, ?device, ?dtype, ?backend) =
+ input.move(?device=device, ?dtype=dtype, ?backend=backend)
+
+ /// Configure the default device, dtype, and/or backend.
+ /// The new default device.
+ /// The new default element type. Only floating point dtypes are supported as the default.
+ /// The new default backend.
+ /// The new default printer.
+ static member config(?device: Device, ?dtype: Dtype, ?backend: Backend, ?printer: Printer) =
+ if dtype.IsSome then
+ if not dtype.Value.IsFloatingPoint then failwithf "Only floating point types are supported as the default type."
+ device |> Option.iter (fun d -> Device.Default <- d)
+ dtype |> Option.iter (fun d -> Dtype.Default <- d)
+ backend |> Option.iter (fun d -> Backend.Default <- d)
+ printer |> Option.iter (fun d -> Printer.Default <- d)
+ dsharp.tensor([0f], Device.Default, Dtype.Default, Backend.Default) |> ignore // We need this to ensure the backend assemblies are loaded and backend is ready to set the random seed immediately after config
+
+ /// Return the current default device, element type, backend, and printer.
+ static member config() = Device.Default, Dtype.Default, Backend.Default, Printer.Default
+
+ /// Configure the default device, element type, backend, printer. Only floating point dtypes are supported as the default.
+ /// A tuple of the new default device, default element type, default backend, and default printer.
+ static member config(configuration: (Device * Dtype * Backend * Printer)) =
+ let (device,dtype,backend,printer) = configuration
+ dsharp.config(device, dtype, backend, printer)
+
+ /// Returns the list of available backends.
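Editor's illustration (not part of the patch): the two `config` overloads above set and read the process-wide defaults; a minimal sketch assuming `open TensorMath`.

```fsharp
dsharp.config(device=Device.CPU, dtype=Dtype.Float64, backend=Backend.Reference)
let device, dtype, backend, printer = dsharp.config() // read the defaults back as a tuple
```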
+ static member backends() = + let backends = [|Backend.Reference; Backend.Torch|] + let backendsAvailable = Array.zeroCreate backends.Length + for i = 0 to backends.Length-1 do + try + // Try to create a tensor in the given backend, hence testing the whole underlying process + let _ = dsharp.tensor([0f], device=Device.CPU, dtype=Dtype.Float32, backend=backends[i]) + backendsAvailable[i] <- true + with + | _ -> () + [for i = 0 to backends.Length-1 do if backendsAvailable[i] then yield backends[i]] + + /// Returns the list of available devices for a given backend. + /// Return information for this backend. Defaults to Backend.Default. + /// If given, only return devices for this device type. + static member devices(?backend, ?deviceType) = BackendTensorStatics.Get(?backend=backend).GetDevices(?deviceType=deviceType) + + /// Returns the list of available backends and devices available for each backend. + static member backendsAndDevices() = [for b in dsharp.backends() do yield b, dsharp.devices(backend=b)] + + /// Indicates if a given backend is available. + static member isBackendAvailable(backend) = dsharp.backends() |> List.contains backend + + /// Indicates if a given device is available for a given backend. + /// The requested device. + /// Return information for this backend. Defaults to Backend.Default. + static member isDeviceAvailable(device, ?backend) = dsharp.devices(?backend=backend) |> List.contains device + + /// Indicates if a given device type is available for a given backend. + /// The requested device type. + /// Return information for this backend. Defaults to Backend.Default. + static member isDeviceTypeAvailable(deviceType, ?backend) = BackendTensorStatics.Get(?backend=backend).IsDeviceTypeAvailable(deviceType) + + /// Indicates if CUDA is available for a given backend. + /// Return information for this backend. Defaults to Backend.Default. + static member isCudaAvailable(?backend) = BackendTensorStatics.Get(?backend=backend).IsDeviceTypeAvailable(DeviceType.CUDA) + + +// Differentiable methods mirroring F# collection modules +// TODO: implement more differentiable higher-order functions and corresponding unit tests for their derivatives +type dsharp with + + /// Create a new 1D tensor using the given initializer for each element. + /// The length of the tensor. + /// The function used to initialize each element. + static member init (count:int) (initializer:int->'a) = Array.init count initializer |> dsharp.tensor + + /// Create a new 2D tensor using the given initializer for each element. + /// The length of the tensor in the first dimension. + /// The length of the tensor in the second dimension. + /// The function used to initialize each element. + static member init2d (length1:int) (length2:int) (initializer:int->int->'a) = Array2D.init length1 length2 initializer |> dsharp.tensor + + /// Create a new 3D tensor using the given initializer for each element. + /// The length of the tensor in the 1st dimension. + /// The length of the tensor in the 2nd dimension. + /// The length of the tensor in the 3rd dimension. + /// The function used to initialize each element. + static member init3d (length1:int) (length2:int) (length3:int) (initializer:int->int->int->'a) = Array3D.init length1 length2 length3 initializer |> dsharp.tensor + + /// Create a new 4D tensor using the given initializer for each element. + /// The length of the tensor in the 1st dimension. + /// The length of the tensor in the 2nd dimension. + /// The length of the tensor in the 3rd dimension. 
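Editor's illustration (not part of the patch): a minimal probing sketch for the availability helpers above, assuming `open TensorMath`; note that `backends()` actually constructs a tensor per backend, so the probe exercises the full stack.

```fsharp
for backend, devices in dsharp.backendsAndDevices() do
    printfn "%A -> %A" backend devices
if not (dsharp.isCudaAvailable()) then
    printfn "CUDA unavailable; staying on CPU"
```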
+ /// The length of the tensor in the 4th dimension. + /// The function used to initialize each element. + static member init4d (length1:int) (length2:int) (length3:int) (length4:int) (initializer:int->int->int->int->'a) = Array4D.init length1 length2 length3 length4 initializer |> dsharp.tensor + + /// Create a new 1D tensor using the given value for each element. + /// The number of elements in the tensor. + /// The initial value for each element of the tensor. + static member create (count:int) (value:'a) = Array.create count value |> dsharp.tensor + + /// Create a new 1D tensor using '0' as value for each element. + /// The number of elements in the tensor. + static member zeroCreate (count:int) = Array.zeroCreate count |> dsharp.tensor + + /// Produce a new tensor by mapping a function over all elements of the input tensor. + /// The function is passed the index of each element. The function to apply to each element of the tensor. + /// The input tensor. + static member mapi (mapping:int[]->Tensor->Tensor) (tensor:Tensor) = // Differentiable map + let tflat = tensor.view(-1) + let items = Array.init (tflat.nelement) (fun i -> mapping (flatIndexToIndex tensor.shape i) tflat[i]) + dsharp.stack(items).view(tensor.shape) + + /// Produce a new tensor by mapping a function over all corresponding elements of two input tensors. + /// The function is passed the index of each element. The shapes of the two tensors must be identical. + /// The function to apply to each element of the tensor. + /// The first input tensor. + /// The second input tensor. + static member mapi2 (mapping:int[]->Tensor->Tensor->Tensor) (tensor1:Tensor) (tensor2:Tensor) = // Differentiable map2 + if tensor1.shape <> tensor2.shape then failwithf "Expecting tensor1.shape (%A) and tensor2.shape (%A) to be the same" tensor1.shape tensor2.shape + let tflat1 = tensor1.view(-1) + let tflat2 = tensor2.view(-1) + let items = Array.init (tflat1.nelement) (fun i -> mapping (flatIndexToIndex tensor1.shape i) tflat1[i] tflat2[i]) + dsharp.stack(items).view(tensor1.shape) + + /// Produce a new tensor by mapping a function over all corresponding elements of three input tensors. + /// The function is passed the index of each element. The shapes of the three tensors must be identical. + /// The function to apply to each element of the tensor. + /// The first input tensor. + /// The second input tensor. + /// The third input tensor. + static member mapi3 (mapping:int[]->Tensor->Tensor->Tensor->Tensor) (tensor1:Tensor) (tensor2:Tensor) (tensor3:Tensor) = // Differentiable map3 + if (tensor1.shape <> tensor2.shape) || (tensor2.shape <> tensor3.shape) then failwithf "Expecting tensor1.shape (%A), tensor2.shape (%A), tensor3.shape (%A) to be the same" tensor1.shape tensor2.shape tensor3.shape + let tflat1 = tensor1.view(-1) + let tflat2 = tensor2.view(-1) + let tflat3 = tensor3.view(-1) + let items = Array.init (tflat1.nelement) (fun i -> mapping (flatIndexToIndex tensor1.shape i) tflat1[i] tflat2[i] tflat3[i]) + dsharp.stack(items).view(tensor1.shape) + + /// Produce a new tensor by mapping a function over all elements of the input tensor. + /// The function to apply to each element of the tensor. + /// The input tensor. + static member map (mapping:Tensor->Tensor) (tensor:Tensor) = tensor |> dsharp.mapi (fun _ v -> mapping v) + + /// Produce a new tensor by mapping a function over all corresponding elements of two input tensors. + /// The shapes of the two tensors must be identical. 
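Editor's illustration (not part of the patch): `mapi` is implemented with `view`/`stack`, so the mapping stays inside the tensor graph; a minimal sketch assuming `open TensorMath`.

```fsharp
let t = dsharp.tensor [10.0; 20.0; 30.0]
// scale each element by its flat index; the result is built from differentiable ops
let scaled = t |> dsharp.mapi (fun idx v -> v * dsharp.tensor (float (idx[0])))
// tensor([0., 20., 60.])
```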
+ /// The function to apply to each element of the tensor. + /// The first input tensor. + /// The second input tensor. + static member map2 (mapping:Tensor->Tensor->Tensor) (tensor1:Tensor) (tensor2:Tensor) = dsharp.mapi2 (fun _ v1 v2 -> mapping v1 v2) tensor1 tensor2 + + /// Produce a new tensor by mapping a function over all corresponding elements of three input tensors. + /// The shapes of the three tensors must be identical. + /// The function to apply to each element of the tensor. + /// The first input tensor. + /// The second input tensor. + /// The third input tensor. + static member map3 (mapping:Tensor->Tensor->Tensor->Tensor) (tensor1:Tensor) (tensor2:Tensor) (tensor3:Tensor) = dsharp.mapi3 (fun _ v1 v2 v3 -> mapping v1 v2 v3) tensor1 tensor2 tensor3 + + diff --git a/src/TensorMath/TensorMath.fsproj b/src/TensorMath/TensorMath.fsproj index c8d2ac8..c767889 100644 --- a/src/TensorMath/TensorMath.fsproj +++ b/src/TensorMath/TensorMath.fsproj @@ -1,12 +1,31 @@  - netstandard2.0 + netstandard2.1 true - + + + + + + + + + + + + + + + + + + + + diff --git a/src/TensorMath/Util.fs b/src/TensorMath/Util.fs new file mode 100644 index 0000000..3234594 --- /dev/null +++ b/src/TensorMath/Util.fs @@ -0,0 +1,495 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. + +/// Contains utilities related to the TensorMath programming model. +namespace TensorMath.Util + +open System +open System.Collections +open System.Collections.Generic +open System.Diagnostics.CodeAnalysis +open FSharp.Reflection +open System.IO +open System.IO.Compression +open System.Runtime.Serialization +open System.Runtime.Serialization.Formatters.Binary + + +/// Contains operations relating to pseudo-random number generation. +type Random() = + static let mutable rnd = System.Random() + + /// Sets the random seed. + static member Seed(seed) = rnd <- System.Random(seed) + + /// Samples a random value from the standard uniform distribution over the interval [0,1). + static member Uniform() = rnd.NextDouble() + + /// Samples a random value from the uniform distribution with the given parameters [low, high). + static member Uniform(low, high) = low + (rnd.NextDouble() * (high-low)) + + /// Samples a random value from the standard normal distribution with mean 0 and standard deviation 1. + static member Normal() = + // Marsaglia polar method + // TODO: this is discarding one of the two samples that can be generated. For efficiency, we can keep the second sample around to return it in the next call. + let rec normal() = + let x, y = (rnd.NextDouble()) * 2.0 - 1.0, (rnd.NextDouble()) * 2.0 - 1.0 + let s = x * x + y * y + if s > 1.0 then normal() else x * sqrt (-2.0 * (log s) / s) + normal() + + /// Samples a random value from the normal distribution with the given mean and standard deviation. + static member Normal(mean, stddev) = mean + Random.Normal() * stddev + + /// Samples a double value in the range [0, 1) + static member Double() = rnd.NextDouble() + + /// Samples a double value in the given range [low, high) + static member Double(low, high) = + if high < low then failwithf "Expecting high >= low" + low + rnd.NextDouble() * (high-low) + + /// Samples a non-negative random integer + static member Integer() = rnd.Next() + + /// Samples a random integer in the given range [low, high). 
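Editor's illustration (not part of the patch): a minimal sketch of the `Random` helpers introduced above, assuming `open TensorMath.Util`.

```fsharp
open TensorMath.Util

Random.Seed 42                   // reproducible runs
let u = Random.Uniform(2.0, 5.0) // uniform in [2, 5)
let n = Random.Normal(10.0, 0.5) // mean 10, stddev 0.5 via the polar method
```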
+ static member Integer(low, high) = rnd.Next(low, high)
+
+ /// Samples an index at random with the given categorical probabilities.
+ static member ChoiceIndex(probs:float[]) =
+ let probsSum = probs |> Array.sum
+ let cumulativeProbs = probs |> Array.map (fun v -> v / probsSum) |> Array.cumulativeSum
+ let p = rnd.NextDouble()
+ cumulativeProbs |> Array.findIndex (fun v -> v >= p)
+
+ /// Samples a value at random from the given array.
+ static member Choice(array:_[]) = array[rnd.Next(array.Length)]
+
+ /// Samples a value at random from the given array using the given categorical probabilities.
+ static member Choice(array:_[], probs:float[]) =
+ if array.Length <> probs.Length then failwith "Expecting array and probs of same length"
+ array[Random.ChoiceIndex(probs)]
+
+ /// Samples an array of random values for the given weighted distribution.
+ static member Multinomial(probs:float[], numSamples:int) =
+ Array.init numSamples (fun _ -> Random.ChoiceIndex(probs)) // Samples with replacement
+
+ /// Returns a 2D array where each row contains `numSamples` indices sampled from the multinomial probability distribution defined by the probabilities in the corresponding row of the `probs` array.
+ static member Multinomial(probs:float[,], numSamples:int) =
+ Array2D.init (probs.GetLength(0)) numSamples (fun i _ -> Random.ChoiceIndex(probs[i,*])) // Samples with replacement
+
+ /// Samples a random value from the Bernoulli distribution with the given probability.
+ static member Bernoulli(prob:float) = if rnd.NextDouble() < prob then 1. else 0.
+
+ /// Samples a random value from the Bernoulli distribution.
+ static member Bernoulli() = Random.Bernoulli(0.5)
+
+ /// Returns a universally unique identifier (UUID) string
+ // https://en.wikipedia.org/wiki/Universally_unique_identifier
+ static member UUID() =
+ // We don't use System.Guid.NewGuid().ToString() because it relies on a separate randomness source whose seed we cannot control through System.Random(seed)
+ let bytes = Array.zeroCreate (sizeof<Guid>)
+ rnd.NextBytes(bytes)
+ let guid = new Guid(bytes)
+ guid.ToString()
+
+ /// Returns an array that is a randomly-shuffled version of the given array, using the Durstenfeld/Knuth shuffle.
+ static member Shuffle(array:_[]) =
+ // Durstenfeld/Knuth shuffle
+ let a = array |> Array.copy
+ let mutable n = array.Length
+ while n > 1 do
+ n <- n - 1
+ let i = rnd.Next(n+1)
+ let temp = a[i]
+ a[i] <- a[n]
+ a[n] <- temp
+ a
+
+/// Contains operations relating to pseudo-random number generation.
+module Random =
+
+ /// Returns a function that maps a given index to a shuffled version of the indexes up to the given `length`
+ let shuffledIndices (length: int) =
+ let indices = Array.init length id
+ let indicesShuffled = Random.Shuffle(indices)
+ fun (i: int) -> indicesShuffled[i]
+
+/// Contains operations relating to converting .NET data to tensor data.
+module DataConverter =
+
+ /// Gets the elements of an arbitrary IEnumerable.
+ let private seqElements (ie: obj) =
+ let e = (ie :?> IEnumerable).GetEnumerator()
+ [| while e.MoveNext() do yield e.Current |]
+
+ /// Matches an array type of arbitrary rank.
+ let private (|ArrayTy|_|) (ty: Type) =
+ if ty.IsArray && ty.GetArrayRank() <= 4 then
+ Some(ty.GetArrayRank(), ty.GetElementType())
+ else
+ None
+
+ /// Matches a tuple type.
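Editor's illustration (not part of the patch): a minimal sketch of categorical sampling with the helpers above, assuming `open TensorMath.Util`; `ChoiceIndex` normalizes the weights internally.

```fsharp
let probs = [| 0.1; 0.6; 0.3 |]
let idx   = Random.ChoiceIndex probs        // index 1 with probability ~0.6
let draws = Random.Multinomial(probs, 1000) // 1000 indices drawn with replacement
let ones  = draws |> Array.filter ((=) 1) |> Array.length // roughly 600
```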
+ let private (|TupleTy|_|) (ty: Type) =
+ if FSharpType.IsTuple ty then
+ Some(FSharpType.GetTupleElements ty)
+ else
+ None
+
+ let rec private (|ListTy|_|) (ty: Type) =
+ if ty.IsGenericType && ty.GetGenericTypeDefinition().Equals(typedefof<list<_>>) then
+ Some (ty.GetGenericArguments()[0])
+ else
+ None
+
+ /// Matches a 1D sequence type (seq<_>) or a subclass.
+ let rec private (|SeqTy|_|) (ty: Type) =
+ if ty.IsGenericType && ty.GetGenericTypeDefinition().Equals(typedefof<seq<_>>) then
+ Some (ty.GetGenericArguments()[0])
+ else
+ match ty.BaseType with
+ | null -> None
+ | _ ->
+ match ty.BaseType with
+ | SeqTy ety -> Some ety
+ | _ ->
+ ty.GetInterfaces() |> Array.tryPick (|SeqTy|_|)
+
+ let rec formatType (ty: Type) =
+ match ty with
+ | ListTy ety -> sprintf "list<%s>" (formatType ety)
+ | ArrayTy (_,ety) -> sprintf "%s[]" (formatType ety)
+ | SeqTy ety -> sprintf "seq<%s>" (formatType ety)
+ | TupleTy etys -> String.concat "*" (Array.map formatType etys)
+ | ty when ty = typeof<int64> -> "int64"
+ | ty when ty = typeof<int> -> "int"
+ | ty when ty = typeof<double> -> "double"
+ | ty when ty = typeof<float32> -> "float32"
+ | _ -> ty.ToString()
+
+ let private (|SeqTupleTy|_|) (ty: Type) =
+ match ty with
+ | SeqTy (TupleTy etys) ->
+ match etys |> Array.tryFind (fun ety -> ety <> etys[0]) with
+ | None -> ()
+ | Some ety2 -> failwithf "jagged input: unexpected mixed types in tuple being used as sequence notation, %s and %s" (formatType etys[0]) (formatType ety2)
+ Some (etys[0])
+ | _ -> None
+
+ let private (|TupleLeafTy|_|) (tgt: Type) (ty: Type) =
+ match ty with
+ | TupleTy etys when etys |> Array.forall (fun ety -> ety = tgt) -> Some ()
+ | _ -> None
+
+ let private (|SeqTupleLeafTy|_|) (tgt: Type) (ty: Type) =
+ match ty with
+ | SeqTy (TupleLeafTy tgt) -> Some ()
+ | _ -> None
+
+ let private flatArrayAndShape1D<'T> (v: 'T[]) =
+ v, [|Array.length v|]
+
+ let private flatArrayAndShape2D<'T> (v: 'T[,]) =
+ let n1 = Array2D.length1 v
+ let n2 = Array2D.length2 v
+ let arr =
+ [| for i=0 to n1-1 do
+ for j=0 to n2-1 do
+ yield v[i, j] |]
+ arr, [| n1;n2|]
+
+ let private flatArrayAndShape3D<'T> (v: 'T[,,]) =
+ let n1 = Array3D.length1 v
+ let n2 = Array3D.length2 v
+ let n3 = Array3D.length3 v
+ let arr =
+ [| for i=0 to n1-1 do
+ for j=0 to n2-1 do
+ for k=0 to n3-1 do
+ yield v[i, j, k] |]
+ arr, [| n1;n2;n3 |]
+
+ let private flatArrayAndShape4D<'T> (v: 'T[,,,]) =
+ let n1 = Array4D.length1 v
+ let n2 = Array4D.length2 v
+ let n3 = Array4D.length3 v
+ let n4 = Array4D.length4 v
+ let arr =
+ [| for i=0 to n1-1 do
+ for j=0 to n2-1 do
+ for k=0 to n3-1 do
+ for m=0 to n4-1 do
+ yield v[i, j, k, m] |]
+ arr, [| n1;n2;n3;n4 |]
+
+ let private flatArrayAndShape5D<'T> (v: Array) =
+ let n1 = Array5D.length1 v
+ let n2 = Array5D.length2 v
+ let n3 = Array5D.length3 v
+ let n4 = Array5D.length4 v
+ let n5 = Array5D.length5 v
+ let arr =
+ [| for i1=0 to n1-1 do
+ for i2=0 to n2-1 do
+ for i3=0 to n3-1 do
+ for i4=0 to n4-1 do
+ for i5=0 to n5-1 do
+ yield Array5D.get v i1 i2 i3 i4 i5 :?> 'T|]
+ arr, [| n1;n2;n3;n4;n5 |]
+
+ let private flatArrayAndShape6D<'T> (v: Array) =
+ let n1 = Array6D.length1 v
+ let n2 = Array6D.length2 v
+ let n3 = Array6D.length3 v
+ let n4 = Array6D.length4 v
+ let n5 = Array6D.length5 v
+ let n6 = Array6D.length6 v
+ let arr =
+ [| for i1=0 to n1-1 do
+ for i2=0 to n2-1 do
+ for i3=0 to n3-1 do
+ for i4=0 to n4-1 do
+ for i5=0 to n5-1 do
+ for i6=0 to n6-1 do
+ yield Array6D.get v i1 i2 i3 i4 i5 i6 :?> 'T|]
+ arr, [| n1;n2;n3;n4;n5;n6 |]
+
+ let private seqTupleElements (els: obj) =
+ match seqElements els with
+ | [| el |] -> FSharpValue.GetTupleFields(el)
+ | tup -> failwithf "unexpected multiple values in tuple list input: %A" (Array.toList tup)
+
+ let private arrayCast<'T> (els: obj[]) = els |> Array.map (fun v -> v :?> 'T)
+
+ let private (|SeqOrSeqTupleTy|_|) ty =
+ match ty with
+ | SeqTupleTy ety -> Some (seqTupleElements, ety)
+ | SeqTy ety -> Some (seqElements, ety)
+ | _ -> None
+
+ let private (|SeqOrSeqTupleLeafTy|_|) tgt ty =
+ match ty with
+ | SeqTupleLeafTy tgt -> Some (seqTupleElements)
+ | SeqTy ety when ety = tgt -> Some (seqElements)
+ | _ -> None
+
+ // An exact type-match test is needed because of https://github.com/DiffSharp/DiffSharp/issues/203 and https://github.com/dotnet/fsharp/issues/10202
+ // That is, in .NET and F#, a boxed "byte[]" can be unboxed to "int8[]" and vice-versa.
+ // This also affects pattern matches of the element types of sequences as well
+ let typesMatch<'T> (array: System.Array) = (array.GetType().GetElementType() = typeof<'T>)
+
+ let rec tryFlatArrayAndShape<'T> (value:obj) : ('T[] * int[]) option =
+ match value with
+ | :? 'T as v -> Some ([|v|], [||])
+ | :? ('T[]) as v when typesMatch<'T> v -> Some (flatArrayAndShape1D v)
+ | :? ('T[,]) as v when typesMatch<'T> v -> Some (flatArrayAndShape2D<'T> v)
+ | :? ('T[,,]) as v when typesMatch<'T> v -> Some (flatArrayAndShape3D<'T> v)
+ | :? ('T[,,,]) as v when typesMatch<'T> v -> Some (flatArrayAndShape4D<'T> v)
+ | :? System.Array as v when v.Rank = 5 && typesMatch<'T> v -> Some (flatArrayAndShape5D<'T> v)
+ | :? System.Array as v when v.Rank = 6 && typesMatch<'T> v -> Some (flatArrayAndShape6D<'T> v)
+ | :? seq<'T> as v when typesMatch<'T> (Seq.toArray v) -> Some (flatArrayAndShape1D (Seq.toArray v))
+ | :? seq<seq<'T>> as v when typesMatch<'T> (array2D v) -> Some (flatArrayAndShape2D (array2D v))
+ | :? seq<seq<seq<'T>>> as v when typesMatch<'T> (array3D v) -> Some (flatArrayAndShape3D (array3D v))
+ | :? seq<seq<seq<seq<'T>>>> as v when typesMatch<'T> (array4D v) -> Some (flatArrayAndShape4D (array4D v))
+ | :? seq<seq<seq<seq<seq<'T>>>>> as v when typesMatch<'T> (array5D v) -> Some (flatArrayAndShape5D (array5D v))
+ | :? seq<seq<seq<seq<seq<seq<'T>>>>>> as v when typesMatch<'T> (array6D v) -> Some (flatArrayAndShape6D (array6D v))
+ | _ ->
+ let vty = value.GetType()
+ let tgt = (typeof<'T>)
+ match vty with
+ // list<int> -> dim 1
+ | SeqTupleLeafTy tgt ->
+ let arr = value |> seqTupleElements |> arrayCast<'T>
+ Some (arr, [| arr.Length |])
+ // list<list<int>> etc. -> dim 2
+ | SeqOrSeqTupleTy (fetcher, (SeqOrSeqTupleLeafTy tgt fetcher2)) ->
+ let els = value |> fetcher |> Array.map (fetcher2 >> arrayCast<'T>) |> array2D
+ Some (flatArrayAndShape2D<'T> els)
+ // ... -> dim 3
+ | SeqOrSeqTupleTy (fetcher1, SeqOrSeqTupleTy (fetcher2, SeqOrSeqTupleLeafTy tgt fetcher3)) ->
+ let els = value |> fetcher1 |> Array.map (fetcher2 >> Array.map (fetcher3 >> arrayCast<'T>)) |> array3D
+ Some (flatArrayAndShape3D<'T> els)
+ // ... -> dim 4
+ | SeqOrSeqTupleTy (fetcher1, SeqOrSeqTupleTy (fetcher2, SeqOrSeqTupleTy (fetcher3, SeqOrSeqTupleLeafTy tgt fetcher4))) ->
+ let els = value |> fetcher1 |> Array.map (fetcher2 >> Array.map (fetcher3 >> Array.map (fetcher4 >> arrayCast<'T>))) |> array4D
+ Some (flatArrayAndShape4D<'T> els)
+ // ... -> dim 5
+ | SeqOrSeqTupleTy (fetcher1, SeqOrSeqTupleTy (fetcher2, SeqOrSeqTupleTy (fetcher3, SeqOrSeqTupleTy (fetcher4, SeqOrSeqTupleLeafTy tgt fetcher5)))) ->
+ let els = value |> fetcher1 |> Array.map (fetcher2 >> Array.map (fetcher3 >> Array.map (fetcher4 >> Array.map (fetcher5 >> arrayCast<'T>)))) |> array5D
+ Some (flatArrayAndShape5D<'T> els)
+ // ... -> dim 6
+ | SeqOrSeqTupleTy (fetcher1, SeqOrSeqTupleTy (fetcher2, SeqOrSeqTupleTy (fetcher3, SeqOrSeqTupleTy (fetcher4, SeqOrSeqTupleTy (fetcher5, SeqOrSeqTupleLeafTy tgt fetcher6))))) ->
+ let els = value |> fetcher1 |> Array.map (fetcher2 >> Array.map (fetcher3 >> Array.map (fetcher4 >> Array.map (fetcher5 >> Array.map (fetcher6 >> arrayCast<'T>))))) |> array6D
+ Some (flatArrayAndShape6D<'T> els)
+ | _ -> None
+
+
+ [<ExcludeFromCodeCoverage>]
+ let inline dataOfValues ofFloat32 ofFloat64 ofInt8 ofInt16 ofInt32 ofInt64 ofBool ofByte (value:obj) : (^T[] * int[]) =
+ match value |> tryFlatArrayAndShape<float32> with
+ | Some (values, shape) -> (values |> Array.map ofFloat32, shape)
+ | None ->
+ match value |> tryFlatArrayAndShape<double> with
+ | Some (values, shape) -> (values |> Array.map ofFloat64, shape)
+ | None ->
+ match value |> tryFlatArrayAndShape<int64> with
+ | Some (values, shape) -> (values |> Array.map ofInt64, shape)
+ | None ->
+ match value |> tryFlatArrayAndShape<int32> with
+ | Some (values, shape) -> (values |> Array.map ofInt32, shape)
+ | None ->
+ match value |> tryFlatArrayAndShape<int16> with
+ | Some (values, shape) -> (values |> Array.map ofInt16, shape)
+ | None ->
+ match value |> tryFlatArrayAndShape<bool> with
+ | Some (values, shape) -> (values |> Array.map ofBool, shape)
+ | None ->
+ match value |> tryFlatArrayAndShape<byte> with
+ | Some (values, shape) -> (values |> Array.map ofByte, shape)
+ | None ->
+ match value |> tryFlatArrayAndShape<int8> with
+ | Some (values, shape) -> (values |> Array.map ofInt8, shape)
+ | None ->
+ // Empty tensor (no data, shape: [0])
+ match value with
+ | :? (seq<obj>) as v when Seq.isEmpty v -> ([||] |> Array.map ofFloat32, [|0|])
+ | _ ->
+ failwithf "Cannot convert from value of type %A" (value.GetType())
+
+ let dataOfValuesForFloat32 (value:obj) =
+ dataOfValues float32 float32 float32 float32 float32 float32 (fun x -> if x then 1.0f else 0.0f) float32 value
+
+ let dataOfValuesForFloat64 (value:obj) =
+ dataOfValues double double double double double double (fun x -> if x then 1.0 else 0.0) double value
+
+ let dataOfValuesForByte (value:obj) =
+ dataOfValues byte byte byte byte byte byte (fun x -> if x then 1uy else 0uy) id value
+
+ let dataOfValuesForInt8 (value:obj) =
+ dataOfValues int8 int8 int8 int8 int8 int8 (fun x -> if x then 1y else 0y) int8 value
+
+ let dataOfValuesForInt16 (value:obj) =
+ dataOfValues int16 int16 int16 int16 int16 int16 (fun x -> if x then 1s else 0s) int16 value
+
+ let dataOfValuesForInt32 (value:obj) =
+ dataOfValues int32 int32 int32 int32 int32 int32 (fun x -> if x then 1 else 0) int32 value
+
+ let dataOfValuesForInt64 (value:obj) =
+ dataOfValues int64 int64 int64 int64 int64 int64 (fun x -> if x then 1L else 0L) int64 value
+
+ let dataOfValuesForBool (value:obj) =
+ dataOfValues System.Convert.ToBoolean System.Convert.ToBoolean System.Convert.ToBoolean System.Convert.ToBoolean System.Convert.ToBoolean System.Convert.ToBoolean id System.Convert.ToBoolean value
+
+
+/// Contains auto-opened utilities related to the TensorMath programming model.
+[<AutoOpen>]
+module UtilAutoOpens =
+
+ /// Returns a function that memoizes the given function using a lookaside table.
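Editor's illustration (not part of the patch): a minimal sketch of the conversion entry points above, assuming `open TensorMath.Util`; a nested int list is matched by the int32 path and converted element-wise.

```fsharp
open TensorMath.Util

// a nested list flattens to (data, shape); here the int32 path feeds ofInt32
let values, shape = DataConverter.dataOfValuesForFloat32 (box [[1; 2; 3]; [4; 5; 6]])
// values = [|1.0f; 2.0f; 3.0f; 4.0f; 5.0f; 6.0f|], shape = [|2; 3|]
```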
+ let memoize fn =
+ let cache = new Dictionary<_,_>()
+ fun x ->
+ match cache.TryGetValue x with
+ | true, v -> v
+ | false, _ ->
+ let v = fn x
+ cache.Add(x,v)
+ v
+
+ /// Saves the given value to the given local file using binary serialization.
+ let saveBinary (object: 'T) (fileName:string) =
+ let formatter = BinaryFormatter()
+ let fs = new FileStream(fileName, FileMode.Create)
+ let cs = new GZipStream(fs, CompressionMode.Compress)
+ try
+ formatter.Serialize(cs, object)
+ cs.Flush()
+ cs.Close()
+ fs.Close()
+ with
+ | :? SerializationException as e -> failwithf "Cannot save to file. %A" e.Message
+
+ /// Loads the given value from the given local file using binary serialization.
+ let loadBinary (fileName:string):'T =
+ let formatter = BinaryFormatter()
+ let fs = new FileStream(fileName, FileMode.Open)
+ let cs = new GZipStream(fs, CompressionMode.Decompress)
+ try
+ let object = formatter.Deserialize(cs) :?> 'T
+ cs.Close()
+ fs.Close()
+ object
+ with
+ | :? SerializationException as e -> failwithf "Cannot load from file. %A" e.Message
+
+ /// Value of log(sqrt(2*Math.PI)).
+ let logSqrt2Pi = log(sqrt(2. * Math.PI))
+
+ /// Value of log(10).
+ let log10Val = log 10.
+
+ /// Indents all lines of the given string by the given number of spaces.
+ let indentNewLines (str:String) numSpaces =
+ let mutable ret = ""
+ let spaces = String.replicate numSpaces " "
+ str |> Seq.toList |> List.iter (fun c ->
+ if c = '\n' then
+ ret <- ret + "\n" + spaces
+ else ret <- ret + string c)
+ ret
+
+ /// Left-pads a string up to the given length.
+ let stringPad (s:string) (width:int) =
+ if s.Length > width then s
+ else String.replicate (width - s.Length) " " + s
+
+ /// Left-pads a string to match the length of another string.
+ let stringPadAs (s1:string) (s2:string) = stringPad s1 s2.Length
+
+ /// Formats an integer as a string with comma as thousands separator
+ let thousandsInt(x:int) = String.Format("{0:#,0}", x)
+
+ /// Formats an integer as a string with comma as thousands separator
+ let thousandsFloat(x:float) = String.Format("{0:N}", x)
+
+ /// Returns the file contents as Base64 encoded string
+ let fileToBase64String fileName =
+ let bytes = System.IO.File.ReadAllBytes(fileName)
+ System.Convert.ToBase64String(bytes)
+
+ /// Given a PNG image file name, returns an HTML image element with the image content included as a Base64 encoded string
+ let pngToHtml fileName widthPixels =
+ sprintf """<img src="data:image/png;base64,%s" width="%dpx" />""" (fileName |> fileToBase64String) widthPixels
+
+ /// Return a human-readable string representation of the given value in Bytes.
+ let bytesReadable (i:int64) =
+ // Based on https://www.somacon.com/p576.php
+ let absolute_i = abs i
+ let suffix, readable =
+ // https://en.wikipedia.org/wiki/Binary_prefix
+ if absolute_i >= 0x1000000000000000L then // exbibyte
+ "EiB", (i >>> 50)
+ elif absolute_i >= 0x4000000000000L then // pebibyte
+ "PiB", (i >>> 40)
+ elif absolute_i >= 0x10000000000L then // tebibyte
+ "TiB", (i >>> 30)
+ elif absolute_i >= 0x40000000L then // gibibyte
+ "GiB", (i >>> 20)
+ elif absolute_i >= 0x100000L then // mebibyte
+ "MiB", (i >>> 10)
+ elif absolute_i >= 0x400L then // kibibyte
+ "KiB", i
+ else
+ "B", i // Byte
+ if suffix = "B" then i.ToString("0 B") else
+ let readable = (double readable / 1024.)
+ readable.ToString("0.### ") + suffix
+
+ // Avoids warning FS3370 in F# 6
+ let (!)
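Editor's illustration (not part of the patch): a minimal sketch of `memoize` above, assuming `open TensorMath.Util` (the helper is auto-opened via `UtilAutoOpens`); the sleep only makes the caching visible.

```fsharp
let slowSquare (x: int) = System.Threading.Thread.Sleep 100; x * x
let fastSquare = memoize slowSquare
fastSquare 12 |> ignore // computed once and cached
fastSquare 12 |> ignore // served from the lookaside table
```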
(r: 'T ref) = r.Value
+
+ // Avoids warning FS3370 in F# 6
+ let (:=) (r: 'T ref) (v: 'T) = r.Value <- v
diff --git a/tests/TensorMath.Backends.TestDuplicate/Reference.RawTensor.fs b/tests/TensorMath.Backends.TestDuplicate/Reference.RawTensor.fs
new file mode 100644
index 0000000..6d112f2
--- /dev/null
+++ b/tests/TensorMath.Backends.TestDuplicate/Reference.RawTensor.fs
@@ -0,0 +1,2367 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin <gunes@robots.ox.ac.uk>)
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+#if TEST_DUPLICATE_BACKEND
+namespace rec TensorMath.Backends.TestDuplicate
+#else
+namespace rec TensorMath.Backends.Reference
+#endif
+
+open System
+open TensorMath
+open TensorMath.Backends
+open TensorMath.Util
+
+#nowarn "77" // use of op_Explicit
+
+[<AutoOpen>]
+module internal Utils =
+ type RawTensor with
+ member x.GetTypedValues() : 'T[] = (x :?> RawTensorCPU<'T>).Values
+
+/// This is the base class for all RawTensorXyz types.
+/// All type-independent operations are implemented directly on this class.
+[<AbstractClass>]
+type RawTensorCPU<'T when 'T : equality and 'T :> scalar>(values: 'T[], shape: Shape, dtype: Dtype, device: Device) =
+ inherit RawTensor()
+ do if device.DeviceType = DeviceType.CUDA then failwithf "CUDA is not supported by the reference backend."
+
+ let mutable values = values
+ let mutable isMutable = false
+ let checkMutable() = if not isMutable then failwith "The tensor cannot be mutated."
+ override _.Shape = shape
+ override _.Dim = shape.Length
+ override _.Nelement = shapeLength shape
+ override _.Dtype = dtype
+ override _.Device = device
+ override _.DeviceType = device.DeviceType
+ override _.Handle = box values
+ override _.Backend =
+#if TEST_DUPLICATE_BACKEND
+ Backend.Register "TestDuplicate"
+#else
+ Backend.Reference
+#endif
+
+ member _.Values : 'T[] = values
+
+ member internal t.IndexToFlatIndex(index:int[]) =
+ indexToFlatIndex t.Shape index
+
+ member internal t.FlatIndexToIndex(flatIndex:int) =
+ flatIndexToIndex t.Shape flatIndex
+
+ member t.Item
+ with get ([<System.ParamArray>] index:int[]) =
+ // printfn "rawtensor shape %A item index %A" t.Shape index
+ if index.Length <> t.Dim then failwithf "Expecting a %id index" t.Dim
+ let vvv = t.Values[t.IndexToFlatIndex(index)]
+ vvv
+
+ and set ([<System.ParamArray>] index:int[]) v =
+ if index.Length <> t.Dim then failwithf "Expecting a %id index" t.Dim
+ t.Values[t.IndexToFlatIndex(index)] <- v
+
+ override t.GetItem(indexes:int[]) =
+ t[indexes] :> scalar
+
+ override t.GetSlice(fullBounds:int[,]) =
+ let fullBounds = Shape.completeSliceBounds t.Shape fullBounds
+ let shape = Shape.checkCanGetSlice t.Shape fullBounds
+ let array = Array.zeroCreate (shapeLength shape)
+ let mutable arrayi = 0
+ let rec slice (fullBounds:int[,]) externalCoords =
+ if fullBounds.GetLength(0) = 1 then
+ for i=fullBounds[0,0] to fullBounds[0,1] do
+ // printfn "inner %A" i
+ let globalCoords = Array.append externalCoords [|i|]
+ array[arrayi] <- t[globalCoords]
+ arrayi <- arrayi + 1
+ else
+ for i=fullBounds[0,0] to fullBounds[0,1] do
+ // printfn "outer %A" i
+ slice fullBounds[1..,*] (Array.append externalCoords [|i|])
+ slice fullBounds [||]
+ t.MakeLike(array, shape)
+
+ override t.Clone() = t.MakeLike(Array.copy t.Values, Array.copy t.Shape)
+
+ abstract member MakeLike: values: 'T[] * shape: Shape * ?device: Device -> RawTensor
+
+ override x.ComputeHash() = hash shape + hash values
+
+ override t.Expand(newShape) =
+ if newShape.Length = 1 && newShape[0] = 0 then t.MakeLike([||],
newShape) else // Return zero-sized tensor if expanding to zero-sized tensor + if shape = newShape then t :> _ else + Shape.checkCanExpand shape newShape + let trim = newShape.Length - shape.Length + let exp = shapeLength newShape[0..trim-1] + let jshape = newShape[trim..] + let n = shapeLength newShape + let result = Array.zeroCreate n + if jshape.Length = 0 then + // The expansion is everything + for jP = 0 to exp-1 do + result[jP] <- values[0] + else + for jP = 0 to exp-1 do + let rec loop ibase jbase d = + let strideD = if (shape[d] = jshape[d]) then 1 else 0 + if d < jshape.Length-1 then + let mutable iD = 0 + for jD = 0 to jshape[d]-1 do + let ibaseD = (ibase+iD)*shape[d+1] + let jbaseD = (jbase+jD)*jshape[d+1] + loop ibaseD jbaseD (d+1) + iD <- iD + strideD + else + let mutable iD = 0 + // last loop does the actual copy fragments + for jD = 0 to jshape[d]-1 do + result[jbase+jD] <- values[ibase+iD] + iD <- iD + strideD + loop 0 (jP*jshape[0]) 0 + t.MakeLike(result, newShape) + + override t.ToValues() = + let shape = t.Shape + match t.Dim with + | 0 -> box values[0] + | 1 -> upcast Array.init shape[0] (fun i -> t[i]) + | 2 -> upcast Array2D.init shape[0] shape[1] (fun i j -> t[i, j]) + | 3 -> upcast Array3D.init shape[0] shape[1] shape[2] (fun i j k -> t[i, j, k]) + | 4 -> upcast Array4D.init shape[0] shape[1] shape[2] shape[3] (fun i j k l -> t[i, j, k, l]) + | 5 -> upcast Array5D.init shape[0] shape[1] shape[2] shape[3] shape[4] (fun i j k l m -> t[i, j, k, l, m]) + | 6 -> upcast Array6D.init shape[0] shape[1] shape[2] shape[3] shape[4] shape[5] (fun i j k l m n -> t[i, j, k, l, m, n]) + | _ -> ArrayND.init shape (fun idxs -> t[idxs]) + + override _.StackTs(tensors, dim) = + let values, shapes = tensors |> Array.map (fun t -> t.GetTypedValues(), t.Shape) |> Array.unzip + let n, shape1, shape2, newShape = Shape.checkCanStack shapes dim + let m1 = shapeLength shape1 + let m2 = shapeLength shape2 + let m = m1 * m2 + let result = Array.zeroCreate (n * m) + for i=0 to (n*m)-1 do + let chunk = i/m2 + let i2 = chunk%n + let j2 = (chunk/n)*m2+i%m2 + result[i] <-values[i2][j2] + + (tensors[0] :?> RawTensorCPU<'T>).MakeLike(result, newShape) + + override t.UnstackT(dim) = + let shape = t.Shape + let shape1, shape2, unstackedShape = Shape.checkCanUnstack shape dim + let n = shape[dim] + let m1 = shapeLength shape1 + let m2 = shapeLength shape2 + let m = m1 * m2 + let values = t.Values + let results = Array.init n (fun _ -> Array.zeroCreate m) + for i=0 to (n*m)-1 do + let chunk = i/m2 + let i2 = chunk%n + let j2 = (chunk/n)*m2+i%m2 + results[i2][j2] <- values[i] + results |> Array.map (fun rvalues -> t.MakeLike(rvalues, unstackedShape)) + + override t.CatTs(tensors, dim) = + let values, shapes = tensors |> Array.map (fun t -> t.GetTypedValues(), t.Shape) |> Array.unzip + let n, shape1, m2, shape3, outShape = Shape.checkCanCat shapes dim + let m1 = shapeLength shape1 + let m3 = shapeLength shape3 + let m = m1 * m2 * m3 + let result = Array.zeroCreate m + let mutable i = 0 + for j1 = 0 to m1-1 do + for k = 0 to n-1 do + let d = shapes[k][dim] + let b = j1*m3*d + for j2 = 0 to d*m3-1 do + result[i+j2] <-values[k][b+j2] + i <- i + d*m3 + + t.MakeLike(result, outShape) + + override t.SplitT(sizes, dim) = + let shape = t.Shape + let outShapes = Shape.checkCanSplit shape sizes dim + let n = sizes.Length + let shape1 = shape[0..dim-1] + let shape2 = shape[dim+1..] 
+ let m1 = shapeLength shape1 + let m3 = shapeLength shape2 + let values = t.Values + let results = Array.init n (fun k -> Array.zeroCreate (m1 * sizes[k] * m3)) + let mutable i = 0 + for j1 = 0 to m1-1 do + for k = 0 to n-1 do + let d = sizes[k] + let b = j1*m3*d + for j2 = 0 to d*m3-1 do + results[k][b+j2] <-values[i+j2] + i <- i + d*m3 + + (results, outShapes) ||> Array.map2 (fun rvalues outShape -> + t.MakeLike(rvalues, outShape)) + + override t.PermuteT(permutation) = + let inversePermutation, newShape = Shape.checkCanPermute t.Shape permutation + let result = t.ZerosLike(newShape) :?> RawTensorCPU<'T> + let rec transpose (shape:Shape) externalCoords = + if shape.Length = 1 then + for i=0 to shape[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + let transposedCoords = Array.permute (fun i -> inversePermutation[i]) globalCoords + result[transposedCoords] <- t[globalCoords] + else + for i=0 to shape[0]-1 do + transpose shape[1..] (Array.append externalCoords [|i|]) + transpose t.Shape [||] + upcast result + + override t.TransposeT(dim0, dim1) = + let permutation = [| 0 .. t.Shape.Length - 1 |] + permutation[dim0] <- dim1 + permutation[dim1] <- dim0 + t.PermuteT(permutation) + + override t.TransposeT2() = + Shape.checkCanTranspose2d t.Dim + let tcols = t.Shape[1] + let result = Array2D.init t.Shape[1] t.Shape[0] (fun i j -> t.Values[j*tcols + i]) + t.CreateLike(result) + + override t.SqueezeT(dim) = + let result = Array.copy t.Values + t.MakeLike(result, Shape.squeeze dim t.Shape) + + override t.UnsqueezeT(dim) = + let outputShape = Shape.checkCanUnsqueeze dim t.Shape + let result = Array.copy t.Values + t.MakeLike(result, outputShape) + + override t.FlipT(dims:int[]) = + Shape.checkCanFlip t.Dim dims + match t.Dim with + | 0 -> t.Clone() + | _ -> + let result = t.ZerosLike(t.Shape) :?> RawTensorCPU<'T> + let rec flip (shape:Shape) externalCoords = + if shape.Length = 1 then + for i=0 to shape[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + result[mirrorCoordinates globalCoords t.Shape dims] <- t[globalCoords] + else + for i=0 to shape[0]-1 do + flip shape[1..] (Array.append externalCoords [|i|]) + flip t.Shape [||] + upcast result + + override t.DilateT(dilations:int[]) = + Shape.checkCanDilate t.Dim dilations + match t.Dim with + | 0 -> t.Clone() + | _ -> + let result = t.ZerosLike(Shape.dilated t.Shape dilations) :?> RawTensorCPU<'T> + let rec dilate (shape:Shape) externalCoords = + if shape.Length = 1 then + for i=0 to shape[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + result[dilatedCoordinates globalCoords dilations] <- t[globalCoords] + else + for i=0 to shape[0]-1 do + dilate shape[1..] (Array.append externalCoords [|i|]) + dilate t.Shape [||] + upcast result + + override t.UndilateT(dilations:int[]) = + match t.Dim with + | 0 -> t.Clone() + | _ -> + let result = t.ZerosLike(Shape.undilatedShape t.Shape dilations) :?> RawTensorCPU<'T> + let rec dilate (shape:Shape) externalCoords = + if shape.Length = 1 then + for i=0 to shape[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + result[globalCoords] <- t[dilatedCoordinates globalCoords dilations] + else + for i=0 to shape[0]-1 do + dilate shape[1..] 
(Array.append externalCoords [|i|])
+ dilate result.Shape [||]
+ upcast result
+
+ override t.GatherT(dim:int, indices) =
+ Shape.checkCanGather t.Shape dim indices.Shape indices.Dtype
+ let indices = indices :?> RawTensorCPU<int>
+ let result = t.ZerosLike(indices.Shape) :?> RawTensorCPU<'T>
+ let rec gather (shape:Shape) externalCoords =
+ if shape.Length = 1 then
+ for i=0 to shape[0]-1 do
+ let globalCoords = Array.append externalCoords [|i|]
+ let globalCoordsIndices = Array.copy globalCoords
+ globalCoordsIndices[dim] <- indices[globalCoords]
+ result[globalCoords] <- t[globalCoordsIndices]
+ else
+ for i=0 to shape[0]-1 do
+ gather shape[1..] (Array.append externalCoords [|i|])
+ gather result.Shape [||]
+ upcast result
+
+ override t.ScatterT(dim:int, indices, destinationShape:Shape) =
+ Shape.checkCanScatter t.Shape dim indices.Shape indices.Dtype destinationShape
+ let indices = indices :?> RawTensorCPU<int>
+ let result = t.ZerosLike(destinationShape) :?> RawTensorCPU<'T>
+ let rec scatter (shape:Shape) externalCoords =
+ if shape.Length = 1 then
+ for i=0 to shape[0]-1 do
+ let globalCoords = Array.append externalCoords [|i|]
+ let globalCoordsIndices = Array.copy globalCoords
+ globalCoordsIndices[dim] <- indices[globalCoords]
+ result[globalCoordsIndices] <- t[globalCoords]
+ else
+ for i=0 to shape[0]-1 do
+ scatter shape[1..] (Array.append externalCoords [|i|])
+ scatter t.Shape [||]
+ upcast result
+
+ override t.ViewT(shape:Shape) =
+ Shape.checkCanView t.Shape shape
+ let result = Array.copy t.Values
+ t.MakeLike(result, shape)
+
+ override t.Cast(dtype: Dtype) =
+ if dtype = t.Dtype then
+ upcast t
+ else
+ let tflat = t.ViewT([|t.Nelement|]) // We flatten, cast, and return with the correct shape because .ToValues() in the next line does not support tensors with dimension > 4.
+ let values = + match t.Dtype with + // These special cases for byte and int8 are to ensure that values don't get truncated because RawTensor.Create cannot distinguish between byte and int8 + | Dtype.Byte -> tflat.ToValues():?>byte[] |> Array.map int |> box + | Dtype.Int8 -> tflat.ToValues():?>int8[] |> Array.map int |> box + | _ -> tflat.ToValues() + + RawTensor.Create(values, dtype=dtype, backend=t.Backend, device=t.Device).ViewT(t.Shape) + + override t.MoveTo(device: Device) = t.MakeLike(values, shape, device=device) + + override t.SetMutable() = isMutable <- true + override t.IsMutable = isMutable + member t.SetValues(tmp: RawTensor) = checkMutable(); values <- (tmp :?> RawTensorCPU<'T>).Values + override t.ClampInPlace(low, high) = t.SetValues <| t.ClampT(low, high) + override t.LtInPlace(t2) = t.SetValues <| t.LtTT(t2) + override t.GtInPlace(t2) = t.SetValues <| t.GtTT(t2) + override t.LeInPlace(t2) = t.SetValues <| t.LeTT(t2) + override t.GeInPlace(t2) = t.SetValues <| t.GeTT(t2) + override t.EqInPlace(t2) = t.SetValues <| t.EqTT(t2) + override t.NeqInPlace(t2) = t.SetValues <| t.NeqTT(t2) + override t.AddInPlace(t2, alpha) = t.SetValues <| t.AddTT(t2, ?alpha=alpha) + override t.AddScalarInPlace(t2) = t.SetValues <| t.AddTT0(t2) + override t.AddSliceInPlace(location, t2) = t.SetValues <| t.AddTTSlice(location, t2) + override t.SubInPlace(t2) = t.SetValues <| t.SubTT(t2) + override t.SubScalarInPlace(t2) = t.SetValues <| t.SubTT0(t2) + override t.MulInPlace(t2) = t.SetValues <| t.MulTT(t2) + override t.MulScalarInPlace(t2) = t.SetValues <| t.MulTT0(t2) + override t.DivInPlace(t2) = t.SetValues <| t.DivTT(t2) + override t.DivScalarInPlace(t2) = t.SetValues <| t.DivTT0(t2) + override t.PowInPlace(t2) = t.SetValues <| t.PowTT(t2) + override t.PowScalarInPlace(t2) = t.SetValues <| t.PowTT0(t2) + override t.MatMulInPlace(t2) = t.SetValues <| t.MatMulTT(t2) + override t.NegInPlace() = t.SetValues <| t.NegT() + override t.SignInPlace() = t.SetValues <| t.SignT() + override t.FloorInPlace() = t.SetValues <| t.FloorT() + override t.CeilInPlace() = t.SetValues <| t.CeilT() + override t.RoundInPlace() = t.SetValues <| t.RoundT() + override t.AbsInPlace() = t.SetValues <| t.AbsT() + override t.ReluInPlace() = t.SetValues <| t.ReluT() + override t.SoftplusInPlace() = t.SetValues <| t.SoftplusT() + override t.SigmoidInPlace() = t.SetValues <| t.SigmoidT() + override t.ExpInPlace() = t.SetValues <| t.ExpT() + override t.LogInPlace() = t.SetValues <| t.LogT() + override t.Log10InPlace() = t.SetValues <| t.Log10T() + override t.SqrtInPlace() = t.SetValues <| t.SqrtT() + override t.SinInPlace() = t.SetValues <| t.SinT() + override t.CosInPlace() = t.SetValues <| t.CosT() + override t.TanInPlace() = t.SetValues <| t.TanT() + override t.SinhInPlace() = t.SetValues <| t.SinhT() + override t.CoshInPlace() = t.SetValues <| t.CoshT() + override t.TanhInPlace() = t.SetValues <| t.TanhT() + override t.AsinInPlace() = t.SetValues <| t.AsinT() + override t.AcosInPlace() = t.SetValues <| t.AcosT() + override t.AtanInPlace() = t.SetValues <| t.AtanT() + override t.OnesInPlace() = t.SetValues <| t.OnesLike(t.Shape) + override t.RandomInPlace() = t.SetValues <| t.RandomLike(t.Shape) + override t.RandomNormalInPlace() = t.SetValues <| t.RandomNormalLike(t.Shape) + override t.RandomIntInPlace(low, high) = t.SetValues <| t.RandomIntLike(t.Shape, low, high) + override t.ZerosInPlace() = t.SetValues <| t.ZerosLike(t.Shape) + +// Defines the math-dependent operations for `RawTensorCPU` types +// using generic inline 
code. Each implementing type (e.g. RawTensorFloat32) instantiates and +// inlines these at concrete types. +// +// Most of the functions produce (value, shape) pairs for use in constructing an instance +// of the final implementing type. +[<System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage>] +module internal RawTensorCPU = + + /// Access the natural "0" value for the element of a CPU tensor type + let inline zero< ^T when ^T : (static member Zero : ^T) > = LanguagePrimitives.GenericZero< ^T > + + /// Access the natural "1" value for the element of a CPU tensor type + let inline one< ^T when ^T : (static member One : ^T) > = LanguagePrimitives.GenericOne< ^T > + + /// Get the scalar "0" tensor for a CPU tensor type + let inline Zero () : (^T[] * Shape) = + let values = [|zero< ^T > |] + (values, Shape.scalar) + + /// Get the scalar "1" tensor for a CPU tensor type + let inline One() : (^T[] * Shape) = + let values = [| one< ^T > |] + (values, Shape.scalar) + + /// Get the "0" tensor for a CPU tensor type of the given shape + let inline Zeros(shape:Shape) : (^T[] * Shape) = + let values = Array.zeroCreate (shapeLength shape) + (values, shape) + + /// Get an empty (zero-initialized) tensor for a CPU tensor type of the given shape + let inline Empty(shape:Shape) : (^T[] * Shape) = Zeros shape + + let inline Ones(shape:Shape) = + let values = Array.create (shapeLength shape) one< ^T > + (values, shape) + + let inline CreateFromFlatArray (values: System.Array, shape: Shape) : (^T[] * Shape) = + match values with + | :? ( ^T[]) as arr -> arr, shape + | _ -> invalidArg "value" (sprintf "Data unsuitable for RawTensorCPU of type %A" typeof< ^T >) + + let inline Equals(t1: RawTensorCPU< ^T >, t2: RawTensor) = + if t1.Dtype <> t2.Dtype then + opNotSupported2 "Equals" t1.Dtype t2.Dtype + match t2 with + | :? RawTensorCPU< ^T > as t2 -> t1.Shape = t2.Shape && t1.Values = t2.Values + | _ -> invalidOp <| sprintf "Cannot compare RawTensors t1 (Shape=%A, Dtype=%A, Device=%A, Backend=%A) and t2 (Shape=%A, Dtype=%A, Device=%A, Backend=%A)" t1.Shape t1.Dtype t1.Device t1.Backend t2.Shape t2.Dtype t2.Device t2.Backend + + let inline Full(shape:Shape, value: ^T) = + let result = Array.create (shapeLength shape) value + (result, shape) + + let inline AllClose(t1: RawTensorCPU< ^T >, t2:RawTensor, relativeTolerance: ^T, absoluteTolerance: ^T) = + match t2 with + | :?
RawTensorCPU< ^T > as t2 -> t1.Shape = t2.Shape && Array.allClose relativeTolerance absoluteTolerance t1.Values t2.Values + | _ -> invalidOp <| sprintf "Cannot compare RawTensors t1 (Shape=%A, Dtype=%A, Device=%A, Backend=%A) and t2 (Shape=%A, Dtype=%A, Device=%A, Backend=%A)" t1.Shape t1.Dtype t1.Device t1.Backend t2.Shape t2.Dtype t2.Device t2.Backend + + let inline ClampT(t: RawTensorCPU< ^T>, low: RawTensor, high:RawTensor) : (^T[] * Shape) = + if low.Dim <> 0 || high.Dim <> 0 then failwithf "Expecting scalar low and high" + let tvalue = t.Values + let lowvalue = low.GetTypedValues()[0] + let highvalue = high.GetTypedValues()[0] + let result = Array.map (fun v -> (max (min v highvalue) lowvalue)) tvalue + (result, t.Shape) + + let inline LtTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (bool[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (<) t1value t2value + (result, t1.Shape) + + let inline GtTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (bool[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (>) t1value t2value + (result, t1.Shape) + + let inline LeTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (bool[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (<=) t1value t2value + (result, t1.Shape) + + let inline GeTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (bool[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (>=) t1value t2value + (result, t1.Shape) + + let inline EqTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (bool[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (=) t1value t2value + (result, t1.Shape) + + let inline NeqTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (bool[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (<>) t1value t2value + (result, t1.Shape) + + let inline MaxIndexT(t: RawTensorCPU< ^T >) = + t.FlatIndexToIndex(Seq.maxIndex t.Values) + + let inline MinMaxReduceT op (t: RawTensorCPU< ^T >, dim, keepDim) : RawTensor * RawTensor = + let newShape = Shape.checkCanMinMaxReduce dim keepDim t.Shape + let shape = t.Shape + let shape1 = shape[0..dim-1] + let n = shape[dim] + let shape2 = shape[dim+1..] 
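+ // Added comment (illustrative): the reduction views the value array as a virtual
+ // [m1; n; m3] block, where m1 is the product of the dims before `dim`, n is the
+ // reduced dim and m3 the product of the dims after it, so element (j1, j3, j2)
+ // lives at flat offset j1*n*m3 + j3*m3 + j2 in the loops below.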
+ let m1 = shapeLength shape1 + let m3 = shapeLength shape2 + let values = t.Values + let results = Array.zeroCreate (m1 * m3) + let indexes = Array.zeroCreate (m1 * m3) + for j1 = 0 to m1-1 do + for j2 = 0 to m3-1 do + let b = j1*m3 + j2 + for j3 = 0 to n-1 do + let v = values[j1*n*m3+j3*m3+j2] + if op v results[b] || (j3 = 0) then + results[b] <- v + indexes[b] <- j3 + let resultsT = t.MakeLike(results, newShape) + let indexesT = t.CreateLike(indexes, dtype=Dtype.Int32).ViewT(newShape) + resultsT, indexesT + + let inline MinIndexT(t: RawTensorCPU< ^T >) = + t.FlatIndexToIndex(Seq.minIndex t.Values) + + let inline AddTT(t1: RawTensorCPU< ^T >, t2: RawTensor, alpha: ^T) : (^T[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (fun a b -> a + alpha * b) t1value t2value + (result, t1.Shape) + + let inline AddTT0(t1: RawTensorCPU< ^T >, b: ^T, alpha: ^T) : (^T[] * Shape) = + let t1value = t1.Values + let result = Array.map (fun a -> a + alpha * b) t1value + (result, t1.Shape) + + let inline internal AddTTSlice(plus, t1: RawTensorCPU< ^T >, location:int[], t2: RawTensor) : (^T[] * Shape) = + Shape.checkCanAddSlice t1.Shape location t2.Shape + let t1value = t1.Values + let t2 = t2 :?> RawTensorCPU< ^T > + let result = Array.copy t1value + let shape2 = Shape.unsqueezeAs t2.Shape t1.Shape + let rec add (shape2:Shape) externalCoords = + if shape2.Length = 1 then + for i=0 to shape2[0]-1 do + let globalCoords = Array.append externalCoords [|i|] + let t1Coords = Array.map2 (+) globalCoords location + let t1FlatIndex = t1.IndexToFlatIndex(t1Coords) + result[t1FlatIndex] <- plus result[t1FlatIndex] t2[globalCoords] + else + for i=0 to shape2[0]-1 do + add (shape2[1..]) (Array.append externalCoords [|i|]) + add shape2 [||] + (result, t1.Shape) + + let inline SubTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (^T[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (-) t1value t2value + (result, t1.Shape) + + let inline SubT0T(a: ^T, t2: RawTensor) : (^T[] * Shape) = + let t2value = t2.GetTypedValues() + let result = Array.map (fun b -> a - b) t2value + (result, t2.Shape) + + let inline SubTT0(t1: RawTensorCPU< ^T >, b: ^T) : (^T[] * Shape) = + let t1value = t1.Values + let result = Array.map (fun t -> t - b) t1value + (result, t1.Shape) + + let inline MulTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (^T[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (*) t1value t2value + (result, t1.Shape) + + let inline MulTT0(t1: RawTensorCPU< ^T >, b: ^T) : (^T[] * Shape) = + let t1value = t1.Values + let result = Array.map (fun a -> a * b) t1value + (result, t1.Shape) + + let inline DivTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (^T[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 (/) t1value t2value + (result, t1.Shape) + + let inline DivT0T(a: ^T, t2: RawTensor) : (^T[] * Shape) = + let t2value = t2.GetTypedValues() + let result = Array.map (fun b -> a / b) t2value + (result, t2.Shape) + + let inline DivTT0(t1: RawTensorCPU< ^T >, b: ^T) : (^T[] * Shape) = + let t1value = t1.Values + let result = Array.map (fun a -> a / b) t1value + (result, t1.Shape) + + let inline PowTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (^T[] * Shape) = + let t1value = t1.Values + let t2value = t2.GetTypedValues() + let result = Array.map2 ( ** ) t1value t2value + (result, t1.Shape) + + let inline PowT0T(a: ^T , t2: RawTensor) : (^T[] * Shape) 
= + let t2value = t2.GetTypedValues() + let result = Array.map (fun b -> a ** b) t2value + (result, t2.Shape) + + let inline PowTT0(t1: RawTensorCPU< ^T >, b: ^T) : (^T[] * Shape) = + let t1value = t1.Values + let result = Array.map (fun a -> a ** b) t1value + (result, t1.Shape) + + let inline MatMulTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (^T[] * Shape) = + let (t1BatchPart, t1MatrixPart), (t2BatchPart, t2MatrixPart) = Shape.checkCanMatmul t1.Shape t2.Shape + if t1BatchPart <> t2BatchPart then failwithf "Cannot matrix multiply raw tensors with shapes %A, %A - mismatch batching" t1.Shape t2.Shape + let t1rows, t1cols = t1MatrixPart[0], t1MatrixPart[1] + let t2rows, t2cols = t2MatrixPart[0], t2MatrixPart[1] + let t1value = t1.Values + let t2value = (t2 :?> RawTensorCPU< ^T >).Values + let newShape = Array.append t1BatchPart [| t1rows; t2cols |] + let nb = shapeLength t1BatchPart + let values = Array.initFlat3D nb t1rows t2cols (fun b i j -> Array.sumBy (fun k -> t1value[b*t1cols*t1rows + i*t1cols + k] * t2value[b*t2cols*t2rows + k*t2cols + j]) [|0..(t2rows-1)|] ) + (values, newShape) + + let inline BMMTT(t1: RawTensorCPU< ^T >, t2: RawTensor) : (^T[] * Shape) = + Shape.checkCanBMM t1.Shape t2.Shape |> ignore + MatMulTT(t1, t2) + + // Returns the LU decomposition of this matrix. The return values are the LU matrix, pivot indices, and a toggle value indicating the number of row exchanges during the decomposition, which is +1 if the number of exchanges were even, -1 if odd. Source: Atilim Gunes Baydin, FsAlg, 2015, https://github.com/gbaydin/FsAlg + let inline LUDecomposition (m: ^T[,]) = + let rows = m.GetLength(0) + let res = Array2D.copy m + let perm = Array.init rows (fun i -> i) + let mutable toggle = LanguagePrimitives.GenericOne<'T> + for j = 0 to rows - 2 do + let mutable colmax:'T = abs res[j, j] + let mutable prow = j + for i = j + 1 to rows - 1 do + let absresij = abs res[i, j] + if absresij > colmax then + colmax <- absresij + prow <- i + if prow <> j then + let tmprow = res[prow, 0..] + res[prow, 0..] <- res[j, 0..] + res[j, 0..] <- tmprow + let tmp = perm[prow] + perm[prow] <- perm[j] + perm[j] <- tmp + toggle <- -toggle + for i = j + 1 to rows - 1 do + res[i, j] <- res[i, j] / res[j, j] + for k = j + 1 to rows - 1 do + res[i, k] <- res[i, k] - res[i, j] * res[j, k] + res, perm, toggle + + // Finds an array that, when multiplied by a LU matrix `lu`, gives array `b`. Source: Atilim Gunes Baydin, FsAlg, 2015, https://github.com/gbaydin/FsAlg + let inline matrixSolveHelper (lu:^T[,]) (b:^T[]) = + let n = lu.GetLength 0 + let x = Array.copy b + for i = 1 to n - 1 do + let mutable sum = x[i] + for j = 0 to i - 1 do + sum <- sum - lu[i, j] * x[j] + x[i] <- sum + x[n - 1] <- x[n - 1] / lu[n - 1, n - 1] + for i in (n - 2) .. -1 .. 0 do + let mutable sum = x[i] + for j = i + 1 to n - 1 do + sum <- sum - lu[i, j] * x[j] + x[i] <- sum / lu[i, i] + x + + // Solves a system of linear equations ax = b, where the coefficients are given in matrix `a` and the result vector is vector `b`. The returned vector will correspond to x. Source: Atilim Gunes Baydin, FsAlg, 2015, https://github.com/gbaydin/FsAlg + let inline solve (a: ^T[,]) (b: ^T[]) = + let lu, perm, _ = LUDecomposition a + let bp = Array.init (a.GetLength(0)) (fun i -> b[perm[i]]) + matrixSolveHelper lu bp + + // Inverts matrix. 
Source: Atilim Gunes Baydin, FsAlg, 2015, https://github.com/gbaydin/FsAlg + let inline inverseMatrix (m: ^T[,]) = + let rows = m.GetLength(0) + let res = Array2D.copy m + let lu, perm, _ = LUDecomposition m + let b:'T[] = Array.zeroCreate rows + for i = 0 to rows - 1 do + for j = 0 to rows - 1 do + if i = perm[j] then + b[j] <- LanguagePrimitives.GenericOne<'T> + else + b[j] <- LanguagePrimitives.GenericZero<'T> + let x = matrixSolveHelper lu b + res[0.., i] <- x + res + + let inline InverseT(t: RawTensorCPU< ^T >) : RawTensorCPU< ^T > = + Shape.checkCanInvert t.Shape + let dim = t.Shape.Length + if dim = 2 then // One matrix + let tinv = inverseMatrix (t.ToArray() :?> ^T[,]) + let tinvflat = [| for i=0 to tinv.GetLength(0)-1 do for j=0 to tinv.GetLength(1)-1 do yield tinv[i, j] |] + t.MakeLike(tinvflat, t.Shape) :?> RawTensorCPU<'T> + else // Batch of matrices + let tinvs = + t.UnstackT(0) + |> Array.map (fun v -> inverseMatrix (v.ToArray() :?> ^T[,])) + |> Array.map (fun v -> [| for i=0 to v.GetLength(0)-1 do for j=0 to v.GetLength(1)-1 do yield v[i, j] |]) + |> Array.map (fun v -> t.MakeLike(v, [|t.Shape[1]; t.Shape[2]|])) + t.StackTs(tinvs, 0) :?> RawTensorCPU<'T> + + let inline diagonal(square: ^T[,]) = + let n = square.GetLength(0) + if n <> square.GetLength(1) then failwith "Expecting a square array" + Array.init n (fun i -> square[i, i]) + + let inline prod(t: ^T[]) = + Array.fold (fun s x -> s * x) LanguagePrimitives.GenericOne<'T> t + + let inline DetT(t: RawTensorCPU< ^T >) : RawTensorCPU< ^T > = + Shape.checkCanDet t.Shape + let dim = t.Shape.Length + if dim = 2 then + let lu, _, toggle = LUDecomposition(t.ToArray() :?> ^T[,]) + let d:^T = toggle * (prod (diagonal lu)) + t.MakeLike([|d|], [||]) :?> RawTensorCPU<'T> + else + let tdets = + t.UnstackT(0) + |> Array.map (fun v -> let lu, _, toggle = LUDecomposition(v.ToArray() :?> ^T[,]) in lu, toggle) + |> Array.map (fun (lu, toggle) -> toggle * (prod (diagonal lu))) + |> Array.map (fun v -> t.MakeLike([|v|], [||])) + t.StackTs(tdets, 0) :?> RawTensorCPU<'T> + + let inline SolveTT(a: RawTensorCPU< ^T >, b: RawTensor) : RawTensorCPU< ^T > = + let newShape = Shape.checkCanSolve a.Shape b.Shape + let dimA = a.Shape.Length + let dimB = b.Shape.Length + if dimA = 2 then + let n = a.Shape[0] + let amatrix = (a.ToArray() :?> ^T[,]) + if dimB = 1 then + let bvector = (b.ToArray() :?> ^T[]) + let s = solve amatrix bvector + a.MakeLike(s, newShape) :?> RawTensorCPU<'T> + else // dimB = 2 + let cols = + b.UnstackT(1) + |> Array.map (fun v -> v.ToArray() :?> ^T[]) + |> Array.map (fun v -> solve amatrix v) + |> Array.map (fun v -> a.MakeLike(v, [|n|])) + a.StackTs(cols, 1) :?> RawTensorCPU<'T> + else // dimA = 3 + let n = a.Shape[1] + if dimB = 2 then + let aa = a.UnstackT(0) + let bb = b.UnstackT(0) + let ss = + Array.zip aa bb + |> Array.map (fun (aaa, bbb) -> + let amatrix = (aaa.ToArray() :?> ^T[,]) + let bvector = (bbb.ToArray() :?> ^T[]) + let s = solve amatrix bvector + a.MakeLike(s, [|n|])) + a.StackTs(ss, 0) :?> RawTensorCPU<'T> + else // dimB = 3 + let aa = a.UnstackT(0) + let bb = b.UnstackT(0) + let ss = + Array.zip aa bb + |> Array.map (fun (aaa, bbb) -> + let amatrix = (aaa.ToArray() :?> ^T[,]) + let cols = + bbb.UnstackT(1) + |> Array.map (fun v -> v.ToArray() :?> ^T[]) + |> Array.map (fun v -> solve amatrix v) + |> Array.map (fun v -> a.MakeLike(v, [|n|])) + a.StackTs(cols, 1)) + a.StackTs(ss, 0) :?> RawTensorCPU<'T> + // failwithf "Unsupported shapes %A %A" a.Shape b.Shape + + let inline MaxPool1D(t1: RawTensorCPU< ^T >, 
kernelSize, stride, padding) : RawTensorCPU< ^T > * RawTensorCPU< int > = + let batchSize, channels, inputSize, outputSize, outputShape = + Shape.checkCanMaxpool1d t1.Dtype t1.Shape kernelSize stride padding + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + let indices = t1.ZerosLike(outputShape, dtype=Int32) :?> RawTensorCPU<int> + let minValue = t1[t1.MinIndexT()] - one + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v=0 to outputSize-1 do + let mutable maxvalue = minValue + let mutable maxindex = -1 + for u=0 to kernelSize-1 do + let i = (v*stride) + u - padding + if i >= 0 && i < inputSize then + let value = t1[n, c, i] + if value > maxvalue then + maxvalue <- value + maxindex <- i + result[[|n; c; v|]] <- maxvalue + indices[[|n; c; v|]] <- maxindex + result, indices + + let inline MaxPool2D(t1: RawTensorCPU< ^T >, kernelSize, stride, padding) : RawTensorCPU< ^T > * RawTensorCPU< int > = + let batchSize, channels, (inputHeight, inputWidth), (kernelHeight, kernelWidth), (outputHeight, outputWidth), outputShape = + Shape.checkCanMaxpool2d t1.Dtype t1.Shape kernelSize stride padding + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + let indices = t1.ZerosLike(outputShape, dtype=Int32) :?> RawTensorCPU<int> + let minValue = t1[t1.MinIndexT()] - one + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v0=0 to outputHeight-1 do + for v1=0 to outputWidth-1 do + let mutable maxvalue = minValue + let mutable maxindexi0 = -1 + let mutable maxindexi1 = -1 + for u0=0 to kernelHeight-1 do + for u1=0 to kernelWidth-1 do + let i0 = (v0*stride[0]) + u0 - padding[0] + let i1 = (v1*stride[1]) + u1 - padding[1] + if i0 >= 0 && i0 < inputHeight && i1 >= 0 && i1 < inputWidth then + let value = t1[n, c, i0, i1] + if value > maxvalue then + maxvalue <- value + maxindexi0 <- i0 + maxindexi1 <- i1 + result[[|n; c; v0; v1|]] <- maxvalue + indices[[|n; c; v0; v1|]] <- indexToFlatIndex [|inputHeight; inputWidth|] [|maxindexi0; maxindexi1|] + result, indices + + let inline MaxPool3D(t1: RawTensorCPU< ^T >, kernelSize, stride, padding) : RawTensorCPU< ^T > * RawTensorCPU< int > = + let (batchSize, channels, (inputDepth, inputHeight, inputWidth), (kernelDepth, kernelHeight, kernelWidth), (outputDepth, outputHeight, outputWidth), outputShape) = + Shape.checkCanMaxpool3d t1.Dtype t1.Shape kernelSize stride padding + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + let indices = t1.ZerosLike(outputShape, dtype=Int32) :?> RawTensorCPU<int> + let minValue = t1[t1.MinIndexT()] - one + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v0=0 to outputDepth-1 do + for v1=0 to outputHeight-1 do + for v2=0 to outputWidth-1 do + let mutable maxvalue = minValue + let mutable maxindexi0 = -1 + let mutable maxindexi1 = -1 + let mutable maxindexi2 = -1 + for u0=0 to kernelDepth-1 do + for u1=0 to kernelHeight-1 do + for u2=0 to kernelWidth-1 do + let i0 = (v0*stride[0]) + u0 - padding[0] + let i1 = (v1*stride[1]) + u1 - padding[1] + let i2 = (v2*stride[2]) + u2 - padding[2] + if i0 >= 0 && i0 < inputDepth && i1 >= 0 && i1 < inputHeight && i2 >= 0 && i2 < inputWidth then + let value = t1[n, c, i0, i1, i2] + if value > maxvalue then + maxvalue <- value + maxindexi0 <- i0 + maxindexi1 <- i1 + maxindexi2 <- i2 + result[[|n; c; v0; v1; v2|]] <- maxvalue + indices[[|n; c; v0; v1; v2|]] <- indexToFlatIndex [|inputDepth; inputHeight; inputWidth|] [|maxindexi0; maxindexi1; maxindexi2|] + result, indices + + let inline MaxUnpool1D(t1: RawTensorCPU< ^T >, indices: RawTensorCPU<int>, outputSize:
int[]) : RawTensorCPU< ^T > = + let batchSize, channels, inputSize, outputShape = + Shape.checkCanMaxunpool1d t1.Dtype t1.Shape indices.Dtype indices.Shape outputSize + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for u=0 to inputSize-1 do + let i = indices[[|n; c; u|]] + result[[|n; c; i|]] <- t1[[|n; c; u|]] + result + + let inline MaxUnpool2D(t1: RawTensorCPU< ^T >, indices: RawTensorCPU<int>, outputSize:int[]) : RawTensorCPU< ^T > = + let batchSize, channels, (inputHeight, inputWidth), outputShape = + Shape.checkCanMaxunpool2d t1.Dtype t1.Shape indices.Dtype indices.Shape outputSize + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for u0=0 to inputHeight-1 do + for u1=0 to inputWidth-1 do + let iflat = indices[[|n; c; u0; u1|]] + let i = flatIndexToIndex [|outputSize[2]; outputSize[3]|] iflat + result[[|n; c; i[0]; i[1]|]] <- t1[[|n; c; u0; u1|]] + result + + let inline MaxUnpool3D(t1: RawTensorCPU< ^T >, indices: RawTensorCPU<int>, outputSize:int[]) : RawTensorCPU< ^T > = + let batchSize, channels, (inputDepth, inputHeight, inputWidth), outputShape = + Shape.checkCanMaxunpool3d t1.Dtype t1.Shape indices.Dtype indices.Shape outputSize + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for u0=0 to inputDepth-1 do + for u1=0 to inputHeight-1 do + for u2=0 to inputWidth-1 do + let iflat = indices[[|n; c; u0; u1; u2|]] + let i = flatIndexToIndex [|outputSize[2]; outputSize[3]; outputSize[4]|] iflat + result[[|n; c; i[0]; i[1]; i[2]|]] <- t1[[|n; c; u0; u1; u2|]] + result + + let inline Conv1D(t1: RawTensorCPU< ^T >, t2: RawTensor, stride, padding) : RawTensorCPU< ^T > = + // t1: input, NxCxI (batchSize x inputChannels x inputLength) + // t2: filters, KxCxF (outputChannels x inputChannels x kernelLength) + let batchSize, inputChannels, kernelSize, outputChannels, outputSize, outputShape = + Shape.checkCanConv1d t1.DeviceType t2.DeviceType t1.Dtype t2.Dtype t1.Shape t2.Shape stride padding 1 + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + let t1 = + if padding = 0 then + t1 + else + let tshape = Array.copy t1.Shape + tshape[2] <- t1.Shape[2] + padding * 2 + let t = t1.ZerosLike(tshape) + t.AddTTSlice([|0; 0; padding|], t1) :?> RawTensorCPU< ^T > + let t2 = t2 :?> RawTensorCPU< ^T > + for n=0 to batchSize-1 do + for k=0 to outputChannels-1 do + for v=0 to outputSize-1 do + let mutable value = zero + for c=0 to inputChannels-1 do + for u=0 to kernelSize-1 do + value <- value + t2[k, c, u] * t1[n, c, (v*stride) + u] + result[[|n; k; v|]] <- value + result + + let inline Conv2D(t1: RawTensorCPU< ^T >, t2: RawTensor, stride: int[], padding: int[]) : RawTensorCPU< ^T > = + // t1: input, NxCxHxW (batchSize x inputChannels x inputHeight x inputWidth) + // t2: filters, KxCxFxG (outputChannels x inputChannels x kernelHeight x kernelWidth) + let batchSize, inputChannels, (kernelHeight, kernelWidth), (outputChannels, outputHeight, outputWidth), outputShape = + Shape.checkCanConv2d t1.DeviceType t2.DeviceType t1.Dtype t2.Dtype t1.Shape t2.Shape stride padding [|1;1|] + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU< ^T> + let t1 = + if padding[0] = 0 && padding[1] = 0 then + t1 + else + let tshape = Array.copy t1.Shape + tshape[2] <- t1.Shape[2] + padding[0] * 2 + tshape[3] <- t1.Shape[3] + padding[1] * 2 + let t = t1.ZerosLike(tshape) + t.AddTTSlice([|0; 0; padding[0]; padding[1]|],
t1) :?> RawTensorCPU< ^T > + let t2 = t2 :?> RawTensorCPU< ^T > + for n=0 to batchSize-1 do + for k=0 to outputChannels-1 do + for v0=0 to outputHeight-1 do + for v1=0 to outputWidth-1 do + let mutable value = zero + for c=0 to inputChannels-1 do + for u0=0 to kernelHeight-1 do + for u1=0 to kernelWidth-1 do + value <- value + t2[k, c, u0, u1] * t1[n, c, (v0*stride[0])+u0, (v1*stride[1])+u1] + result[[|n; k; v0; v1|]] <- value + result + + let inline Conv3D(t1: RawTensorCPU< ^T >, t2: RawTensor, stride: int[], padding: int[]) : RawTensorCPU< ^T > = + // t1: input, NxCxDxHxW (batchSize x inputChannels x inputDepth x inputHeight x inputWidth) + // t2: filters, KxCxExFxG (outputChannels x inputChannels x kernelDepth x kernelHeight x kernelWidth) + let batchSize, inputChannels, (kernelDepth, kernelHeight, kernelWidth), (outputChannels, outputDepth, outputHeight, outputWidth), outputShape = + Shape.checkCanConv3d t1.DeviceType t2.DeviceType t1.Dtype t2.Dtype t1.Shape t2.Shape stride padding [|1;1;1|] + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU< ^T> + let t1 = + if padding[0] = 0 && padding[1] = 0 && padding[2] = 0 then + t1 + else + let tshape = Array.copy t1.Shape + tshape[2] <- t1.Shape[2] + padding[0] * 2 + tshape[3] <- t1.Shape[3] + padding[1] * 2 + tshape[4] <- t1.Shape[4] + padding[2] * 2 + let t = t1.ZerosLike(tshape) + t.AddTTSlice([|0; 0; padding[0]; padding[1]; padding[2]|], t1) :?> RawTensorCPU< ^T > + let t2 = t2 :?> RawTensorCPU< ^T > + for n=0 to batchSize-1 do + for k=0 to outputChannels-1 do + for v0=0 to outputDepth-1 do + for v1=0 to outputHeight-1 do + for v2=0 to outputWidth-1 do + let mutable value = zero + for c=0 to inputChannels-1 do + for u0=0 to kernelDepth-1 do + for u1=0 to kernelHeight-1 do + for u2=0 to kernelWidth-1 do + // printfn "%A %A %A | %A %A %A" v0 v1 v2 u0 u1 u2 + value <- value + t2[k, c, u0, u1, u2] * t1[n, c, (v0*stride[0])+u0, (v1*stride[1])+u1, (v2*stride[2])+u2] + result[[|n; k; v0; v1; v2|]] <- value + result + + let inline AvgPool1D ofInt (t1: RawTensorCPU< ^T >, kernelSize, stride, padding) : RawTensorCPU< ^T >= + let batchSize, channels, inputSize, outputSize, outputShape = + Shape.checkCanAvgpool1d t1.Dtype t1.Shape kernelSize stride padding + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v=0 to outputSize-1 do + let mutable avg = zero + for u=0 to kernelSize-1 do + let i = (v*stride) + u - padding + if i >= 0 && i < inputSize then + let value = t1[n, c, i] + avg <- avg + value + result[[|n; c; v|]] <- avg / ofInt kernelSize + result + + let inline AvgPool2D ofInt (t1: RawTensorCPU< ^T >, kernelSize, stride, padding) : RawTensorCPU< ^T > = + let batchSize, channels, (inputHeight, inputWidth), (kernelHeight, kernelWidth), (outputHeight, outputWidth), outputShape = + Shape.checkCanAvgpool2d t1.Dtype t1.Shape kernelSize stride padding + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + let kernelSize = kernelHeight * kernelWidth + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v0=0 to outputHeight-1 do + for v1=0 to outputWidth-1 do + let mutable avg = zero + for u0=0 to kernelHeight-1 do + for u1=0 to kernelWidth-1 do + let i0 = (v0*stride[0]) + u0 - padding[0] + let i1 = (v1*stride[1]) + u1 - padding[1] + if i0 >= 0 && i0 < inputHeight && i1 >= 0 && i1 < inputWidth then + let value = t1[n, c, i0, i1] + avg <- avg + value + result[[|n; c; v0; v1|]] <- avg / ofInt kernelSize + result + + let inline AvgPool3D ofInt (t1: RawTensorCPU< ^T 
>, kernelSize, stride, padding) : RawTensorCPU< ^T > = + let (batchSize, channels, (inputDepth, inputHeight, inputWidth), (kernelDepth, kernelHeight, kernelWidth), (outputDepth, outputHeight, outputWidth), outputShape) = + Shape.checkCanAvgpool3d t1.Dtype t1.Shape kernelSize stride padding + let result = t1.ZerosLike(outputShape) :?> RawTensorCPU<'T> + let kernelSize = kernelDepth * kernelHeight * kernelWidth + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v0=0 to outputDepth-1 do + for v1=0 to outputHeight-1 do + for v2=0 to outputWidth-1 do + let mutable avg = zero + for u0=0 to kernelDepth-1 do + for u1=0 to kernelHeight-1 do + for u2=0 to kernelWidth-1 do + let i0 = (v0*stride[0]) + u0 - padding[0] + let i1 = (v1*stride[1]) + u1 - padding[1] + let i2 = (v2*stride[2]) + u2 - padding[2] + if i0 >= 0 && i0 < inputDepth && i1 >= 0 && i1 < inputHeight && i2 >= 0 && i2 < inputWidth then + let value = t1[n, c, i0, i1, i2] + avg <- avg + value + result[[|n; c; v0; v1; v2|]] <- avg / ofInt kernelSize + result + + let inline AvgPoolReverse1D ofInt (t1: RawTensorCPU< ^T >, originalInput: RawTensor, kernelSize, stride, padding) : RawTensorCPU< ^T > = + let batchSize, channels, inputSize, outputSize, _outputShape = + Shape.checkCanAvgpool1d t1.Dtype originalInput.Shape kernelSize stride padding + let result = t1.ZerosLike(originalInput.Shape) :?> RawTensorCPU<'T> + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v=0 to outputSize-1 do + for u=0 to kernelSize-1 do + let i = (v*stride) + u - padding + if i >= 0 && i < inputSize then + result[[|n; c; i|]] <- t1[[|n; c; v|]] / ofInt kernelSize + result + + let inline AvgPoolReverse2D ofInt (t1: RawTensorCPU< ^T >, originalInput: RawTensor, kernelSize, stride, padding) : RawTensorCPU< ^T > = + let batchSize, channels, (inputHeight, inputWidth), (kernelHeight, kernelWidth), (outputHeight, outputWidth), _outputShape = + Shape.checkCanAvgpool2d t1.Dtype originalInput.Shape kernelSize stride padding + let kernelSize = kernelHeight * kernelWidth + let result = t1.ZerosLike(originalInput.Shape) :?> RawTensorCPU<'T> + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v0=0 to outputHeight-1 do + for v1=0 to outputWidth-1 do + for u0=0 to kernelHeight-1 do + for u1=0 to kernelWidth-1 do + let i0 = (v0*stride[0]) + u0 - padding[0] + let i1 = (v1*stride[1]) + u1 - padding[1] + if i0 >= 0 && i0 < inputHeight && i1 >= 0 && i1 < inputWidth then + result[[|n; c; i0; i1|]] <- t1[[|n; c; v0; v1|]] / ofInt kernelSize + result + + let inline AvgPoolReverse3D ofInt (t1: RawTensorCPU< ^T >, originalInput: RawTensor, kernelSize, stride, padding) : RawTensorCPU< ^T > = + let batchSize, channels, (inputDepth, inputHeight, inputWidth), (kernelDepth, kernelHeight, kernelWidth), (outputDepth, outputHeight, outputWidth), _outputShape = + Shape.checkCanAvgpool3d t1.Dtype originalInput.Shape kernelSize stride padding + let kernelSize = kernelDepth * kernelHeight * kernelWidth + let result = t1.ZerosLike(originalInput.Shape) :?> RawTensorCPU<'T> + for n=0 to batchSize-1 do + for c=0 to channels-1 do + for v0=0 to outputDepth-1 do + for v1=0 to outputHeight-1 do + for v2=0 to outputWidth-1 do + for u0=0 to kernelDepth-1 do + for u1=0 to kernelHeight-1 do + for u2=0 to kernelWidth-1 do + let i0 = (v0*stride[0]) + u0 - padding[0] + let i1 = (v1*stride[1]) + u1 - padding[1] + let i2 = (v2*stride[2]) + u2 - padding[2] + if i0 >= 0 && i0 < inputDepth && i1 >= 0 && i1 < inputHeight && i2 >= 0 && i2 < inputWidth then + result[[|n; c; i0; i1; i2|]] <- t1[[|n; 
c; v0; v1; v2|]] / ofInt kernelSize + result + + let inline NegT op (t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = Array.map op t.Values + (result, t.Shape) + + let inline SumT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + if Array.isEmpty t.Values then ([|zero< ^T >|], Shape.scalar) else // Return a zero-valued scalar tensor if summing a zero-sized tensor (not holding any value). This is mirroring the behavior in PyTorch 1.5.1. + let result = Array.reduce (+) t.Values + ([|result|], [||]) + + let inline SumTDim(t: RawTensorCPU< ^T >, dim: int) : RawTensorCPU< ^T > = + let sBounds = Array2D.init t.Dim 3 (fun i j -> if j=0 then 0 elif j=1 then t.Shape[i]-1 else 0) + sBounds[dim, 1] <- 0 + sBounds[dim, 2] <- 1 + let s = t.ZerosLike(shape=t.Shape, dtype=t.Dtype.SummationType).GetSlice(sBounds) :?> RawTensorCPU<'T> + s.SetMutable() + for i=0 to t.Shape[dim]-1 do + sBounds[dim,0] <- i + sBounds[dim,1] <- i + sBounds[dim,2] <- 1 + s.AddInPlace(t.GetSlice(sBounds).Cast(t.Dtype.SummationType)) + s + + let inline SignT op (t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map op + (result, t.Shape) + + let inline FloorT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map floor + (result, t.Shape) + + let inline CeilT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map ceil + (result, t.Shape) + + let inline RoundT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map round + (result, t.Shape) + + let inline AbsT op (t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map op + (result, t.Shape) + + let inline ReluT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map (max zero< ^T >) + (result, t.Shape) + + let inline SoftplusT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map (fun x -> (max zero< ^T > x) + log(one< ^T > + exp(-abs(x)))) + (result, t.Shape) + + let inline SigmoidT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map (fun v -> one / (one + exp -v)) + (result, t.Shape) + + let inline ExpT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map exp + (result, t.Shape) + + let inline LogT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map log + (result, t.Shape) + + let inline Log10T(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map log10 + (result, t.Shape) + + let inline SqrtT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map sqrt + (result, t.Shape) + + let inline SinT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map sin + (result, t.Shape) + + let inline CosT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map cos + (result, t.Shape) + + let inline TanT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map tan + (result, t.Shape) + + let inline SinhT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map sinh + (result, t.Shape) + + let inline CoshT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map cosh + (result, t.Shape) + + let inline TanhT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map tanh + (result, t.Shape) + + let inline AsinT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map asin + (result, t.Shape) + + let inline AcosT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = 
+ let result = t.Values |> Array.map acos + (result, t.Shape) + + let inline AtanT(t: RawTensorCPU< ^T >) : (^T[] * Shape) = + let result = t.Values |> Array.map atan + (result, t.Shape) + + let inline Random ofDouble (shape:Shape) : (^T[] * Shape) = + let values = Array.init (shapeLength shape) (fun _ -> ofDouble (TensorMath.Util.Random.Uniform())) + (values, shape) + + let inline RandomNormal ofDouble (shape:Shape) : (^T[] * Shape) = + let values = Array.init (shapeLength shape) (fun _ -> ofDouble (TensorMath.Util.Random.Normal())) + (values, shape) + + let inline RandomInt ofInt (shape:Shape) (low:int) (high:int) : (^T[] * Shape) = + let values = Array.init (shapeLength shape) (fun _ -> ofInt (TensorMath.Util.Random.Integer(low, high))) + (values, shape) + +/// The concrete implementation of RawTensor for Float32 data. +type RawTensorFloat32(values: float32[], shape:Shape, device) = + inherit RawTensorCPU<float32>(values, shape, Dtype.Float32, device) + let create(values, shape) : RawTensor = upcast RawTensorFloat32(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorFloat32(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorFloat32(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, relativeTolerance, absoluteTolerance) = RawTensorCPU.AllClose(t1, t2, float32 relativeTolerance, float32 absoluteTolerance) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t.SoftplusT() = RawTensorCPU.SoftplusT(t) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toSingle() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toSingle() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toSingle(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toSingle(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toSingle()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toSingle()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toSingle(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toSingle()) |> create + override t1.PowTT(t2) = RawTensorCPU.PowTT(t1, t2) |> create + override t2.PowFromT0T(t1) = RawTensorCPU.PowT0T(t1.toSingle(), t2)
|> create + override t1.PowTT0(t2) = RawTensorCPU.PowTT0(t1, t2.toSingle()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D (t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = + let res = RawTensorCPU.SumT(t) |> create + match resultType with + | None -> res + | Some dtype -> res.Cast(dtype) + override t.SumTDim(dim, resultType) = + let res = RawTensorCPU.SumTDim(t, dim) + match resultType with + | None -> res :> _ + | Some dtype -> res.Cast(dtype) + override t.SignT() = RawTensorCPU.SignT (sign >> float32) t |> create + override t.FloorT() = RawTensorCPU.FloorT(t) |> create + override t.CeilT() = RawTensorCPU.CeilT(t) |> create + override t.RoundT() = RawTensorCPU.RoundT(t) |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + override t.SigmoidT() = RawTensorCPU.SigmoidT(t) |> create + override t.ExpT() = RawTensorCPU.ExpT(t) |> create + override t.LogT() = RawTensorCPU.LogT(t) |> create + override t.Log10T() = RawTensorCPU.Log10T(t) |> create + override t.SqrtT() = RawTensorCPU.SqrtT(t) |> create + override t.SinT() = RawTensorCPU.SinT(t) |> create + override t.CosT() = RawTensorCPU.CosT(t) |> create + override t.TanT() = RawTensorCPU.TanT(t) |> create + override t.SinhT() = RawTensorCPU.SinhT(t) |> create + override t.CoshT() = RawTensorCPU.CoshT(t) |> create + override t.TanhT() = RawTensorCPU.TanhT(t) |>
create + override t.AsinT() = RawTensorCPU.AsinT(t) |> create + override t.AcosT() = RawTensorCPU.AcosT(t) |> create + override t.AtanT() = RawTensorCPU.AtanT(t) |> create + override t.InverseT() = RawTensorCPU.InverseT(t) :> _ + override t.DetT() = RawTensorCPU.DetT(t) :> _ + override a.SolveTT(b) = RawTensorCPU.SolveTT(a, b) :> _ + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toSingle()) |> createOn device + static member Random(shape:Shape, device) = RawTensorCPU.Random float32 shape |> createOn device + static member RandomNormal(shape:Shape, device) = RawTensorCPU.RandomNormal float32 shape |> createOn device + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt float32 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorFloat64(values: double[], shape:Shape, device) = + inherit RawTensorCPU<double>(values, shape, Dtype.Float64, device) + + let create(values, shape) : RawTensor = upcast RawTensorFloat64(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorFloat64(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorFloat64(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, relativeTolerance, absoluteTolerance) = RawTensorCPU.AllClose(t1, t2, relativeTolerance, absoluteTolerance) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t.SoftplusT() = RawTensorCPU.SoftplusT(t) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toDouble() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toDouble() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toDouble(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toDouble(), t2) |> create + override t1.SubTT0(t2) =
RawTensorCPU.SubTT0(t1, t2.toDouble()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toDouble()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toDouble(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toDouble()) |> create + override t1.PowTT(t2) = RawTensorCPU.PowTT(t1, t2) |> create + override t2.PowFromT0T(t1) = RawTensorCPU.PowT0T(t1.toDouble(), t2) |> create + override t1.PowTT0(t2) = RawTensorCPU.PowTT0(t1, t2.toDouble()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D double (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D double (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D double (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D double (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D double (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D double (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D (t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = + let res = RawTensorCPU.SumT(t) |> create + match resultType with + | None -> res + | Some dtype -> res.Cast(dtype) + override t.SumTDim(dim, resultType) = + let res = RawTensorCPU.SumTDim(t, dim) + match resultType with + | None -> res :> _ + | Some dtype -> res.Cast(dtype) + override t.SignT() = RawTensorCPU.SignT (sign >> double) t |> create + override t.FloorT() = RawTensorCPU.FloorT(t) |> create + override t.CeilT() = RawTensorCPU.CeilT(t) |> create + override t.RoundT() = RawTensorCPU.RoundT(t) |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + override t.SigmoidT() = RawTensorCPU.SigmoidT(t) |> create +
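+ // Note (added, illustrative): every override in this type instantiates one of the
+ // generic inline helpers from the RawTensorCPU module at `double` and rewraps the
+ // resulting (values, shape) pair, following the pattern
+ //     override t.SomeOpT() = RawTensorCPU.SomeOpT(t) |> create
+ // where SomeOpT is a placeholder for any of the unary ops, not a real member.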
override t.ExpT() = RawTensorCPU.ExpT(t) |> create + override t.LogT() = RawTensorCPU.LogT(t) |> create + override t.Log10T() = RawTensorCPU.Log10T(t) |> create + override t.SqrtT() = RawTensorCPU.SqrtT(t) |> create + override t.SinT() = RawTensorCPU.SinT(t) |> create + override t.CosT() = RawTensorCPU.CosT(t) |> create + override t.TanT() = RawTensorCPU.TanT(t) |> create + override t.SinhT() = RawTensorCPU.SinhT(t) |> create + override t.CoshT() = RawTensorCPU.CoshT(t) |> create + override t.TanhT() = RawTensorCPU.TanhT(t) |> create + override t.AsinT() = RawTensorCPU.AsinT(t) |> create + override t.AcosT() = RawTensorCPU.AcosT(t) |> create + override t.AtanT() = RawTensorCPU.AtanT(t) |> create + override t.InverseT() = RawTensorCPU.InverseT(t) :> _ + override t.DetT() = RawTensorCPU.DetT(t) :> _ + override a.SolveTT(b) = RawTensorCPU.SolveTT(a, b) :> _ + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toDouble()) |> createOn device + static member Random(shape:Shape, device) = RawTensorCPU.Random double shape |> createOn device + static member RandomNormal(shape:Shape, device) = RawTensorCPU.RandomNormal double shape |> createOn device + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt double shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorInt8(values: int8[], shape:Shape, device) = + inherit RawTensorCPU<int8>(values, shape, Dtype.Int8, device) + + let create(values, shape) : RawTensor = upcast RawTensorInt8(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorInt8(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorInt8(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, _relativeTolerance, _absoluteTolerance) = RawTensorCPU.Equals(t1, t2) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toSByte() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = +
let alpha = match alpha with Some v -> v.toSByte() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toSByte(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toSByte(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toSByte()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toSByte()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toSByte(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toSByte()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D int8 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D int8 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D int8 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D int8 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D int8 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D int8 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D(t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = t.Cast(Dtype.Int64).SumT(?resultType=resultType) + override t.SumTDim(dim, resultType) = t.Cast(Dtype.Int64).SumTDim(dim, ?resultType=resultType) + override t.SignT() = RawTensorCPU.SignT (sign >> int8) t |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + + override t.SoftplusT() = opNotSupported "SoftplusT" t.Dtype + override t1.PowTT(t2) = opNotSupported2 "PowTT" t1.Dtype t2.Dtype + override t2.PowFromT0T(_t1) = opNotSupported "PowT0T" t2.Dtype
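+ // Note (added, illustrative): these opNotSupported overrides raise rather than
+ // silently promoting to a float dtype, since fractional and transcendental results
+ // (floor, exp, sqrt, ...) are not defined for Int8 tensors; a caller would Cast to
+ // a floating dtype first.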
+ override t1.PowTT0(_t2) = opNotSupported "PowTT0" t1.Dtype + override t.FloorT() = opNotSupported "FloorT" t.Dtype + override t.CeilT() = opNotSupported "CeilT" t.Dtype + override t.RoundT() = opNotSupported "RoundT" t.Dtype + override t.SigmoidT() = opNotSupported "SigmoidT" t.Dtype + override t.ExpT() = opNotSupported "ExpT" t.Dtype + override t.LogT() = opNotSupported "LogT" t.Dtype + override t.Log10T() = opNotSupported "Log10T" t.Dtype + override t.SqrtT() = opNotSupported "SqrtT" t.Dtype + override t.SinT() = opNotSupported "SinT" t.Dtype + override t.CosT() = opNotSupported "CosT" t.Dtype + override t.TanT() = opNotSupported "TanT" t.Dtype + override t.SinhT() = opNotSupported "SinhT" t.Dtype + override t.CoshT() = opNotSupported "CoshT" t.Dtype + override t.TanhT() = opNotSupported "TanhT" t.Dtype + override t.AsinT() = opNotSupported "AsinT" t.Dtype + override t.AcosT() = opNotSupported "AcosT" t.Dtype + override t.AtanT() = opNotSupported "AtanT" t.Dtype + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toSByte()) |> createOn device + static member Random(_shape:Shape, _device) = opNotSupported "Random" Dtype.Int8 + static member RandomNormal(_shape:Shape, _device) = opNotSupported "RandomNormal" Dtype.Int8 + static member RandomInt(shape, low, high, device) = RawTensorCPU.RandomInt int8 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorByte(values: byte[], shape:Shape, device) = + inherit RawTensorCPU<byte>(values, shape, Dtype.Byte, device) + + let create(values, shape) : RawTensor = upcast RawTensorByte(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorByte(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorByte(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, _relativeTolerance, _absoluteTolerance) = RawTensorCPU.Equals(t1, t2) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) +
override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toByte() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toByte() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toByte(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toByte(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toByte()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toByte()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toByte(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toByte()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D byte (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D byte (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D byte (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D byte (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D byte (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D byte (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D(t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (sbyte >> (~-) >> byte ) (t) |> create + override t.SumT(resultType) = t.Cast(Dtype.Int64).SumT(?resultType=resultType) + override t.SumTDim(dim, resultType) = t.Cast(Dtype.Int64).SumTDim(dim, ?resultType=resultType) + override t.SignT() = RawTensorCPU.SignT (min 1uy) t |> create + override t.AbsT() = RawTensorCPU.AbsT id t |> create 
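+ // Byte is unsigned, so the numeric special cases above are deliberate: NegT
+ // round-trips through sbyte to get two's-complement wrap-around, SignT maps any
+ // nonzero value to 1uy via (min 1uy), AbsT is the identity, and SumT/SumTDim
+ // accumulate in Int64 so that reductions do not overflow the 8-bit range.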
+ override t.ReluT() = RawTensorCPU.ReluT(t) |> create + + override t.SoftplusT() = opNotSupported "SoftplusT" t.Dtype + override t1.PowTT(t2) = opNotSupported2 "PowTT" t1.Dtype t2.Dtype + override t2.PowFromT0T(_t1) = opNotSupported "PowT0T" t2.Dtype + override t1.PowTT0(_t2) = opNotSupported "PowTT0" t1.Dtype + override t.FloorT() = opNotSupported "FloorT" t.Dtype + override t.CeilT() = opNotSupported "CeilT" t.Dtype + override t.RoundT() = opNotSupported "RoundT" t.Dtype + override t.SigmoidT() = opNotSupported "SigmoidT" t.Dtype + override t.ExpT() = opNotSupported "ExpT" t.Dtype + override t.LogT() = opNotSupported "LogT" t.Dtype + override t.Log10T() = opNotSupported "Log10T" t.Dtype + override t.SqrtT() = opNotSupported "SqrtT" t.Dtype + override t.SinT() = opNotSupported "SinT" t.Dtype + override t.CosT() = opNotSupported "CosT" t.Dtype + override t.TanT() = opNotSupported "TanT" t.Dtype + override t.SinhT() = opNotSupported "SinhT" t.Dtype + override t.CoshT() = opNotSupported "CoshT" t.Dtype + override t.TanhT() = opNotSupported "TanhT" t.Dtype + override t.AsinT() = opNotSupported "AsinT" t.Dtype + override t.AcosT() = opNotSupported "AcosT" t.Dtype + override t.AtanT() = opNotSupported "AtanT" t.Dtype + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toByte()) |> createOn device + static member Random(_shape:Shape, _device) = opNotSupported "Random" Dtype.Byte + static member RandomNormal(_shape:Shape, _device) = opNotSupported "RandomNormal" Dtype.Byte + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt byte shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorInt16(values: int16[], shape:Shape, device) = + inherit RawTensorCPU<int16>(values, shape, Dtype.Int16, device) + + let create(values, shape) : RawTensor = upcast RawTensorInt16(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorInt16(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorInt16(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, _relativeTolerance, _absoluteTolerance) = RawTensorCPU.Equals(t1, t2) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) =
RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toInt16() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toInt16() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toInt16(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toInt16(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toInt16()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toInt16()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toInt16(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toInt16()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D int16 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D int16 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D int16 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D int16 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D int16 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D int16 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D(t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType)
= t.Cast(Dtype.Int64).SumT(?resultType=resultType) + override t.SumTDim(dim, resultType) = t.Cast(Dtype.Int64).SumTDim(dim, ?resultType=resultType) + override t.SignT() = RawTensorCPU.SignT (sign >> int16) t |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + + override t.SoftplusT() = opNotSupported "SoftplusT" t.Dtype + override t1.PowTT(t2) = opNotSupported2 "PowTT" t1.Dtype t2.Dtype + override t2.PowFromT0T(_t1) = opNotSupported "PowT0T" t2.Dtype + override t1.PowTT0(_t2) = opNotSupported "PowTT0" t1.Dtype + override t.FloorT() = opNotSupported "FloorT" t.Dtype + override t.CeilT() = opNotSupported "CeilT" t.Dtype + override t.RoundT() = opNotSupported "RoundT" t.Dtype + override t.SigmoidT() = opNotSupported "SigmoidT" t.Dtype + override t.ExpT() = opNotSupported "ExpT" t.Dtype + override t.LogT() = opNotSupported "LogT" t.Dtype + override t.Log10T() = opNotSupported "Log10T" t.Dtype + override t.SqrtT() = opNotSupported "SqrtT" t.Dtype + override t.SinT() = opNotSupported "SinT" t.Dtype + override t.CosT() = opNotSupported "CosT" t.Dtype + override t.TanT() = opNotSupported "TanT" t.Dtype + override t.SinhT() = opNotSupported "SinhT" t.Dtype + override t.CoshT() = opNotSupported "CoshT" t.Dtype + override t.TanhT() = opNotSupported "TanhT" t.Dtype + override t.AsinT() = opNotSupported "AsinT" t.Dtype + override t.AcosT() = opNotSupported "AcosT" t.Dtype + override t.AtanT() = opNotSupported "AtanT" t.Dtype + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toInt16()) |> createOn device + static member Random(_shape:Shape, _device) = opNotSupported "Random" Dtype.Int16 + static member RandomNormal(_shape:Shape, _device) = opNotSupported "RandomNormal" Dtype.Int16 + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt int16 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorInt32(values: int32[], shape:Shape, device) = + inherit RawTensorCPU<int32>(values, shape, Dtype.Int32, device) + + let create(values, shape) : RawTensor = upcast RawTensorInt32(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorInt32(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorInt32(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, _relativeTolerance, _absoluteTolerance) = RawTensorCPU.Equals(t1, t2) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override
t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toInt32() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toInt32() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toInt32(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toInt32(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toInt32()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toInt32()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toInt32(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toInt32()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D int32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D int32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D int32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D int32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D int32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D int32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D(t1, t2, stride, padding) :> _ +
override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = t.Cast(Dtype.Int64).SumT(?resultType=resultType) + override t.SumTDim(dim, resultType) = t.Cast(Dtype.Int64).SumTDim(dim, ?resultType=resultType) + override t.SignT() = RawTensorCPU.SignT (sign >> int32) t |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + + override t.SoftplusT() = opNotSupported "SoftplusT" t.Dtype + override t1.PowTT(t2) = opNotSupported2 "PowTT" t1.Dtype t2.Dtype + override t2.PowFromT0T(_t1) = opNotSupported "PowT0T" t2.Dtype + override t1.PowTT0(_t2) = opNotSupported "PowTT0" t1.Dtype + override t.FloorT() = opNotSupported "FloorT" t.Dtype + override t.CeilT() = opNotSupported "CeilT" t.Dtype + override t.RoundT() = opNotSupported "RoundT" t.Dtype + override t.SigmoidT() = opNotSupported "SigmoidT" t.Dtype + override t.ExpT() = opNotSupported "ExpT" t.Dtype + override t.LogT() = opNotSupported "LogT" t.Dtype + override t.Log10T() = opNotSupported "Log10T" t.Dtype + override t.SqrtT() = opNotSupported "SqrtT" t.Dtype + override t.SinT() = opNotSupported "SinT" t.Dtype + override t.CosT() = opNotSupported "CosT" t.Dtype + override t.TanT() = opNotSupported "TanT" t.Dtype + override t.SinhT() = opNotSupported "SinhT" t.Dtype + override t.CoshT() = opNotSupported "CoshT" t.Dtype + override t.TanhT() = opNotSupported "TanhT" t.Dtype + override t.AsinT() = opNotSupported "AsinT" t.Dtype + override t.AcosT() = opNotSupported "AcosT" t.Dtype + override t.AtanT() = opNotSupported "AtanT" t.Dtype + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toInt32()) |> createOn device + static member Random(_shape:Shape, _device) = opNotSupported "Random" Dtype.Int32 + static member RandomNormal(_shape:Shape, _device) = opNotSupported "RandomNormal" Dtype.Int32 + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt int32 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorInt64(values: int64[], shape:Shape, device) = + inherit RawTensorCPU<int64>(values, shape, Dtype.Int64, device) + + let create(values, shape) : RawTensor = upcast RawTensorInt64(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorInt64(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorInt64(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) =
RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, _relativeTolerance, _absoluteTolerance) = RawTensorCPU.Equals(t1, t2) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toInt64() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toInt64() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toInt64(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toInt64(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toInt64()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toInt64()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toInt64(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toInt64()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D int64 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D int64 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D int64 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D int64 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D int64 (t1, originalInput,
kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D int64 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D(t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = + let res = RawTensorCPU.SumT(t) |> create + match resultType with + | None -> res + | Some dtype -> res.Cast(dtype) + override t.SumTDim(dim, resultType) = + let res = RawTensorCPU.SumTDim(t, dim) + match resultType with + | None -> res :> _ + | Some dtype -> res.Cast(dtype) + override t.SignT() = RawTensorCPU.SignT (sign >> int64) t |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + + override t.SoftplusT() = opNotSupported "SoftplusT" t.Dtype + override t1.PowTT(t2) = opNotSupported2 "PowTT" t1.Dtype t2.Dtype + override t2.PowFromT0T(_t1) = opNotSupported "PowT0T" t2.Dtype + override t1.PowTT0(_t2) = opNotSupported "PowTT0" t1.Dtype + override t.FloorT() = opNotSupported "FloorT" t.Dtype + override t.CeilT() = opNotSupported "CeilT" t.Dtype + override t.RoundT() = opNotSupported "RoundT" t.Dtype + override t.SigmoidT() = opNotSupported "SigmoidT" t.Dtype + override t.ExpT() = opNotSupported "ExpT" t.Dtype + override t.LogT() = opNotSupported "LogT" t.Dtype + override t.Log10T() = opNotSupported "Log10T" t.Dtype + override t.SqrtT() = opNotSupported "SqrtT" t.Dtype + override t.SinT() = opNotSupported "SinT" t.Dtype + override t.CosT() = opNotSupported "CosT" t.Dtype + override t.TanT() = opNotSupported "TanT" t.Dtype + override t.SinhT() = opNotSupported "SinhT" t.Dtype + override t.CoshT() = opNotSupported "CoshT" t.Dtype + override t.TanhT() = opNotSupported "TanhT" t.Dtype + override t.AsinT() = opNotSupported "AsinT" t.Dtype + override t.AcosT() = opNotSupported "AcosT" t.Dtype + override t.AtanT() = opNotSupported "AtanT" t.Dtype + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toInt64()) |> createOn device + static member Random(_shape:Shape, _device) = opNotSupported "Random" Dtype.Int64 + static member RandomNormal(_shape:Shape, _device) = opNotSupported "RandomNormal" Dtype.Int64 + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt int64 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +type RawTensorBool(values: bool[], shape:Shape, device) = + inherit RawTensorCPU<bool>(values, shape, Dtype.Bool, device) + + let create(values, shape) :
RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorBool(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, _relativeTolerance, _absoluteTolerance) = RawTensorCPU.Equals(t1, t2) + override t1.LtTT(t2) = t1.MakeLike(Array.map2 (<) t1.Values (t2.GetTypedValues()), t1.Shape) + override t1.GtTT(t2) = t1.MakeLike(Array.map2 (>) t1.Values (t2.GetTypedValues()), t1.Shape) + override t1.LeTT(t2) = t1.MakeLike(Array.map2 (<=) t1.Values (t2.GetTypedValues()), t1.Shape) + override t1.GeTT(t2) = t1.MakeLike(Array.map2 (>=) t1.Values (t2.GetTypedValues()), t1.Shape) + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> create + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> create + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toBool() | None -> true + t1.MakeLike(Array.map2 (||) t1.Values (Array.map (fun x -> alpha && x) (t2.GetTypedValues())), t1.Shape) + override t1.AddTT0(t2, alpha) = + let t2 = t2.toBool() + let alpha = match alpha with Some v -> v.toBool() | None -> true + let values = Array.map (fun a -> a || (alpha && t2)) t1.Values + t1.MakeLike(values, t1.Shape) + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((||), t1, location, t2) |> create + override t1.MulTT(t2) = t1.MakeLike(Array.map2 (&&) t1.Values (t2.GetTypedValues()), t1.Shape) + override t1.MulTT0(t2) = + let t2 = t2.toBool() + t1.MakeLike(Array.map (fun a -> a && t2) t1.Values, t1.Shape) + override t.SumT(resultType) = t.Cast(Int64).SumT(?resultType=resultType) + override t.SumTDim(dim, resultType) = t.Cast(Dtype.Int64).SumTDim(dim, ?resultType=resultType) + override t.SignT() = t :> _ + + override t.ClampT(_low, _high) = opNotSupported "Clamp" t.Dtype + override t1.SubTT(t2) = opNotSupported2 "SubTT" t1.Dtype t2.Dtype + override t2.SubFromT0T(_t1) = opNotSupported "SubT0T" t2.Dtype + override t1.SubTT0(_t2) = opNotSupported "SubTT0" t1.Dtype + override t1.DivTT(t2) = opNotSupported2 "DivTT" t1.Dtype t2.Dtype + override t2.DivFromT0T(_t1) = opNotSupported "DivT0T" t2.Dtype + override t1.DivTT0(_t2) = opNotSupported "DivTT0" t1.Dtype + override t1.MatMulTT(t2) = opNotSupported2 "MatMulTT" t1.Dtype t2.Dtype + override t1.BMMTT(t2) = opNotSupported2 "BMMTT" t1.Dtype t2.Dtype + override t1.MaxPool1D(_kernelSize, _stride, _padding) = opNotSupported "MaxPool1D" t1.Dtype + override t1.MaxPool2D(_kernelSize, _stride, _padding) = opNotSupported "MaxPool2D" t1.Dtype + override t1.MaxPool3D(_kernelSize, _stride, _padding) = opNotSupported "MaxPool3D" t1.Dtype + override t1.MaxUnpool1D(_indices, _outputSize) = opNotSupported "MaxUnpool1D" t1.Dtype + override t1.MaxUnpool2D(_indices, _outputSize) = opNotSupported "MaxUnpool2D" t1.Dtype + override t1.MaxUnpool3D(_indices, _outputSize) = opNotSupported "MaxUnpool3D" t1.Dtype + override t1.Conv1D(t2, _stride, _padding) = opNotSupported2 "Conv1D" t1.Dtype t2.Dtype + override t1.Conv2D(t2, _stride, _padding) = opNotSupported2 "Conv2D" t1.Dtype t2.Dtype + override t1.Conv3D(t2, _stride, _padding) = 
opNotSupported2 "Conv3D" t1.Dtype t2.Dtype + override t1.AvgPool1D(_kernelSize, _stride, _padding) = opNotSupported "AvgPool1D" t1.Dtype + override t1.AvgPool2D(_kernelSize, _stride, _padding) = opNotSupported "AvgPool2D" t1.Dtype + override t1.AvgPool3D(_kernelSize, _stride, _padding) = opNotSupported "AvgPool3D" t1.Dtype + override t1.AvgPoolReverse1D(_originalInput, _kernelSize, _stride, _padding) = opNotSupported "AvgPoolReverse1D" t1.Dtype + override t1.AvgPoolReverse2D(_originalInput, _kernelSize, _stride, _padding) = opNotSupported "AvgPoolReverse2D" t1.Dtype + override t1.AvgPoolReverse3D(_originalInput, _kernelSize, _stride, _padding) = opNotSupported "AvgPoolReverse3D" t1.Dtype + override t.NegT() = opNotSupported "NegT" t.Dtype + override t.AbsT() = opNotSupported "AbsT" t.Dtype + override t.ReluT() = opNotSupported "ReluT" t.Dtype + override t.SoftplusT() = opNotSupported "SoftplusT" t.Dtype + override t1.PowTT(t2) = opNotSupported2 "PowTT" t1.Dtype t2.Dtype + override t2.PowFromT0T(_t1) = opNotSupported "PowT0T" t2.Dtype + override t1.PowTT0(_t2) = opNotSupported "PowTT0" t1.Dtype + override t.FloorT() = opNotSupported "FloorT" t.Dtype + override t.CeilT() = opNotSupported "CeilT" t.Dtype + override t.RoundT() = opNotSupported "RoundT" t.Dtype + override t.SigmoidT() = opNotSupported "SigmoidT" t.Dtype + override t.ExpT() = opNotSupported "ExpT" t.Dtype + override t.LogT() = opNotSupported "LogT" t.Dtype + override t.Log10T() = opNotSupported "Log10T" t.Dtype + override t.SqrtT() = opNotSupported "SqrtT" t.Dtype + override t.SinT() = opNotSupported "SinT" t.Dtype + override t.CosT() = opNotSupported "CosT" t.Dtype + override t.TanT() = opNotSupported "TanT" t.Dtype + override t.SinhT() = opNotSupported "SinhT" t.Dtype + override t.CoshT() = opNotSupported "CoshT" t.Dtype + override t.TanhT() = opNotSupported "TanhT" t.Dtype + override t.AsinT() = opNotSupported "AsinT" t.Dtype + override t.AcosT() = opNotSupported "AcosT" t.Dtype + override t.AtanT() = opNotSupported "AtanT" t.Dtype + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = ([| false |], Shape.scalar) |> createOn device + static member One(device) = ([| true |], Shape.scalar) |> createOn device + static member Zeros(shape:Shape, device) = (Array.zeroCreate (shapeLength shape), shape) |> createOn device + static member Empty(shape:Shape, device) = (Array.zeroCreate (shapeLength shape), shape) |> createOn device + static member Ones(shape:Shape, device) = (Array.create (shapeLength shape) true, shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toBool()) |> createOn device + static member Random(_shape:Shape, _device) = opNotSupported "Random" Dtype.Bool + static member RandomNormal(_shape:Shape, _device) = opNotSupported "RandomNormal" Dtype.Bool + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt System.Convert.ToBoolean shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +/// The concrete implementation of RawTensor for Float16 data. 
+type RawTensorFloat16(values: float32[], shape:Shape, device) = + inherit RawTensorCPU<float32>(values, shape, Dtype.Float16, device) + let create(values, shape) : RawTensor = upcast RawTensorFloat16(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorFloat16(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorFloat16(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, relativeTolerance, absoluteTolerance) = RawTensorCPU.AllClose(t1, t2, float32 relativeTolerance, float32 absoluteTolerance) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t.SoftplusT() = RawTensorCPU.SoftplusT(t) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toSingle() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toSingle() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toSingle(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toSingle(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toSingle()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toSingle()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toSingle(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toSingle()) |> create + override t1.PowTT(t2) = RawTensorCPU.PowTT(t1, t2) |> create + override t2.PowFromT0T(t1) = RawTensorCPU.PowT0T(t1.toSingle(), t2) |> create + override t1.PowTT0(t2) = RawTensorCPU.PowTT0(t1, t2.toSingle()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) = RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ +
override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D (t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = + let res = RawTensorCPU.SumT(t) |> create + match resultType with + | None -> res + | Some dtype -> res.Cast(dtype) + override t.SumTDim(dim, resultType) = + let res = RawTensorCPU.SumTDim(t, dim) + match resultType with + | None -> res :> _ + | Some dtype -> res.Cast(dtype) + override t.SignT() = RawTensorCPU.SignT (sign >> float32) t |> create + override t.FloorT() = RawTensorCPU.FloorT(t) |> create + override t.CeilT() = RawTensorCPU.CeilT(t) |> create + override t.RoundT() = RawTensorCPU.RoundT(t) |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + override t.SigmoidT() = RawTensorCPU.SigmoidT(t) |> create + override t.ExpT() = RawTensorCPU.ExpT(t) |> create + override t.LogT() = RawTensorCPU.LogT(t) |> create + override t.Log10T() = RawTensorCPU.Log10T(t) |> create + override t.SqrtT() = RawTensorCPU.SqrtT(t) |> create + override t.SinT() = RawTensorCPU.SinT(t) |> create + override t.CosT() = RawTensorCPU.CosT(t) |> create + override t.TanT() = RawTensorCPU.TanT(t) |> create + override t.SinhT() = RawTensorCPU.SinhT(t) |> create + override t.CoshT() = RawTensorCPU.CoshT(t) |> create + override t.TanhT() = RawTensorCPU.TanhT(t) |> create + override t.AsinT() = RawTensorCPU.AsinT(t) |> create + override t.AcosT() = RawTensorCPU.AcosT(t) |> create + override t.AtanT() = RawTensorCPU.AtanT(t) |> create + override t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape,
value:scalar, device) = RawTensorCPU.Full (shape, value.toSingle()) |> createOn device + static member Random(shape:Shape, device) = RawTensorCPU.Random float32 shape |> createOn device + static member RandomNormal(shape:Shape, device) = RawTensorCPU.RandomNormal float32 shape |> createOn device + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt float32 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +/// The concrete implementation of RawTensor for BFloat16 data. +type RawTensorBFloat16(values: float32[], shape:Shape, device) = + inherit RawTensorCPU<float32>(values, shape, Dtype.BFloat16, device) + let create(values, shape) : RawTensor = upcast RawTensorBFloat16(values, shape, device) + let createBool(values, shape) : RawTensor = upcast RawTensorBool(values, shape, device) + static let createOn device (values, shape) : RawTensor = upcast RawTensorBFloat16(values, shape, device) + + override t.MakeLike(values, shape, newDevice) = upcast RawTensorBFloat16(values, shape, defaultArg newDevice device) + override t1.Equals(t2:RawTensor) = RawTensorCPU.Equals(t1, t2) + override t1.AllClose(t2:RawTensor, relativeTolerance, absoluteTolerance) = RawTensorCPU.AllClose(t1, t2, float32 relativeTolerance, float32 absoluteTolerance) + override t.ClampT(low, high) = RawTensorCPU.ClampT(t, low, high) |> create + override t.SoftplusT() = RawTensorCPU.SoftplusT(t) |> create + override t1.LtTT(t2) = RawTensorCPU.LtTT(t1, t2) |> createBool + override t1.GtTT(t2) = RawTensorCPU.GtTT(t1, t2) |> createBool + override t1.LeTT(t2) = RawTensorCPU.LeTT(t1, t2) |> createBool + override t1.GeTT(t2) = RawTensorCPU.GeTT(t1, t2) |> createBool + override t1.EqTT(t2) = RawTensorCPU.EqTT(t1, t2) |> createBool + override t1.NeqTT(t2) = RawTensorCPU.NeqTT(t1, t2) |> createBool + override t.MaxReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (>) (t, dim, keepDim) + override t.MinReduceT(dim, keepDim) = RawTensorCPU.MinMaxReduceT (<) (t, dim, keepDim) + override t.MaxIndexT() = RawTensorCPU.MaxIndexT(t) + override t.MinIndexT() = RawTensorCPU.MinIndexT(t) + override t1.AddTT(t2, alpha) = + let alpha = match alpha with Some v -> v.toSingle() | None -> RawTensorCPU.one + RawTensorCPU.AddTT(t1, t2, alpha) |> create + override t1.AddTT0(t2, alpha) = + let alpha = match alpha with Some v -> v.toSingle() | None -> RawTensorCPU.one + RawTensorCPU.AddTT0(t1, t2.toSingle(), alpha) |> create + override t1.AddTTSlice(location:int[], t2) = RawTensorCPU.AddTTSlice((+), t1, location, t2) |> create + override t1.SubTT(t2) = RawTensorCPU.SubTT(t1, t2) |> create + override t2.SubFromT0T(t1) = RawTensorCPU.SubT0T(t1.toSingle(), t2) |> create + override t1.SubTT0(t2) = RawTensorCPU.SubTT0(t1, t2.toSingle()) |> create + override t1.MulTT(t2) = RawTensorCPU.MulTT(t1, t2) |> create + override t1.MulTT0(t2) = RawTensorCPU.MulTT0(t1, t2.toSingle()) |> create + override t1.DivTT(t2) = RawTensorCPU.DivTT(t1, t2) |> create + override t2.DivFromT0T(t1) = RawTensorCPU.DivT0T(t1.toSingle(), t2) |> create + override t1.DivTT0(t2) = RawTensorCPU.DivTT0(t1, t2.toSingle()) |> create + override t1.PowTT(t2) = RawTensorCPU.PowTT(t1, t2) |> create + override t2.PowFromT0T(t1) = RawTensorCPU.PowT0T(t1.toSingle(), t2) |> create + override t1.PowTT0(t2) = RawTensorCPU.PowTT0(t1, t2.toSingle()) |> create + override t1.MatMulTT(t2) = RawTensorCPU.MatMulTT(t1, t2) |> create + override t1.BMMTT(t2) =
RawTensorCPU.BMMTT(t1, t2) |> create + override t1.MaxPool1D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool1D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool2D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool2D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxPool3D(kernelSize, stride, padding) = let result, indices = RawTensorCPU.MaxPool3D(t1, kernelSize, stride, padding) in result :> _, indices :> _ + override t1.MaxUnpool1D(indices, outputSize) = RawTensorCPU.MaxUnpool1D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool2D(indices, outputSize) = RawTensorCPU.MaxUnpool2D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.MaxUnpool3D(indices, outputSize) = RawTensorCPU.MaxUnpool3D(t1, indices :?> RawTensorCPU<int>, outputSize) :> _ + override t1.Conv1D(t2, stride, padding) = RawTensorCPU.Conv1D (t1, t2, stride, padding) :> _ + override t1.Conv2D(t2, stride, padding) = RawTensorCPU.Conv2D (t1, t2, stride, padding) :> _ + override t1.Conv3D(t2, stride, padding) = RawTensorCPU.Conv3D (t1, t2, stride, padding) :> _ + override t1.AvgPool1D(kernelSize, stride, padding) = RawTensorCPU.AvgPool1D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool2D(kernelSize, stride, padding) = RawTensorCPU.AvgPool2D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPool3D(kernelSize, stride, padding) = RawTensorCPU.AvgPool3D float32 (t1, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse1D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse1D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse2D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse2D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t1.AvgPoolReverse3D(originalInput, kernelSize, stride, padding) = RawTensorCPU.AvgPoolReverse3D float32 (t1, originalInput, kernelSize, stride, padding) :> _ + override t.NegT() = RawTensorCPU.NegT (~-) (t) |> create + override t.SumT(resultType) = + let res = RawTensorCPU.SumT(t) |> create + match resultType with + | None -> res + | Some dtype -> res.Cast(dtype) + override t.SumTDim(dim, resultType) = + let res = RawTensorCPU.SumTDim(t, dim) + match resultType with + | None -> res :> _ + | Some dtype -> res.Cast(dtype) + override t.SignT() = RawTensorCPU.SignT (sign >> float32) t |> create + override t.FloorT() = RawTensorCPU.FloorT(t) |> create + override t.CeilT() = RawTensorCPU.CeilT(t) |> create + override t.RoundT() = RawTensorCPU.RoundT(t) |> create + override t.AbsT() = RawTensorCPU.AbsT abs t |> create + override t.ReluT() = RawTensorCPU.ReluT(t) |> create + override t.SigmoidT() = RawTensorCPU.SigmoidT(t) |> create + override t.ExpT() = RawTensorCPU.ExpT(t) |> create + override t.LogT() = RawTensorCPU.LogT(t) |> create + override t.Log10T() = RawTensorCPU.Log10T(t) |> create + override t.SqrtT() = RawTensorCPU.SqrtT(t) |> create + override t.SinT() = RawTensorCPU.SinT(t) |> create + override t.CosT() = RawTensorCPU.CosT(t) |> create + override t.TanT() = RawTensorCPU.TanT(t) |> create + override t.SinhT() = RawTensorCPU.SinhT(t) |> create + override t.CoshT() = RawTensorCPU.CoshT(t) |> create + override t.TanhT() = RawTensorCPU.TanhT(t) |> create + override t.AsinT() = RawTensorCPU.AsinT(t) |> create + override t.AcosT() = RawTensorCPU.AcosT(t) |> create + override t.AtanT() = RawTensorCPU.AtanT(t) |> create + override
t.InverseT() = opNotSupported "InverseT" t.Dtype + override t.DetT() = opNotSupported "DetT" t.Dtype + override a.SolveTT(_) = opNotSupported "SolveTT" a.Dtype + + static member Seed(seed) = Random.Seed(seed) + static member Zero(device) = RawTensorCPU.Zero() |> createOn device + static member One(device) = RawTensorCPU.One() |> createOn device + static member Zeros(shape:Shape, device) = RawTensorCPU.Zeros(shape) |> createOn device + static member Empty(shape:Shape, device) = RawTensorCPU.Empty(shape) |> createOn device + static member Ones(shape:Shape, device) = RawTensorCPU.Ones(shape) |> createOn device + static member Full(shape:Shape, value:scalar, device) = RawTensorCPU.Full (shape, value.toSingle()) |> createOn device + static member Random(shape:Shape, device) = RawTensorCPU.Random float32 shape |> createOn device + static member RandomNormal(shape:Shape, device) = RawTensorCPU.RandomNormal float32 shape |> createOn device + static member RandomInt(shape:Shape, low:int, high:int, device) = RawTensorCPU.RandomInt float32 shape low high |> createOn device + static member CreateFromFlatArray(values:Array, shape, device) = RawTensorCPU.CreateFromFlatArray (values, shape) |> createOn device + +#if TEST_DUPLICATE_BACKEND +type TestDuplicateBackendTensorStatics() = +#else +type ReferenceBackendTensorStatics() = +#endif + + inherit BackendTensorStatics() + + override _.GetDevices(deviceType) = + match deviceType with + | None -> [ Device.CPU (* ; Device.GPU *) ] + | Some DeviceType.CPU -> [ Device.CPU] + //| Some DeviceType.CUDA -> [ Device.GPU ] + | Some _ -> [] + + override _.IsDeviceTypeAvailable (deviceType) = (match deviceType with DeviceType.CPU -> true | _ -> false) + override _.Seed(seed) = Random.Seed(seed) + override _.Zero(dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.Zero(device) + | BFloat16 -> RawTensorBFloat16.Zero(device) + | Float32 -> RawTensorFloat32.Zero(device) + | Float64 -> RawTensorFloat64.Zero(device) + | Int8 -> RawTensorInt8.Zero(device) + | Byte -> RawTensorByte.Zero(device) + | Int16 -> RawTensorInt16.Zero(device) + | Int32 -> RawTensorInt32.Zero(device) + | Int64 -> RawTensorInt64.Zero(device) + | Bool -> RawTensorBool.Zero(device) + override _.One(dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.One(device) + | BFloat16 -> RawTensorBFloat16.One(device) + | Float32 -> RawTensorFloat32.One(device) + | Float64 -> RawTensorFloat64.One(device) + | Int8 -> RawTensorInt8.One(device) + | Byte -> RawTensorByte.One(device) + | Int16 -> RawTensorInt16.One(device) + | Int32 -> RawTensorInt32.One(device) + | Int64 -> RawTensorInt64.One(device) + | Bool -> RawTensorBool.One(device) + override _.Zeros(shape:Shape, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.Zeros(shape, device) + | BFloat16 -> RawTensorBFloat16.Zeros(shape, device) + | Float32 -> RawTensorFloat32.Zeros(shape, device) + | Float64 -> RawTensorFloat64.Zeros(shape, device) + | Int8 -> RawTensorInt8.Zeros(shape, device) + | Byte -> RawTensorByte.Zeros(shape, device) + | Int16 -> RawTensorInt16.Zeros(shape, device) + | Int32 -> RawTensorInt32.Zeros(shape, device) + | Int64 -> RawTensorInt64.Zeros(shape, device) + | Bool -> RawTensorBool.Zeros(shape, device) + override _.Empty(shape:Shape, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.Empty(shape, device) + | BFloat16 -> RawTensorBFloat16.Empty(shape, device) + | Float32 -> RawTensorFloat32.Empty(shape, device) + | Float64 -> RawTensorFloat64.Empty(shape, device) + | Int8 -> 
RawTensorInt8.Empty(shape, device) + | Byte -> RawTensorByte.Empty(shape, device) + | Int16 -> RawTensorInt16.Empty(shape, device) + | Int32 -> RawTensorInt32.Empty(shape, device) + | Int64 -> RawTensorInt64.Empty(shape, device) + | Bool -> RawTensorBool.Empty(shape, device) + override _.Ones(shape:Shape, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.Ones(shape, device) + | BFloat16 -> RawTensorBFloat16.Ones(shape, device) + | Float32 -> RawTensorFloat32.Ones(shape, device) + | Float64 -> RawTensorFloat64.Ones(shape, device) + | Int8 -> RawTensorInt8.Ones(shape, device) + | Byte -> RawTensorByte.Ones(shape, device) + | Int16 -> RawTensorInt16.Ones(shape, device) + | Int32 -> RawTensorInt32.Ones(shape, device) + | Int64 -> RawTensorInt64.Ones(shape, device) + | Bool -> RawTensorBool.Ones(shape, device) + override _.Full(shape:Shape, value:scalar, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.Full(shape, value, device) + | BFloat16 -> RawTensorBFloat16.Full(shape, value, device) + | Float32 -> RawTensorFloat32.Full(shape, value, device) + | Float64 -> RawTensorFloat64.Full(shape, value, device) + | Int8 -> RawTensorInt8.Full(shape, value, device) + | Byte -> RawTensorByte.Full(shape, value, device) + | Int16 -> RawTensorInt16.Full(shape, value, device) + | Int32 -> RawTensorInt32.Full(shape, value, device) + | Int64 -> RawTensorInt64.Full(shape, value, device) + | Bool -> RawTensorBool.Full(shape, value, device) + override _.Random(shape:Shape, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.Random(shape, device) + | BFloat16 -> RawTensorBFloat16.Random(shape, device) + | Float32 -> RawTensorFloat32.Random(shape, device) + | Float64 -> RawTensorFloat64.Random(shape, device) + | Int8 -> RawTensorInt8.Random(shape, device) + | Byte -> RawTensorByte.Random(shape, device) + | Int16 -> RawTensorInt16.Random(shape, device) + | Int32 -> RawTensorInt32.Random(shape, device) + | Int64 -> RawTensorInt64.Random(shape, device) + | Bool -> RawTensorBool.Random(shape, device) + override _.RandomNormal(shape:Shape, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.RandomNormal(shape, device) + | BFloat16 -> RawTensorBFloat16.RandomNormal(shape, device) + | Float32 -> RawTensorFloat32.RandomNormal(shape, device) + | Float64 -> RawTensorFloat64.RandomNormal(shape, device) + | Int8 -> RawTensorInt8.RandomNormal(shape, device) + | Byte -> RawTensorByte.RandomNormal(shape, device) + | Int16 -> RawTensorInt16.RandomNormal(shape, device) + | Int32 -> RawTensorInt32.RandomNormal(shape, device) + | Int64 -> RawTensorInt64.RandomNormal(shape, device) + | Bool -> RawTensorBool.RandomNormal(shape, device) + override _.RandomInt(shape:Shape, low:int, high:int, dtype, device) = + match dtype with + | Float16 -> RawTensorFloat16.RandomInt(shape, low, high, device) + | BFloat16 -> RawTensorBFloat16.RandomInt(shape, low, high, device) + | Float32 -> RawTensorFloat32.RandomInt(shape, low, high, device) + | Float64 -> RawTensorFloat64.RandomInt(shape, low, high, device) + | Int8 -> RawTensorInt8.RandomInt(shape, low, high, device) + | Byte -> RawTensorByte.RandomInt(shape, low, high, device) + | Int16 -> RawTensorInt16.RandomInt(shape, low, high, device) + | Int32 -> RawTensorInt32.RandomInt(shape, low, high, device) + | Int64 -> RawTensorInt64.RandomInt(shape, low, high, device) + | Bool -> RawTensorBool.RandomInt(shape, low, high, device) + override _.CreateFromFlatArray(values:Array, shape, dtype, device) = + match dtype with + | Float16 -> 
RawTensorFloat16.CreateFromFlatArray(values, shape, device) + | BFloat16 -> RawTensorBFloat16.CreateFromFlatArray(values, shape, device) + | Float32 -> RawTensorFloat32.CreateFromFlatArray(values, shape, device) + | Float64 -> RawTensorFloat64.CreateFromFlatArray(values, shape, device) + | Int8 -> RawTensorInt8.CreateFromFlatArray(values, shape, device) + | Byte -> RawTensorByte.CreateFromFlatArray(values, shape, device) + | Int16 -> RawTensorInt16.CreateFromFlatArray(values, shape, device) + | Int32 -> RawTensorInt32.CreateFromFlatArray(values, shape, device) + | Int64 -> RawTensorInt64.CreateFromFlatArray(values, shape, device) + | Bool -> RawTensorBool.CreateFromFlatArray(values, shape, device) + diff --git a/tests/TensorMath.Backends.TestDuplicate/TensorMath.Backends.TestDuplicate.fsproj b/tests/TensorMath.Backends.TestDuplicate/TensorMath.Backends.TestDuplicate.fsproj new file mode 100644 index 0000000..6f3c7a7 --- /dev/null +++ b/tests/TensorMath.Backends.TestDuplicate/TensorMath.Backends.TestDuplicate.fsproj @@ -0,0 +1,16 @@ + + + + netstandard2.1 + true + + + + + + + + + + + diff --git a/tests/TensorMath.Tests/Program.fs b/tests/TensorMath.Tests/Program.fs index 0695f84..3a00018 100644 --- a/tests/TensorMath.Tests/Program.fs +++ b/tests/TensorMath.Tests/Program.fs @@ -1 +1,6 @@ -module Program = let [<EntryPoint>] main _ = 0 +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. + +module Program = let [<EntryPoint>] main _ = 0 \ No newline at end of file diff --git a/tests/TensorMath.Tests/TensorMath.Tests.fsproj b/tests/TensorMath.Tests/TensorMath.Tests.fsproj index 7f5e556..0320664 100644 --- a/tests/TensorMath.Tests/TensorMath.Tests.fsproj +++ b/tests/TensorMath.Tests/TensorMath.Tests.fsproj @@ -9,16 +9,43 @@ - + + + + + + + + + + + + + + + - - - - - + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + diff --git a/tests/TensorMath.Tests/TestCombo.fs b/tests/TensorMath.Tests/TestCombo.fs new file mode 100644 index 0000000..c082dd9 --- /dev/null +++ b/tests/TensorMath.Tests/TestCombo.fs @@ -0,0 +1,132 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository.
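+// ComboInfo (below) pins a (backend, device, dtype) combination so that a test
+// body can create tensors without repeating those arguments. A minimal usage
+// sketch (hypothetical values, for illustration only):
+//
+//   let combo = ComboInfo(defaultDtype=Dtype.Float32)
+//   let t = combo.tensor([1.; 2.; 3.])   // picks up the combo's dtype/device/backend
+//   let r = combo.randn([2; 3])          // shapes are passed as seq<int>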
+ +namespace Tests + +open System +open TensorMath + +// This captures the expected semantics of different Dtypes +type ComboInfo(?defaultBackend: Backend, ?defaultDevice: Device, ?defaultDtype: Dtype, ?defaultFetchDevices: (DeviceType option * Backend option -> Device list)) = + + let dflt x y = match x with Some x -> Some x | None -> y + + member _.backend = defaultArg defaultBackend Backend.Default + + member _.device = defaultArg defaultDevice Device.Default + + member _.devices(?deviceType, ?backend) = + let f = defaultArg defaultFetchDevices (fun (deviceType, backend) -> dsharp.devices(?deviceType=deviceType, ?backend=backend)) + f (deviceType, backend) + + member _.dtype = defaultArg defaultDtype Dtype.Default + + member _.tensor(data: obj, ?device, ?backend, ?dtype) = + dsharp.tensor(data, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.randn(shape:seq, ?device, ?backend, ?dtype) = + dsharp.randn(shape, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.randn(length:int, ?device, ?backend, ?dtype) = + dsharp.randn(length, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.rand(shape:seq, ?device, ?backend, ?dtype) = + dsharp.rand(shape, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.rand(length:int, ?device, ?backend, ?dtype) = + dsharp.rand(length, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.randint(low:int, high:int, shape:seq, ?device, ?backend, ?dtype) = + dsharp.randint(low, high, shape, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.randint(low:int, high:int, length:int, ?device, ?backend, ?dtype) = + dsharp.randint(low, high, length, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.full(shape:seq, value, ?device, ?backend, ?dtype) = + dsharp.full(shape, value, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.full(length:int, value:scalar, ?device, ?backend, ?dtype) = + dsharp.full(length, value, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.ones(shape:seq, ?device, ?backend, ?dtype) = + dsharp.ones(shape, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.ones(length:int, ?device, ?backend, ?dtype) = + dsharp.ones(length, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.zeros(shape:seq, ?device, ?backend, ?dtype) = + dsharp.zeros(shape, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.zeros(length:int, ?device, ?backend, ?dtype) = + dsharp.zeros(length, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.empty(?device, ?backend, ?dtype) = + dsharp.empty(?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.empty(shape:seq, ?device, ?backend, ?dtype) = + dsharp.empty(shape, ?device=dflt device defaultDevice, ?backend=dflt backend 
defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.empty(length:int, ?device, ?backend, ?dtype) = + dsharp.empty(length, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.one(?device, ?backend, ?dtype) = + dsharp.one(?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.zero(?device, ?backend, ?dtype) = + dsharp.zero(?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.move(tensor, ?device, ?backend, ?dtype) = + dsharp.move(tensor, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.onehot(length, hot, ?device, ?backend, ?dtype) = + dsharp.onehot(length, hot, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.eye(rows:int, ?cols:int, ?device, ?backend, ?dtype) = + dsharp.eye(rows, ?cols=cols, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.arange(endVal:float, ?startVal:float, ?step:float, ?device, ?backend, ?dtype) = + dsharp.arange(endVal, ?startVal=startVal, ?step=step, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.arange(endVal:int, ?startVal:int, ?step:int, ?device, ?backend, ?dtype) = + dsharp.arange(endVal, ?startVal=startVal, ?step=step, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.linspace(startVal:float, endVal:float, steps:int, ?device, ?backend, ?dtype) = + dsharp.linspace(startVal, endVal, steps, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.linspace(startVal:int, endVal:int, steps:int, ?device, ?backend, ?dtype) = + dsharp.linspace(startVal, endVal, steps, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.logspace(startVal:float, endVal:float, steps:int, ?baseVal, ?device, ?backend, ?dtype) = + dsharp.logspace(startVal, endVal, steps, ?baseVal=baseVal, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member _.logspace(startVal:int, endVal:int, steps:int, ?baseVal, ?device, ?backend, ?dtype) = + dsharp.logspace(startVal, endVal, steps, ?baseVal=baseVal, ?device=dflt device defaultDevice, ?backend=dflt backend defaultBackend, ?dtype=dflt dtype defaultDtype) + + member c.arrayCreator1D(arr: double[]) = + match c.dtype with + | Dtype.Float16 -> arr |> Array.map float32 :> Array + | Dtype.BFloat16 -> arr |> Array.map float32 :> Array + | Dtype.Float32 -> arr |> Array.map float32 :> Array + | Dtype.Float64 -> arr |> Array.map double :> Array + | Dtype.Byte -> arr |> Array.map byte :> Array + | Dtype.Int8 -> arr |> Array.map int8 :> Array + | Dtype.Int16 -> arr |> Array.map int16:> Array + | Dtype.Int32 -> arr |> Array.map int32 :> Array + | Dtype.Int64 -> arr |> Array.map int64 :> Array + | Dtype.Bool -> arr |> Array.map (fun x -> abs x >= 1.0) :> Array + + member c.arrayCreator2D(arr: double[,]) : Array = + match c.dtype with + | Dtype.BFloat16 -> arr |> Array2D.map float32 :> Array + | Dtype.Float16 -> arr |> Array2D.map float32 :> Array + | Dtype.Float32 -> arr |> Array2D.map float32 :> Array + | Dtype.Float64 -> arr |> Array2D.map double 
:> Array + | Dtype.Byte -> arr |> Array2D.map byte :> Array + | Dtype.Int8 -> arr |> Array2D.map int8 :> Array + | Dtype.Int16 -> arr |> Array2D.map int16:> Array + | Dtype.Int32 -> arr |> Array2D.map int32 :> Array + | Dtype.Int64 -> arr |> Array2D.map int64 :> Array + | Dtype.Bool -> arr |> Array2D.map (fun x -> abs x >= 1.0) :> Array + diff --git a/tests/TensorMath.Tests/TestCombos.fs b/tests/TensorMath.Tests/TestCombos.fs new file mode 100644 index 0000000..6251515 --- /dev/null +++ b/tests/TensorMath.Tests/TestCombos.fs @@ -0,0 +1,76 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. + +namespace Tests + +open TensorMath + +module Dtypes = + + // We run most tests at all these tensor types + let Bool = [ Dtype.Bool ] + let SignedIntegral = [ Dtype.Int8; Dtype.Int16; Dtype.Int32; Dtype.Int64 ] + let UnsignedIntegral = [ Dtype.Byte ] + let Integral = SignedIntegral @ UnsignedIntegral + let Float16s = [ Dtype.Float16; Dtype.BFloat16 ] + let FloatingPointExcept16s = [ Dtype.Float32; Dtype.Float64 ] + let FloatingPoint = Float16s @ FloatingPointExcept16s + let Float32 = [ Dtype.Float32 ] + + // Some operations have quirky behaviour on bool types, we pin these down manually + let SignedIntegralAndFloatingPointExcept16s = FloatingPointExcept16s @ SignedIntegral + let SignedIntegralAndFloatingPoint = FloatingPoint @ SignedIntegral + let IntegralAndFloatingPointExcept16s = FloatingPointExcept16s @ Integral + let IntegralAndFloatingPoint = FloatingPoint @ Integral + let IntegralAndBool = Integral @ Bool + let AllExceptFloat16s = FloatingPointExcept16s @ Integral @ Bool + let All = FloatingPoint @ Integral @ Bool + +module Combos = + + // Use these to experiment in your local branch + //let backends = [ Backend.Reference ] + //let backends = [ Backend.Torch ] + //let backends = [ Backend.Reference; Backend.Torch; Backend.Register("TestDuplicate") ] + //let backends = [ Backend.Reference; Backend.Torch ] + //let backends = [ Backend.Reference; Backend.Register("TestDuplicate") ] + //let backends = [ Backend.Register("TestDuplicate") ] + //let getDevices _ = [ Device.CPU ] + //let getDevices _ = [ Device.GPU ] + + //Use this in committed code + let backends = [ Backend.Reference; Backend.Torch ] + let getDevices (deviceType: DeviceType option, backend: Backend option) = + dsharp.devices(?deviceType=deviceType, ?backend=backend) + + let makeCombos dtypes = + [ for backend in backends do + let ds = getDevices (None, Some backend) + for device in ds do + for dtype in dtypes do + yield ComboInfo(defaultBackend=backend, defaultDevice=device, defaultDtype=dtype, defaultFetchDevices=getDevices) ] + + /// These runs though all devices, backends and various Dtype + let Float32 = makeCombos Dtypes.Float32 + let Integral = makeCombos Dtypes.Integral + let FloatingPointExcept16s = makeCombos Dtypes.FloatingPointExcept16s + let FloatingPoint = makeCombos Dtypes.FloatingPoint + let UnsignedIntegral = makeCombos Dtypes.UnsignedIntegral + let SignedIntegral = makeCombos Dtypes.SignedIntegral + let SignedIntegralAndFloatingPointExcept16s = makeCombos Dtypes.SignedIntegralAndFloatingPointExcept16s + let SignedIntegralAndFloatingPoint = makeCombos Dtypes.SignedIntegralAndFloatingPoint + let IntegralAndFloatingPointExcept16s = makeCombos Dtypes.IntegralAndFloatingPointExcept16s + let IntegralAndFloatingPoint = makeCombos Dtypes.IntegralAndFloatingPoint + let Bool = 
makeCombos Dtypes.Bool + let IntegralAndBool = makeCombos Dtypes.IntegralAndBool + let All = makeCombos Dtypes.All + let AllExcept16s = makeCombos Dtypes.All + + /// This runs though all devices and backends but leaves the default Dtype + let AllDevicesAndBackendsFloat32 = + [ for backend in backends do + let ds = getDevices (None, Some backend) + for device in ds do + yield ComboInfo(defaultDtype=Dtype.Float32, defaultBackend=backend, defaultDevice=device, defaultFetchDevices=getDevices) ] diff --git a/tests/TensorMath.Tests/TestOp.AvgPool.fs b/tests/TensorMath.Tests/TestOp.AvgPool.fs new file mode 100644 index 0000000..8f64840 --- /dev/null +++ b/tests/TensorMath.Tests/TestOp.AvgPool.fs @@ -0,0 +1,358 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. + +namespace Tests + +open NUnit.Framework +open TensorMath + + +[] +type TestTensorAvgPool () = + [] + member _.TestTensorAvgPool1D () = + for combo in Combos.Float32 do + let t = combo.tensor([[[-2.1704, -1.1558, 2.5995, 1.3858, -1.3157, -0.3179, 0.9593, -2.1432, 0.7169, -1.7999], + [ 0.4564, -0.2262, 0.3495, 0.4587, -0.3858, 0.2349, 0.2978, 0.6288, 1.1539, 0.2121]], + + [[ 0.6654, 0.7151, 0.9980, 0.1321, -2.0009, -1.1897, 1.0608, -1.8059, -0.2344, 1.6387], + [ 1.1872, -2.2679, -0.0297, -0.2067, -1.5622, -0.3916, 0.6039, -1.1469, 0.4560, 1.2069]]]) + + let tk3 = t.avgpool1d(kernelSize=3) + let tk3Correct = + combo.tensor([[[-0.2422, -0.0826, -0.1557], + [ 0.1932, 0.1026, 0.6935]], + + [[ 0.7928, -1.0195, -0.3265], + [-0.3701, -0.7202, -0.0290]]]) + Assert.That(tk3Correct.allclose(tk3,0.01)) + + let tk3p1 = t.avgpool1d(3, padding=1) + let tk3p1Correct = combo.tensor([[[-1.1087, 0.8899, -0.5006, -0.3610], + [ 0.0767, 0.1408, 0.3872, 0.4553]], + + [[ 0.4602, -0.2903, -0.6449, 0.4681], + [-0.3602, -0.5995, -0.3115, 0.5543]]]) + Assert.That(tk3p1Correct.allclose(tk3p1,0.01)) + + let tk3s2 = t.avgpool1d(3, stride=2) + let tk3s2Correct = combo.tensor([[[-0.2422, 0.8899, -0.2248, -0.1557], + [ 0.1932, 0.1408, 0.0490, 0.6935]], + + [[ 0.7928, -0.2903, -0.7099, -0.3265], + [-0.3701, -0.5995, -0.4500, -0.0290]]]) + Assert.That(tk3s2Correct.allclose(tk3s2,0.01)) + + let tk4s3p2 = t.avgpool1d(4, stride=3, padding=2) + let tk4s3p2Correct = combo.tensor([[[-0.8316, 0.3784, -0.7044, -0.8066], + [ 0.0576, 0.0491, 0.1939, 0.4987]], + + [[ 0.3451, -0.0389, -0.9839, -0.1004], + [-0.2702, -1.0166, -0.6242, 0.1290]]]) + Assert.That(tk4s3p2Correct.allclose(tk4s3p2,0.01)) + + for combo in Combos.IntegralAndBool do + let x = combo.zeros([1;4;4]) + isInvalidOp(fun () -> x.avgpool1d(3)) + + [] + member _.TestTensorAvgPool2D () = + for combo in Combos.Float32 do + let t = combo.tensor([[[[ 0.7372, 0.7090, 0.9216, 0.3363, 1.0141, -0.7642, 0.3801, -0.9568], + [-0.3520, -1.2336, 1.8489, 0.9929, -0.8138, 0.0978, -1.3206, -1.5434], + [ 0.6883, -0.2346, 0.1735, 0.6695, -1.9122, 1.1338, -0.1248, 0.2164], + [-1.1349, 0.3008, -0.1635, -1.0362, -0.6487, -0.8422, -0.4334, 1.0604], + [-2.1562, -0.1079, 0.5744, -0.7275, 1.0254, -0.0508, -0.0525, -0.0746], + [-0.7494, 0.6819, -1.7327, -0.4838, -0.6120, 1.6331, 0.1797, -0.6068], + [ 0.6400, 0.1389, 0.3033, 0.3195, 0.9934, 1.2455, -1.0953, 0.9922], + [ 0.2375, 0.6003, -1.1614, 1.0146, 0.2100, -1.0145, -0.1933, 1.1415]], + + [[-0.0819, 0.2091, 0.4351, 1.7527, -1.1970, 2.1048, 1.0200, -0.5153], + [ 1.0867, -1.8738, -0.2754, -0.5089, 0.8850, -0.4751, -0.7820, 1.4476], + [-0.9072, 0.9977, 
-0.9106, -0.3171, -1.2444, 0.7102, 0.5656, 1.2660], + [ 0.1986, -0.4967, 0.2384, -0.6551, 1.0156, 0.0520, -0.1964, 1.1367], + [ 0.8948, 2.2070, 0.9938, 0.5311, -1.0674, 0.3894, 0.4192, -0.6235], + [ 2.7646, -0.6509, 0.4669, -1.8774, -0.6341, 0.5113, 1.2398, 2.5090], + [ 1.0722, 0.8162, -2.3271, 1.3826, 1.3832, 0.6205, -0.9138, -0.8237], + [-0.0688, -1.6786, 0.1672, -0.7255, -0.1228, -0.1603, -2.1906, -2.6372]]], + + + [[[-1.0461, 0.4063, 0.2085, -0.7598, -1.3893, -0.8866, 1.0594, -0.6184], + [ 2.1120, -0.6475, -0.3964, 0.0378, 0.0138, -0.1672, 0.9265, -1.7734], + [-0.2313, 0.6284, -0.0508, -0.1014, -0.5059, 0.8666, -0.7010, -0.5073], + [ 0.1709, 0.2466, 0.1781, -1.6740, -0.0251, -1.4144, -2.1012, 0.3922], + [ 0.9141, 0.6582, -0.0826, -0.7104, 1.7133, 1.2406, 1.1415, -0.6222], + [-2.1525, -0.2996, -1.3787, 0.0336, -1.4643, 0.6534, 0.3996, 0.3145], + [-0.3298, 0.3855, -0.5100, 1.2770, 0.5306, -0.6604, -0.0489, 0.0609], + [-0.1552, -1.1218, -0.8435, 0.2365, 1.4428, 0.4234, -1.1083, -1.3874]], + + [[ 0.0511, 0.1216, -1.0103, -1.2529, 1.7200, -0.0225, 0.7446, -0.8076], + [ 0.2543, 1.4250, 0.7869, 0.0526, -2.1598, 1.8228, -0.4628, 1.4234], + [ 0.5492, 0.8668, 0.2120, 0.6599, -1.0934, -1.3726, 0.4788, -0.1171], + [ 0.5121, 1.2607, -0.4565, 0.5448, -2.5025, -0.5503, -1.3373, 0.1711], + [-0.3939, -0.6382, -0.0899, -1.4706, 0.4580, 0.3304, 1.8958, 0.1178], + [ 0.1109, 0.2468, 0.3485, -0.0960, -0.0432, -0.3026, -1.9750, 0.4057], + [-1.1117, -0.3422, 1.2130, -1.1206, 0.9506, -0.7723, 0.3162, -0.5487], + [ 0.6304, -0.9149, 0.6075, -0.5371, 1.5875, -0.2979, -0.5832, -3.0311]]]]) + + // Python: tk3 = torch.nn.functional.avg_pool2d(t, 3) + let tk3 = t.avgpool2d(kernelSize=3) + let tk3Correct = + combo.tensor([[[[ 0.3620, 0.0838], + [-0.4986, -0.1936]], + + [[-0.1467, 0.1900], + [ 0.7352, -0.1927]]], + + + [[[ 0.1092, -0.3213], + [-0.1939, -0.1830]], + + [[ 0.3618, -0.1829], + [ 0.1001, -0.4036]]]]) + Assert.That(tk3Correct.allclose(tk3,0.01)) + + let tk3p1 = t.avgpool2d(3, padding=1) + let tk3p1Correct = combo.tensor([[[[-0.0155, 0.4778, -0.4563], + [-0.2938, -0.2273, 0.0925], + [ 0.1721, -0.1277, 0.2536]], + + [[-0.0733, 0.1213, 0.3111], + [ 0.3216, -0.1573, 0.4132], + [ 0.2505, -0.2541, -0.2050]]], + + + [[[ 0.0916, -0.2539, -0.1622], + [ 0.2652, -0.1399, -0.1895], + [-0.4082, -0.0751, -0.1504]], + + [[ 0.2058, -0.2071, 0.2998], + [ 0.2396, -0.4154, -0.0426], + [-0.1534, 0.3234, -0.7543]]]]) + Assert.That(tk3p1Correct.allclose(tk3p1,0.01)) + + let tk3s2 = t.avgpool2d(3, stride=2) + let tk3s2Correct = combo.tensor([[[[ 0.3620, 0.3590, -0.2566], + [-0.2289, -0.2273, -0.2117], + [-0.2675, -0.0378, 0.3629]], + + [[-0.1467, -0.1534, 0.1763], + [ 0.3573, -0.1573, 0.0715], + [ 0.6931, -0.1276, 0.2165]]], + + + [[[ 0.1092, -0.3271, -0.0871], + [ 0.2702, -0.1399, 0.0238], + [-0.3106, -0.0657, 0.3895]], + + [[ 0.3618, -0.2317, -0.0383], + [ 0.2025, -0.4154, -0.4103], + [-0.0730, 0.0166, 0.0953]]]]) + Assert.That(tk3s2Correct.allclose(tk3s2,0.01)) + + let tk4s3p2 = t.avgpool2d(4, stride=3, padding=2) + let tk4s3p2Correct = combo.tensor([[[[-0.0087, 0.2360, -0.2442], + [-0.2644, -0.0808, -0.2677], + [-0.0447, 0.0648, 0.2326]], + + [[-0.0412, -0.0358, 0.1555], + [ 0.1317, -0.0301, 0.2187], + [ 0.3348, -0.0710, -0.1313]]], + + + [[[ 0.0515, -0.1579, -0.1772], + [ 0.2407, -0.0449, -0.0952], + [-0.1313, -0.0083, 0.1643]], + + [[ 0.1157, -0.0198, 0.1411], + [ 0.2398, -0.1340, -0.1811], + [-0.1508, 0.0100, -0.0932]]]]) + Assert.That(tk4s3p2Correct.allclose(tk4s3p2,0.01)) + + for combo in Combos.IntegralAndBool do + let x 
= combo.zeros([1;4;4]) + isInvalidOp(fun () -> x.avgpool2d(3)) + + [] + member _.TestTensorAvgPool3D () = + for combo in Combos.Float32 do + let t = combo.tensor([[[[ 0.4633, 0.9173, 0.4568, -1.7660, -0.1077], + [-2.1112, 1.5542, 0.5720, -1.0952, -1.8144], + [ 0.3505, -0.9843, -2.5655, -0.9835, 1.2303], + [ 0.8156, 1.5415, 1.3066, -1.1820, 0.2060], + [ 0.0684, 1.5936, 0.2956, -0.5176, -1.6960]], + + [[-1.7281, -0.7697, -2.2310, 0.3580, 0.6299], + [ 0.8558, -0.6180, -1.6077, -0.6779, 1.2910], + [ 0.1885, -0.7006, -0.1863, -1.6729, -0.5761], + [ 0.1940, -0.0399, 0.9329, 1.0687, 0.0955], + [-1.0189, 0.4046, 1.1762, 0.3842, 0.6831]], + + [[ 0.2996, 0.5738, 0.0369, 0.2835, -0.2363], + [ 0.6847, -0.4949, -0.3974, 0.6808, -1.2942], + [ 1.0910, -0.0594, -0.0037, -0.3355, -1.5056], + [-0.0965, 1.1358, 1.2851, -1.7333, -1.1705], + [ 0.0966, -1.2780, 1.2939, 1.3469, -0.2603]], + + [[-0.5270, 1.1442, 0.1259, -1.2813, 0.3536], + [ 0.1579, 0.0828, 1.3531, -0.9110, -0.8747], + [ 0.2473, -0.1507, -0.4880, 0.4575, 1.1186], + [ 2.0900, 1.0479, -0.7209, -1.6928, 1.8761], + [ 2.2015, -0.5097, 0.7364, -1.5177, 0.9212]], + + [[ 1.0358, 1.6584, -1.9654, -1.3971, 1.5641], + [ 0.4032, 0.7737, 0.9351, -0.5245, 0.0783], + [-1.2932, -0.9885, -1.1850, -0.7403, 0.1739], + [-0.5471, 0.5017, -1.0571, 1.7574, -0.0911], + [ 0.6944, -1.2772, 0.7473, -1.0983, 1.1462]]], + + + [[[-1.2563, 0.0688, 1.0405, -0.2582, 0.7333], + [ 2.0711, -0.1815, 0.8876, -0.2907, 1.1195], + [-0.3912, 0.3624, 1.0576, -0.4748, -1.4021], + [ 1.2176, -0.6160, -0.3471, 1.1689, 0.5677], + [-0.0639, 0.3765, -0.2614, 1.8267, 0.0315]], + + [[ 1.2927, 1.0709, -0.8808, 0.8106, -0.5315], + [ 0.7614, -0.3935, 1.2451, -0.0598, -0.5887], + [-0.4089, -0.8598, 0.2478, 0.1282, -0.2745], + [-0.4139, -1.2905, -0.2625, -2.0453, 1.8941], + [-0.2400, -1.2830, -0.3503, -0.8536, -0.5927]], + + [[ 0.8200, 1.8860, -0.5216, -0.9590, -0.9760], + [-1.5796, 2.2379, -0.5714, -1.5612, 1.4035], + [-0.6434, -1.2257, 0.1408, 0.3781, -2.2344], + [ 0.4963, 0.2431, 0.6835, 0.0047, 1.3374], + [-1.5899, 2.5382, 0.9503, 1.9080, 1.8315]], + + [[ 0.5853, 1.9343, -0.7472, 2.1774, -2.1895], + [-0.6187, -0.2870, 1.2485, 2.4069, -0.2632], + [-1.6047, -0.3379, 0.5372, 1.7098, 1.6220], + [ 0.5255, 0.2564, -1.8615, 1.5519, -0.5655], + [-0.9452, -1.1828, -1.8192, 1.1349, 0.9806]], + + [[-1.8198, 0.5455, 1.1761, 1.3070, -0.4654], + [ 1.2673, 0.2608, 0.8385, -1.0407, -0.6288], + [-0.3860, 1.3343, 1.3084, 0.5794, 0.4639], + [ 0.4750, -0.9006, -1.5002, 0.8689, -0.0379], + [ 0.2891, 0.0195, -0.0503, -0.3235, 1.5407]]]]).unsqueeze(0) + + // Python: tk3 = torch.nn.functional.avg_pool3d(t, 2) + let tk3 = t.avgpool3d(kernelSize=2) + let tk3Correct = + combo.tensor([[[[-0.1796, -0.7489], + [ 0.1707, -0.4103]], + + [[ 0.2401, -0.0137], + [ 0.6632, -0.4040]]], + + + [[[ 0.4292, 0.3118], + [-0.3000, -0.0659]], + + [[ 0.6223, 0.1840], + [-0.2863, 0.3931]]]]).unsqueeze(0) + Assert.That(tk3Correct.allclose(tk3,0.01)) + + let tk3p1 = t.avgpool3d(3, padding=1) + let tk3p1Correct = combo.tensor([[[[-0.0532, -0.2219], + [ 0.0894, -0.0741]], + + [[ 0.2145, -0.1285], + [ 0.1076, -0.0274]]], + + + [[[ 0.1272, 0.1195], + [-0.1337, 0.0022]], + + [[ 0.1938, 0.0235], + [-0.0977, 0.4126]]]]).unsqueeze(0) + Assert.That(tk3p1Correct.allclose(tk3p1,0.05)) + + let tk3s2 = t.avgpool3d(3, stride=2) + let tk3s2Correct = combo.tensor([[[[-0.2375, -0.5007], + [ 0.2532, -0.1142]], + + [[ 0.1130, -0.2214], + [ 0.1301, -0.0274]]], + + + [[[ 0.2325, -0.0886], + [-0.0716, 0.1873]], + + [[ 0.2140, 0.1903], + [-0.1574, 
0.4126]]]]).unsqueeze(0) + Assert.That(tk3s2Correct.allclose(tk3s2,0.01)) + + // Python: tk3 = torch.nn.functional.avg_pool3d(t, 4, stride=3, padding=2) + let tk4s3p2 = t.avgpool3d(4, stride=3, padding=2) + let tk4s3p2Correct = combo.tensor([[[[-0.0224, -0.0767], + [ 0.0327, -0.0403]], + + [[ 0.0552, -0.0525], + [ 0.0590, -0.0462]]], + + + [[[ 0.0536, 0.0592], + [-0.0211, -0.0237]], + + [[ 0.1244, 0.1232], + [-0.0857, 0.1654]]]]).unsqueeze(0) + Assert.That(tk4s3p2Correct.allclose(tk4s3p2,0.01)) + + for combo in Combos.IntegralAndBool do + let x = combo.zeros([1;4;4]) + isInvalidOp(fun () -> x.avgpool3d(3)) + + [] + member _.TestTensorAvgPoolReverse1D () = + for combo in Combos.Float32 do + let t = combo.ones([2;2;6]) + + let tk3 = t.avgpool1d(kernelSize=3) + let tk3Correct = combo.ones([2;2;2]) + // printfn $"tk3Correct = {tk3Correct}" + // printfn $"tk3 = {tk3}" + Assert.That(tk3Correct.allclose(tk3,0.01)) + + let tk3r = tk3.avgpoolReverse1d(t, 3) + let tk3rCorrect = combo.full([2;2;6], 1.0/3.0) + // printfn $"tk3r = {tk3r}" + // printfn $"tk3rCorrect = {tk3rCorrect}" + Assert.That(tk3rCorrect.allclose(tk3r,0.01)) + + [] + member _.TestTensorAvgPoolReverse2D () = + for combo in Combos.Float32 do + let t = combo.ones([2;2;6;6]) + + let tk3 = t.avgpool2d(kernelSize=3) + let tk3Correct = combo.ones([2;2;2;2]) + // printfn $"tk3Correct = {tk3Correct}" + // printfn $"tk3 = {tk3}" + Assert.That(tk3Correct.allclose(tk3,0.01)) + + let tk3r = tk3.avgpoolReverse2d(t, 3) + let tk3rCorrect = combo.full([2;2;6;6], 1.0/9.0) + // printfn $"tk3r = {tk3r}" + // printfn $"tk3rCorrect = {tk3rCorrect}" + Assert.That(tk3rCorrect.allclose(tk3r,0.01)) + + [] + member _.TestTensorAvgPoolReverse3D () = + for combo in Combos.Float32 do + let t = combo.ones([2;2;6;6;6]) + + let tk3 = t.avgpool3d(kernelSize=3) + let tk3Correct = combo.ones([2;2;2;2;2]) + // printfn $"tk3Correct = {tk3Correct}" + // printfn $"tk3 = {tk3}" + Assert.That(tk3Correct.allclose(tk3,0.01)) + + let tk3r = tk3.avgpoolReverse3d(t, 3) + let tk3rCorrect = combo.full([2;2;6;6;6], 1.0/27.0) + // printfn $"tk3r = {tk3r}" + // printfn $"tk3rCorrect = {tk3rCorrect}" + Assert.That(tk3rCorrect.allclose(tk3r,0.01)) diff --git a/tests/TensorMath.Tests/TestOp.BMM.fs b/tests/TensorMath.Tests/TestOp.BMM.fs new file mode 100644 index 0000000..fa53b4c --- /dev/null +++ b/tests/TensorMath.Tests/TestOp.BMM.fs @@ -0,0 +1,66 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. 
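The avgpoolReverse tests above rest on the fact that reversing an average pool spreads each pooled value uniformly over its kernel window, so pooling a tensor of ones and reversing yields a constant 1/k^d for kernel size k in d dimensions. A minimal sketch of that identity in the 1-d case, using the avgpool1d/avgpoolReverse1d calls exercised above:

    let t = dsharp.ones([2; 2; 6])
    let pooled = t.avgpool1d(kernelSize=3)        // every window of ones averages to 1
    let spread = pooled.avgpoolReverse1d(t, 3)    // each pooled 1 is spread as 1/3 over its window
    assert (spread.allclose(dsharp.full([2; 2; 6], 1.0/3.0), 0.01))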
+ +namespace Tests + +open NUnit.Framework +open TensorMath + + +[] +type TestTensorBMM () = + [] + member _.TestTensorBMM () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([[[-1.0372e+00, 7.5673e-01, 1.9448e+00, 3.6433e+00, -3.9134e-01], + [-1.7011e+00, 3.0675e+00, 1.8387e+00, -2.3037e-01, 5.0916e-01], + [ 2.1869e+00, 1.5561e+00, 1.2905e+00, -3.5149e-03, -2.0392e+00], + [-1.0669e+00, 3.0033e-01, 1.2472e+00, 7.8584e-01, -5.0704e-01]], + + [[-2.8406e-02, -7.2715e-01, -2.6762e-02, -7.6213e-02, 1.3507e+00], + [-1.0652e+00, -8.9129e-01, 1.3157e+00, 1.5385e+00, -7.5446e-02], + [ 1.0338e-01, 9.9040e-02, 8.3478e-01, 2.1243e+00, 1.4483e+00], + [-3.1956e-01, 1.1361e+00, 8.4474e-01, -1.2423e+00, -1.7816e+00]], + + [[-4.4167e-01, 4.1456e-01, 9.4991e-01, -1.3340e+00, -2.4315e+00], + [-1.8150e+00, 1.1680e+00, -9.0262e-01, -4.3182e-01, 3.6071e-01], + [-1.3226e-02, 1.0893e+00, 7.8359e-01, 3.6028e-01, 2.2133e-01], + [-1.5645e+00, 2.5328e+00, 1.6512e+00, 1.5900e-01, -1.4043e+00]]]) + let t2 = combo.tensor([[[ 0.3481, 0.9439], + [-0.5359, -1.7125], + [ 1.4898, 0.6685], + [-1.0406, 0.7051], + [-0.2785, 0.6121]], + + [[ 0.4999, 0.5361], + [ 3.0867, -0.8065], + [-0.9747, -1.7684], + [-1.5903, -0.6835], + [-0.3202, -2.0839]], + + [[ 0.9485, 0.0655], + [-0.5164, 0.4726], + [-0.7083, 0.5276], + [ 0.2138, -0.9645], + [ 0.1784, 0.3654]]]) + + let t3 = t1.bmm(t2) + let t3Correct = combo.tensor([[[-1.5514, 1.3546], + [ 0.6013, -5.4802], + [ 2.4214, -0.9885], + [ 0.6491, -0.4439]], + + [[-2.5438, -2.1440], + [-6.9885, -3.0734], + [-4.2981, -5.9709], + [ 5.0698, 1.9803]], + + [[-2.0247, 1.0662], + [-1.7133, 0.5051], + [-1.0136, 0.6607], + [-4.1779, 1.2990]]]) + + Assert.That(t3Correct.allclose(t3, 0.1)) + diff --git a/tests/TensorMath.Tests/TestOp.Det.fs b/tests/TensorMath.Tests/TestOp.Det.fs new file mode 100644 index 0000000..2ea1287 --- /dev/null +++ b/tests/TensorMath.Tests/TestOp.Det.fs @@ -0,0 +1,40 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. + +namespace Tests + +open NUnit.Framework +open TensorMath + + +[] +type TestTensorDet () = + [] + member _.TestTensorDet () = + for combo in Combos.FloatingPointExcept16s do + let t3x3 = combo.tensor([[ 1.3038, -0.8699, 1.2059], + [ 1.0837, -1.5076, -0.1286], + [-0.9857, 0.3633, -1.0049]]) + let t3x3Det = t3x3.det() + let t3x3DetCorrect = combo.tensor(-0.3387) + + Assert.That(t3x3DetCorrect.allclose(t3x3Det, 0.01)) + + let t4x2x2 = combo.tensor([[[-2.1301, -1.4122], + [-0.4353, -0.6708]], + + [[ 0.0696, -1.3661], + [ 0.4162, 0.0663]], + + [[-1.3677, -0.6721], + [ 0.6547, 0.5127]], + + [[-1.1081, 1.0203], + [-0.1355, 0.0641]]]) + let t4x2x2Det = t4x2x2.det() + let t4x2x2DetCorrect = combo.tensor([ 0.8141, 0.5732, -0.2612, 0.0672]) + + Assert.That(t4x2x2DetCorrect.allclose(t4x2x2Det, 0.01)) + diff --git a/tests/TensorMath.Tests/TestOp.Inv.fs b/tests/TensorMath.Tests/TestOp.Inv.fs new file mode 100644 index 0000000..4f5f5ca --- /dev/null +++ b/tests/TensorMath.Tests/TestOp.Inv.fs @@ -0,0 +1,52 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. 
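For the batched 2×2 case in the det test above, the determinant reduces to ad - bc, which makes the expected values straightforward to verify by hand. A minimal sketch for the first batch element, using the tensor indexing and det API shown in this patch:

    let m = dsharp.tensor([[-2.1301, -1.4122], [-0.4353, -0.6708]])
    // (-2.1301)(-0.6708) - (-1.4122)(-0.4353) ≈ 0.8141, matching the expected value above
    let detByHand = m[0, 0] * m[1, 1] - m[0, 1] * m[1, 0]
    assert (detByHand.allclose(m.det(), 0.01))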
+ +namespace Tests + +open NUnit.Framework +open TensorMath + + +[] +type TestTensorInv () = + [] + member _.TestTensorInv () = + for combo in Combos.FloatingPointExcept16s do + let t3x3 = combo.tensor([[ 1.3038, -0.8699, 1.2059], + [ 1.0837, -1.5076, -0.1286], + [-0.9857, 0.3633, -1.0049]]) + let t3x3Inv = t3x3.inv() + let t3x3InvCorrect = combo.tensor([[-4.6103, 1.2872, -5.6974], + [-3.5892, 0.3586, -4.3532], + [ 3.2248, -1.1330, 3.0198]]) + + Assert.That(t3x3InvCorrect.allclose(t3x3Inv, 0.01)) + + let t4x2x2 = combo.tensor([[[-2.1301, -1.4122], + [-0.4353, -0.6708]], + + [[ 0.0696, -1.3661], + [ 0.4162, 0.0663]], + + [[-1.3677, -0.6721], + [ 0.6547, 0.5127]], + + [[-1.1081, 1.0203], + [-0.1355, 0.0641]]]) + let t4x2x2Inv = t4x2x2.inv() + let t4x2x2InvCorrect = combo.tensor([[[ -0.8239, 1.7344], + [ 0.5346, -2.6162]], + + [[ 0.1156, 2.3836], + [ -0.7261, 0.1214]], + + [[ -1.9629, -2.5729], + [ 2.5065, 5.2359]], + + [[ 0.9526, -15.1662], + [ 2.0147, -16.4717]]]) + + Assert.That(t4x2x2InvCorrect.allclose(t4x2x2Inv, 0.01)) + diff --git a/tests/TensorMath.Tests/TestOp.Norm.fs b/tests/TensorMath.Tests/TestOp.Norm.fs new file mode 100644 index 0000000..d6644ac --- /dev/null +++ b/tests/TensorMath.Tests/TestOp.Norm.fs @@ -0,0 +1,127 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. + +namespace Tests + +open NUnit.Framework +open TensorMath + + +[] +type TestTensorNorm () = + [] + member _.TestTensornorm () = + for combo in Combos.FloatingPointExcept16s do + let t = combo.tensor([[ -0.7868, -2.5744, 6.1267, 0.], + [ -6.3106, -9.7653, 7.7777, 16.2869], + [-10.8601, 15.1932, 0., 1.7327]]) + + let n0 = t.norm(0.) + let n0Dim0 = t.norm(0., dim=0) + let n0Dim1 = t.norm(0., dim=1) + let n0Dim0KeepDim = t.norm(0., dim=0, keepDim=true) + let n0Dim1KeepDim = t.norm(0., dim=1, keepDim=true) + + let n0Correct = combo.tensor(10.) + let n0Dim0Correct = combo.tensor([3., 3., 2., 2.]) + let n0Dim1Correct = combo.tensor([3., 4., 3.]) + let n0Dim0KeepDimCorrect = combo.tensor([[3., 3., 2., 2.]]) + let n0Dim1KeepDimCorrect = combo.tensor([[3.], [4.], [3.]]) + + Assert.That(n0Correct.allclose(n0, 0.01)) + Assert.That(n0Dim0Correct.allclose(n0Dim0, 0.01)) + Assert.That(n0Dim1Correct.allclose(n0Dim1, 0.01)) + Assert.That(n0Dim0KeepDimCorrect.allclose(n0Dim0KeepDim, 0.01)) + Assert.That(n0Dim1KeepDimCorrect.allclose(n0Dim1KeepDim, 0.01)) + + let n1 = t.norm(1.) + let n1Dim0 = t.norm(1., dim=0) + let n1Dim1 = t.norm(1., dim=1) + let n1Dim0KeepDim = t.norm(1., dim=0, keepDim=true) + let n1Dim1KeepDim = t.norm(1., dim=1, keepDim=true) + + let n1Correct = combo.tensor(77.4144) + let n1Dim0Correct = combo.tensor([17.9575, 27.5329, 13.9044, 18.0196]) + let n1Dim1Correct = combo.tensor([ 9.4879, 40.1405, 27.7860]) + let n1Dim0KeepDimCorrect = combo.tensor([[17.9575, 27.5329, 13.9044, 18.0196]]) + let n1Dim1KeepDimCorrect = combo.tensor([[ 9.4879], [40.1405], [27.7860]]) + + Assert.That(n1Correct.allclose(n1, 0.01)) + Assert.That(n1Dim0Correct.allclose(n1Dim0, 0.01)) + Assert.That(n1Dim1Correct.allclose(n1Dim1, 0.01)) + Assert.That(n1Dim0KeepDimCorrect.allclose(n1Dim0KeepDim, 0.01)) + Assert.That(n1Dim1KeepDimCorrect.allclose(n1Dim1KeepDim, 0.01)) + + let n2 = t.norm(2.) 
+ let n2Dim0 = t.norm(2., dim=0) + let n2Dim1 = t.norm(2., dim=1) + let n2Dim0KeepDim = t.norm(2., dim=0, keepDim=true) + let n2Dim1KeepDim = t.norm(2., dim=1, keepDim=true) + + let n2Correct = combo.tensor(29.2831) + let n2Dim0Correct = combo.tensor([12.5851, 18.2434, 9.9010, 16.3788]) + let n2Dim1Correct = combo.tensor([ 6.6920, 21.4695, 18.7557]) + let n2Dim0KeepDimCorrect = combo.tensor([[12.5851, 18.2434, 9.9010, 16.3788]]) + let n2Dim1KeepDimCorrect = combo.tensor([[ 6.6920], [21.4695], [18.7557]]) + + Assert.That(n2Correct.allclose(n2, 0.01)) + Assert.That(n2Dim0Correct.allclose(n2Dim0, 0.01)) + Assert.That(n2Dim1Correct.allclose(n2Dim1, 0.01)) + Assert.That(n2Dim0KeepDimCorrect.allclose(n2Dim0KeepDim, 0.01)) + Assert.That(n2Dim1KeepDimCorrect.allclose(n2Dim1KeepDim, 0.01)) + + let nInf = t.norm(System.Double.PositiveInfinity) + let nInfDim0 = t.norm(System.Double.PositiveInfinity, dim=0) + let nInfDim1 = t.norm(System.Double.PositiveInfinity, dim=1) + let nInfDim0KeepDim = t.norm(System.Double.PositiveInfinity, dim=0, keepDim=true) + let nInfDim1KeepDim = t.norm(System.Double.PositiveInfinity, dim=1, keepDim=true) + + let nInfCorrect = combo.tensor(16.2869) + let nInfDim0Correct = combo.tensor([10.8601, 15.1932, 7.7777, 16.2869]) + let nInfDim1Correct = combo.tensor([ 6.1267, 16.2869, 15.1932]) + let nInfDim0KeepDimCorrect = combo.tensor([[10.8601, 15.1932, 7.7777, 16.2869]]) + let nInfDim1KeepDimCorrect = combo.tensor([[ 6.1267], [16.2869], [15.1932]]) + + Assert.That(nInfCorrect.allclose(nInf, 0.01)) + Assert.That(nInfDim0Correct.allclose(nInfDim0, 0.01)) + Assert.That(nInfDim1Correct.allclose(nInfDim1, 0.01)) + Assert.That(nInfDim0KeepDimCorrect.allclose(nInfDim0KeepDim, 0.01)) + Assert.That(nInfDim1KeepDimCorrect.allclose(nInfDim1KeepDim, 0.01)) + + let nNegInf = t.norm(System.Double.NegativeInfinity) + let nNegInfDim0 = t.norm(System.Double.NegativeInfinity, dim=0) + let nNegInfDim1 = t.norm(System.Double.NegativeInfinity, dim=1) + let nNegInfDim0KeepDim = t.norm(System.Double.NegativeInfinity, dim=0, keepDim=true) + let nNegInfDim1KeepDim = t.norm(System.Double.NegativeInfinity, dim=1, keepDim=true) + + let nNegInfCorrect = combo.tensor(0.) 
+ let nNegInfDim0Correct = combo.tensor([0.7868, 2.5744, 0.0000, 0.0000]) + let nNegInfDim1Correct = combo.tensor([0.0000, 6.3106, 0.0000]) + let nNegInfDim0KeepDimCorrect = combo.tensor([[0.7868, 2.5744, 0.0000, 0.0000]]) + let nNegInfDim1KeepDimCorrect = combo.tensor([[0.0000], [6.3106], [0.0000]]) + + Assert.That(nNegInfCorrect.allclose(nNegInf, 0.01)) + Assert.That(nNegInfDim0Correct.allclose(nNegInfDim0, 0.01)) + Assert.That(nNegInfDim1Correct.allclose(nNegInfDim1, 0.01)) + Assert.That(nNegInfDim0KeepDimCorrect.allclose(nNegInfDim0KeepDim, 0.01)) + Assert.That(nNegInfDim1KeepDimCorrect.allclose(nNegInfDim1KeepDim, 0.01)) + + let nOther = t.norm(3.5) + let nOtherDim0 = t.norm(3.5, dim=0) + let nOtherDim1 = t.norm(3.5, dim=1) + let nOtherDim0KeepDim = t.norm(3.5, dim=0, keepDim=true) + let nOtherDim1KeepDim = t.norm(3.5, dim=1, keepDim=true) + + let nOtherCorrect = combo.tensor(20.7627) + let nOtherDim0Correct = combo.tensor([11.3016, 16.0621, 8.6211, 16.2887]) + let nOtherDim1Correct = combo.tensor([ 6.2108, 17.4708, 16.4092]) + let nOtherDim0KeepDimCorrect = combo.tensor([[11.3016, 16.0621, 8.6211, 16.2887]]) + let nOtherDim1KeepDimCorrect = combo.tensor([[ 6.2108], [17.4708], [16.4092]]) + + Assert.That(nOtherCorrect.allclose(nOther, 0.01)) + Assert.That(nOtherDim0Correct.allclose(nOtherDim0, 0.01)) + Assert.That(nOtherDim1Correct.allclose(nOtherDim1, 0.01)) + Assert.That(nOtherDim0KeepDimCorrect.allclose(nOtherDim0KeepDim, 0.01)) + Assert.That(nOtherDim1KeepDimCorrect.allclose(nOtherDim1KeepDim, 0.01)) \ No newline at end of file diff --git a/tests/TensorMath.Tests/TestOp.Outer.fs b/tests/TensorMath.Tests/TestOp.Outer.fs new file mode 100644 index 0000000..b9e1155 --- /dev/null +++ b/tests/TensorMath.Tests/TestOp.Outer.fs @@ -0,0 +1,47 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. 
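The norm cases above follow the usual vector p-norm conventions: p = 0 counts non-zero entries, p = 1 sums absolute values, p = 2 is the Euclidean norm, and p = +inf / -inf take the maximum/minimum absolute value. A minimal scalar reference of the same conventions, assuming plain float arrays rather than tensors:

    let pnorm (p: float) (xs: float[]) =
        if p = 0.0 then xs |> Array.sumBy (fun x -> if x <> 0.0 then 1.0 else 0.0)
        elif p = infinity then xs |> Array.map abs |> Array.max
        elif p = -infinity then xs |> Array.map abs |> Array.min
        else (xs |> Array.sumBy (fun x -> abs x ** p)) ** (1.0 / p)

    pnorm 2.0 [| 3.0; 4.0 |]   // 5.0, matching t.norm(2.) over the same values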
+ +namespace Tests + +open NUnit.Framework +open TensorMath + + +[] +type TestTensorOuter () = + [] + member _.TestTensorOuter () = + for combo in Combos.FloatingPointExcept16s do + let a1 = combo.tensor([ 1.7865, 1.2723, 0.2065, -0.4601, 0.3218]) + let b1 = combo.tensor([ 2.1136, 1.0551, -0.4575]) + + let a1outerb1 = a1.outer(b1) + let a1outerb1Correct = combo.tensor([[ 3.7759, 1.8849, -0.8173], + [ 2.6891, 1.3424, -0.5820], + [ 0.4365, 0.2179, -0.0945], + [-0.9725, -0.4854, 0.2105], + [ 0.6801, 0.3395, -0.1472]]) + + Assert.That(a1outerb1Correct.allclose(a1outerb1, 0.01)) + + let a2 = combo.tensor([[ 1.7865, 1.2723, 0.2065, -0.4601, 0.3218], + [-0.2400, -0.1650, -1.1463, 0.0578, 1.5240]]) + let b2 = combo.tensor([[ 2.1136, 1.0551, -0.4575], + [ 1.1928, -2.3803, 0.3160]]) + + let a2outerb2 = a2.outer(b2) + let a2outerb2Correct = combo.tensor([[[ 3.7759, 1.8849, -0.8173], + [ 2.6891, 1.3424, -0.5820], + [ 0.4365, 0.2179, -0.0945], + [-0.9725, -0.4854, 0.2105], + [ 0.6801, 0.3395, -0.1472]], + + [[-0.2863, 0.5713, -0.0758], + [-0.1968, 0.3927, -0.0521], + [-1.3672, 2.7284, -0.3622], + [ 0.0690, -0.1376, 0.0183], + [ 1.8177, -3.6275, 0.4816]]]) + + Assert.That(a2outerb2Correct.allclose(a2outerb2, 0.01)) \ No newline at end of file diff --git a/tests/TensorMath.Tests/TestOp.Solve.fs b/tests/TensorMath.Tests/TestOp.Solve.fs new file mode 100644 index 0000000..c517bb1 --- /dev/null +++ b/tests/TensorMath.Tests/TestOp.Solve.fs @@ -0,0 +1,101 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. + +namespace Tests + +open NUnit.Framework +open TensorMath + + +[] +type TestTensorSolve () = + [] + member _.TestTensorSolve () = + for combo in Combos.FloatingPointExcept16s do + let t3x3 = combo.tensor([[-1.1606, 0.6579, 1.0674], + [-1.0226, 0.2406, -0.5414], + [ 0.1195, 1.2423, 0.0889]]) + let t3 = combo.tensor([ 0.6791, 0.5497, -0.3624]) + let t3x1 = t3.unsqueeze(1) + + let t3x3Solvet3 = t3x3.solve(t3) + let t3x3Solvet3Correct = combo.tensor([-0.6392, -0.2364, 0.0869]) + + let t3x3Solvet3x1 = t3x3.solve(t3x1) + let t3x3Solvet3x1Correct = t3x3Solvet3Correct.unsqueeze(1) + + Assert.That(t3x3Solvet3Correct.allclose(t3x3Solvet3, 0.01)) + Assert.That(t3x3Solvet3x1Correct.allclose(t3x3Solvet3x1, 0.01)) + + let t3x2 = combo.tensor([[-1.0439, 0.9510], + [-0.9118, 0.5726], + [-0.3161, 0.1080]]) + + let t3x3Solvet3x2 = t3x3.solve(t3x2) + let t3x3Solvet3x2Correct = combo.tensor([[ 0.7754, -0.6068], + [-0.3341, 0.1347], + [ 0.0710, 0.1482]]) + + Assert.That(t3x3Solvet3x2Correct.allclose(t3x3Solvet3x2, 0.01)) + + [] + member _.TestTensorSolveBatched () = + for combo in Combos.FloatingPointExcept16s do + let t4x2x2 = combo.tensor([[[-0.0184, 0.7381], + [ 0.3093, -0.4847]], + + [[ 0.0368, 1.2592], + [-0.1828, -0.2979]], + + [[-1.4190, -0.8507], + [ 0.7187, 2.7166]], + + [[-0.2591, -1.3985], + [ 1.7918, 1.2014]]]) + let t4x2 = combo.tensor([[ 1.0514, -1.0258], + [ 0.0513, 0.0240], + [-0.7468, -0.6901], + [ 1.7193, -1.5342]]) + let t4x2x1 = t4x2.unsqueeze(2) + + let t4x2x2Solvet4x2 = t4x2x2.solve(t4x2) + let t4x2x2Solvet4x2Correct = combo.tensor([[-1.1286, 1.3964], + [-0.2075, 0.0468], + [ 0.8065, -0.4674], + [-0.0365, -1.2226]]) + + let t4x2x2Solvet4x2x1 = t4x2x2.solve(t4x2x1) + let t4x2x2Solvet4x2x1Correct = t4x2x2Solvet4x2Correct.unsqueeze(2) + + Assert.That(t4x2x2Solvet4x2Correct.allclose(t4x2x2Solvet4x2, 0.01)) + 
Assert.That(t4x2x2Solvet4x2x1Correct.allclose(t4x2x2Solvet4x2x1, 0.01)) + + let t4x2x3 = combo.tensor([[[-0.6377, 0.2087, -0.8464], + [-0.3898, 1.1024, -0.2743]], + + [[-0.9679, 0.0065, -0.8171], + [-1.1492, 0.5241, -0.0426]], + + [[-1.9881, 1.1760, 1.2151], + [ 1.0708, -0.0690, -1.1514]], + + [[-0.7452, 0.1944, 0.5586], + [-0.2153, 0.0611, -0.3548]]]) + + let t4x2x2Solvet4x2x3 = t4x2x2.solve(t4x2x3) + let t4x2x2Solvet4x2x3Correct = combo.tensor([[[-2.7204, 4.1705, -2.7931], + [-0.9317, 0.3866, -1.2162]], + + [[ 7.9153, -3.0187, 1.3551], + [-0.9997, 0.0933, -0.6885]], + + [[ 1.3843, -0.9669, -0.7157], + [ 0.0279, 0.2304, -0.2345]], + + [[-0.5451, 0.1453, 0.0797], + [ 0.6338, -0.1659, -0.4142]]]) + + Assert.That(t4x2x2Solvet4x2x3Correct.allclose(t4x2x2Solvet4x2x3, 0.01)) + diff --git a/tests/TensorMath.Tests/TestRandom.fs b/tests/TensorMath.Tests/TestRandom.fs new file mode 100644 index 0000000..7ef3eeb --- /dev/null +++ b/tests/TensorMath.Tests/TestRandom.fs @@ -0,0 +1,34 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. + +namespace Tests + +open NUnit.Framework +open TensorMath.Util + +[] +type TestRandom () = + + [] + member _.TestRandomSeed () = + Random.Seed(1) + let a1 = Random.Uniform() + Random.Seed(1) + let a2 = Random.Uniform() + let a3 = Random.Uniform() + + Assert.AreEqual(a1, a2) + Assert.AreNotEqual(a2, a3) + + [] + member _.TestRandomUUID () = + Random.Seed(1) + let a1 = Random.UUID() + Random.Seed(1) + let a2 = Random.UUID() + let a3 = Random.UUID() + + Assert.AreEqual(a1, a2) + Assert.AreNotEqual(a2, a3) diff --git a/tests/TensorMath.Tests/TestTensor.Conv.fs b/tests/TensorMath.Tests/TestTensor.Conv.fs new file mode 100644 index 0000000..0f0141d --- /dev/null +++ b/tests/TensorMath.Tests/TestTensor.Conv.fs @@ -0,0 +1,2279 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. 
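The conv1d tests in this file use the NCW layout (batch, channels, width); the output width is floor((W + 2*padding - dilation*(K-1) - 1) / stride) + 1. A minimal hand-checkable sketch against the same dsharp.conv1d API exercised below:

    let x = dsharp.tensor([[[1.; 2.; 3.; 4.]]])   // N=1, C=1, W=4
    let w = dsharp.tensor([[[1.; 0.; -1.]]])      // Cout=1, Cin=1, K=3
    let y = dsharp.conv1d(x, w)                   // windows: 1-3 = -2 and 2-4 = -2; width (4-3)+1 = 2
    assert (y.allclose(dsharp.tensor([[[-2.; -2.]]]), 0.01))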
+ +namespace Tests + +open NUnit.Framework +open TensorMath + +[] +type TestTensorConvolutions () = + + [] + member _.TestTensorConv1D () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([[[0.3460; 0.4414; 0.2384; 0.7905; 0.2267]; + [0.5161; 0.9032; 0.6741; 0.6492; 0.8576]; + [0.3373; 0.0863; 0.8137; 0.2649; 0.7125]; + [0.7144; 0.1020; 0.0437; 0.5316; 0.7366]]; + + [[0.9871; 0.7569; 0.4329; 0.1443; 0.1515]; + [0.5950; 0.7549; 0.8619; 0.0196; 0.8741]; + [0.4595; 0.7844; 0.3580; 0.6469; 0.7782]; + [0.0130; 0.8869; 0.8532; 0.2119; 0.8120]]; + + [[0.5163; 0.5590; 0.5155; 0.1905; 0.4255]; + [0.0823; 0.7887; 0.8918; 0.9243; 0.1068]; + [0.0337; 0.2771; 0.9744; 0.0459; 0.4082]; + [0.9154; 0.2569; 0.9235; 0.9234; 0.3148]]]) + let t2 = combo.tensor([[[0.4941; 0.8710; 0.0606]; + [0.2831; 0.7930; 0.5602]; + [0.0024; 0.1236; 0.4394]; + [0.9086; 0.1277; 0.2450]]; + + [[0.5196; 0.1349; 0.0282]; + [0.1749; 0.6234; 0.5502]; + [0.7678; 0.0733; 0.3396]; + [0.6023; 0.6546; 0.3439]]]) + + let t3 = dsharp.conv1d(t1, t2) + let t3Correct = combo.tensor([[[2.8516; 2.0732; 2.6420]; + [2.3239; 1.7078; 2.7450]]; + + [[3.0127; 2.9651; 2.5219]; + [3.0899; 3.1496; 2.4110]]; + + [[3.4749; 2.9038; 2.7131]; + [2.7692; 2.9444; 3.2554]]]) + + Assert.That(t3.allclose(t3Correct, 0.01)) + + let t3p1 = dsharp.conv1d(t1, t2, padding=1) + let t3p1Correct = combo.tensor([[[1.4392; 2.8516; 2.0732; 2.6420; 2.1177]; + [1.4345; 2.3239; 1.7078; 2.7450; 2.1474]]; + + [[2.4208; 3.0127; 2.9651; 2.5219; 1.2960]; + [1.5544; 3.0899; 3.1496; 2.4110; 1.8567]]; + + [[1.2965; 3.4749; 2.9038; 2.7131; 1.7408]; + [1.3549; 2.7692; 2.9444; 3.2554; 1.2120]]]) + + Assert.That(t3p1.allclose(t3p1Correct, 0.01)) + + let t3p2 = dsharp.conv1d(t1, t2, padding=2) + let t3p2Correct = combo.tensor([[[0.6333; 1.4392; 2.8516; 2.0732; 2.6420; 2.1177; 1.0258]; + [0.6539; 1.4345; 2.3239; 1.7078; 2.7450; 2.1474; 1.2585]]; + + [[0.5982; 2.4208; 3.0127; 2.9651; 2.5219; 1.2960; 1.0620]; + [0.5157; 1.5544; 3.0899; 3.1496; 2.4110; 1.8567; 1.3182]]; + + [[0.3165; 1.2965; 3.4749; 2.9038; 2.7131; 1.7408; 0.5275]; + [0.3861; 1.3549; 2.7692; 2.9444; 3.2554; 1.2120; 0.7428]]]) + + Assert.That(t3p2.allclose(t3p2Correct, 0.01)) + + let t3s2 = dsharp.conv1d(t1, t2, stride=2) + let t3s2Correct = combo.tensor([[[2.8516; 2.6420]; + [2.3239; 2.7450]]; + + [[3.0127; 2.5219]; + [3.0899; 2.4110]]; + + [[3.4749; 2.7131]; + [2.7692; 3.2554]]]) + + Assert.That(t3s2.allclose(t3s2Correct, 0.01)) + + let t3s3 = dsharp.conv1d(t1, t2, stride=3) + let t3s3Correct = combo.tensor([[[2.8516]; + [2.3239]]; + + [[3.0127]; + [3.0899]]; + + [[3.4749]; + [2.7692]]]) + + Assert.That(t3s3.allclose(t3s3Correct, 0.01)) + + let t3s2p1 = dsharp.conv1d(t1, t2, stride=2, padding=1) + let t3s2p1Correct = combo.tensor([[[1.4392; 2.0732; 2.1177]; + [1.4345; 1.7078; 2.1474]]; + + [[2.4208; 2.9651; 1.2960]; + [1.5544; 3.1496; 1.8567]]; + + [[1.2965; 2.9038; 1.7408]; + [1.3549; 2.9444; 1.2120]]]) + + Assert.That(t3s2p1.allclose(t3s2p1Correct, 0.01)) + + let t3s3p2 = dsharp.conv1d(t1, t2, stride=3, padding=2) + let t3s3p2Correct = combo.tensor([[[0.6333; 2.0732; 1.0258]; + [0.6539; 1.7078; 1.2585]]; + + [[0.5982; 2.9651; 1.0620]; + [0.5157; 3.1496; 1.3182]]; + + [[0.3165; 2.9038; 0.5275]; + [0.3861; 2.9444; 0.7428]]]) + + Assert.That(t3s3p2.allclose(t3s3p2Correct, 0.01)) + + let t3d2 = dsharp.conv1d(t1, t2, dilation=2) + let t3d2Correct = combo.tensor([[[2.8030]; + [2.4735]]; + + [[2.9226]; + [3.1868]]; + + [[2.8469]; + [2.4790]]]) + + Assert.That(t3d2.allclose(t3d2Correct, 0.01)) + + let t3p2d3 
= dsharp.conv1d(t1, t2, padding=2, dilation=3) + let t3p2d3Correct = combo.tensor([[[2.1121; 0.8484; 2.2709]; + [1.6692; 0.5406; 1.8381]]; + + [[2.5078; 1.2137; 0.9173]; + [2.2395; 1.1805; 1.1954]]; + + [[1.5215; 1.3946; 2.1327]; + [1.0732; 1.3014; 2.0696]]]) + + Assert.That(t3p2d3.allclose(t3p2d3Correct, 0.01)) + + let t3s3p6d3 = dsharp.conv1d(t1, t2, stride=3, padding=6, dilation=3) + let t3s3p6d3Correct = combo.tensor([[[0.6333; 1.5018; 2.2709; 1.0580]; + [0.6539; 1.5130; 1.8381; 1.0479]]; + + [[0.5982; 1.7459; 0.9173; 0.2709]; + [0.5157; 0.8537; 1.1954; 0.7027]]; + + [[0.3165; 1.4118; 2.1327; 1.1949]; + [0.3861; 1.5697; 2.0696; 0.8520]]]) + + Assert.That(t3s3p6d3.allclose(t3s3p6d3Correct, 0.01)) + + let t3b1 = t1[0].unsqueeze(0).conv1d(t2) + let t3b1Correct = t3Correct[0].unsqueeze(0) + Assert.That(t3b1.allclose(t3b1Correct, 0.01)) + + let t3b1s2 = t1[0].unsqueeze(0).conv1d(t2, stride = 2) + let t3b1s2Correct = t3s2Correct[0].unsqueeze(0) + + Assert.That(t3b1s2.allclose(t3b1s2Correct, 0.01)) + + for combo in Combos.Integral do + let x = combo.ones([1;4;4]) + let y = combo.ones([1;4;4]) + let z = dsharp.conv1d(x,y) + let zCorrect = combo.tensor([[[16]]]) + Assert.CheckEqual(z, zCorrect) + + + // check types must always match + for dtype1 in Dtypes.All do + for dtype2 in Dtypes.All do + if dtype1 <> dtype2 then + let x = dsharp.zeros([1;4;4], dtype=dtype1) + let y = dsharp.zeros([1;4;4], dtype=dtype2) + isException(fun () -> dsharp.conv1d(x,y)) + + for combo in Combos.Bool do + let x = combo.zeros([1;4;4]) + let y = combo.zeros([1;4;4]) + isInvalidOp(fun () -> dsharp.conv1d(x,y)) + + [] + member _.TestTensorConv2D () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([[[[ 10.7072, -5.0993, 3.6884, 2.0982], + [ -6.4356, 0.6351, -2.3156, -1.3384], + [ -5.1846, 0.6805, -14.1961, 0.8657], + [ -8.8655, -7.1694, -3.4903, -2.9479]], + + [[ 2.5630, -2.2935, -0.8665, 6.7999], + [ 1.8098, 3.2082, 2.3160, -4.7734], + [ 14.7205, 0.9631, 8.1039, 6.7437], + [ 3.7847, -5.9792, -2.7371, -7.8548]]], + + + [[[ 3.5499, 0.9546, -7.5715, 2.8211], + [ -1.2659, 5.2366, -7.2322, -5.8877], + [ -2.8041, 2.1746, 2.2397, 0.1242], + [ 1.8172, -0.3989, -0.2394, 7.1078]], + + [[ -3.7765, 2.1584, 6.8627, -4.1471], + [ 4.6748, 7.9756, -6.0065, 2.0826], + [ 5.1038, -5.5801, -4.4420, -2.9498], + [ 0.1037, 4.6578, 3.0760, -4.9566]]]]) + let t2 = combo.tensor([[[[-5.6745, -1.9422, 4.1369], + [ 4.4623, 4.8385, 0.8345], + [ 1.3015, 0.0708, 3.8132]], + + [[ 0.9448, -1.9028, -8.0195], + [-5.3200, 0.4264, -1.2142], + [ 1.4442, -7.3623, 14.5340]]], + + + [[[-3.3486, -3.2725, -3.4595], + [-5.0818, -0.5769, -3.5363], + [ 3.1498, 0.6293, -1.2527]], + + [[ 3.2029, 3.9409, 12.6924], + [ 4.1056, -3.2890, 2.4071], + [ 4.2373, -1.8852, 4.4640]]], + + + [[[ 4.0582, -4.6075, 6.2574], + [-0.9867, 3.4303, -1.9686], + [-5.0618, 5.0045, -2.0878]], + + [[ 1.0605, -3.2697, -1.9856], + [-6.5763, -6.3535, 7.2228], + [15.1009, 4.9045, 5.1197]]]]) + + let t3 = dsharp.conv2d(t1, t2) + let t3Correct = combo.tensor([[[[ 10.6089; -1.4459]; + [-132.3437; -165.9882]]; + + [[ 97.8425; 81.2322]; + [ 215.2763; -112.2244]]; + + [[ 427.2891; -101.3674]; + [ -35.6012; -168.9572]]]; + + + [[[-127.6157; -35.6266]; + [ -7.7668; -47.1349]]; + + [[ 104.2333; 28.7020]; + [ 27.1404; 8.1246]]; + + [[-106.0468; -94.3428]; + [ -78.6259; 136.6283]]]]) + + let t3p1 = dsharp.conv2d(t1, t2, padding=1) + let t3p1Correct = combo.tensor([[[[ 86.6988; 8.1164; -85.8172; 69.5001]; + [-154.2592; 10.6089; -1.4459; -126.2889]; + [-176.1860; -132.3437; 
-165.9882; -23.2585]; + [ -62.8550; -180.0650; -52.4599; 55.0733]]; + + [[ 3.9697; -53.5450; 16.3075; -35.2008]; + [ -60.7372; 97.8425; 81.2322; 20.0075]; + [ -9.2216; 215.2763; -112.2244; 73.8351]; + [ 88.4748; 308.1942; 176.2158; 131.2712]]; + + [[ 5.6857; 51.6497; 106.6138; -17.3603]; + [ -46.9604; 427.2891; -101.3674; 226.5788]; + [-125.8047; -35.6012; -168.9572; -141.2721]; + [-105.4274; -132.2796; 35.6026; -13.8173]]]; + + + [[[ 115.1200; -141.3008; 36.3188; -92.2498]; + [-133.0979; -127.6157; -35.6266; 42.1693]; + [ 14.0058; -7.7668; -47.1349; 116.9311]; + [ 52.3284; 75.6948; -3.7964; 3.3106]]; + + [[ 31.6266; -11.5726; 39.5819; 22.8020]; + [ -55.3912; 104.2333; 28.7020; 24.2710]; + [ 91.6285; 27.1404; 8.1246; 38.5616]; + [ -37.8251; -83.1444; -113.7539; -7.7113]]; + + [[ 96.3737; 202.0389; -68.9841; -74.9820]; + [ -11.1773; -106.0468; -94.3428; -101.9384]; + [ -44.8701; -78.6259; 136.6283; 89.6921]; + [ 60.9218; 14.3467; -86.6495; 49.3313]]]]) + + let t3p12 = dsharp.conv2d(t1, t2, paddings=[|1; 2|]) + let t3p12Correct = combo.tensor([[[[ 7.5867; 86.6988; 8.1164; -85.8172; 69.5001; -35.4485]; + [ 210.3501; -154.2592; 10.6089; -1.4459; -126.2889; 24.8066]; + [ -42.1367; -176.1860; -132.3437; -165.9882; -23.2585; -44.1093]; + [-151.4929; -62.8550; -180.0650; -52.4599; 55.0733; 30.0922]]; + + [[ -15.5535; 3.9697; -53.5450; 16.3075; -35.2008; -7.1871]; + [ 94.8112; -60.7372; 97.8425; 81.2322; 20.0075; 33.2591]; + [ 127.0036; -9.2216; 215.2763; -112.2244; 73.8351; -30.0885]; + [ 245.2360; 88.4748; 308.1942; 176.2158; 131.2712; 1.4327]]; + + [[ 20.1355; 5.6857; 51.6497; 106.6138; -17.3603; -112.0973]; + [ 173.8400; -46.9604; 427.2891; -101.3674; 226.5788; 145.8927]; + [ 110.5519; -125.8047; -35.6012; -168.9572; -141.2721; -159.3897]; + [ -16.8828; -105.4274; -132.2796; 35.6026; -13.8173; 65.2295]]]; + + + [[[ 70.6642; 115.1200; -141.3008; 36.3188; -92.2498; 29.9960]; + [ 101.7243; -133.0979; -127.6157; -35.6266; 42.1693; -61.3766]; + [ -42.8275; 14.0058; -7.7668; -47.1349; 116.9311; 53.7170]; + [ -51.1392; 52.3284; 75.6948; -3.7964; 3.3106; 54.5939]]; + + [[ 0.8100; 31.6266; -11.5726; 39.5819; 22.8020; -41.0836]; + [ -18.1888; -55.3912; 104.2333; 28.7020; 24.2710; 3.6328]; + [ 84.1016; 91.6285; 27.1404; 8.1246; 38.5616; 15.0304]; + [ 68.3032; -37.8251; -83.1444; -113.7539; -7.7113; -66.3344]]; + + [[ -7.6892; 96.3737; 202.0389; -68.9841; -74.9820; 85.7395]; + [ 97.9534; -11.1773; -106.0468; -94.3428; -101.9384; -46.0084]; + [ 21.9169; -44.8701; -78.6259; 136.6283; 89.6921; -113.2355]; + [ -30.5091; 60.9218; 14.3467; -86.6495; 49.3313; 22.9582]]]]) + + let t3s2 = dsharp.conv2d(t1, t2, stride=2) + let t3s2Correct = combo.tensor([[[[ 10.6089]]; + + [[ 97.8425]]; + + [[ 427.2891]]]; + + + [[[-127.6157]]; + + [[ 104.2333]]; + + [[-106.0468]]]]) + + let t3s13 = dsharp.conv2d(t1, t2, strides=[|1; 3|]) + let t3s13Correct = combo.tensor([[[[ 10.6089]; + [-132.3437]]; + + [[ 97.8425]; + [ 215.2763]]; + + [[ 427.2891]; + [ -35.6012]]]; + + + [[[-127.6157]; + [ -7.7668]]; + + [[ 104.2333]; + [ 27.1404]]; + + [[-106.0468]; + [ -78.6259]]]]) + + let t3s2p1 = dsharp.conv2d(t1, t2, stride=2, padding=1) + let t3s2p1Correct = combo.tensor([[[[ 86.6988; -85.8172]; + [-176.1860; -165.9882]]; + + [[ 3.9697; 16.3075]; + [ -9.2216; -112.2244]]; + + [[ 5.6857; 106.6138]; + [-125.8047; -168.9572]]]; + + + [[[ 115.1200; 36.3188]; + [ 14.0058; -47.1349]]; + + [[ 31.6266; 39.5819]; + [ 91.6285; 8.1246]]; + + [[ 96.3737; -68.9841]; + [ -44.8701; 136.6283]]]]) + + let t3s23p32 = dsharp.conv2d(t1, t2, strides=[2; 3], 
paddings=[3; 2]) + let t3s23p32Correct = combo.tensor([[[[ 0.0000, 0.0000], + [ 7.5866, -85.8172], + [ -42.1364, -165.9885], + [ -67.0271, 97.8170]], + + [[ 0.0000, 0.0000], + [ -15.5537, 16.3071], + [ 127.0034, -112.2239], + [ 78.7071, -84.0060]], + + [[ 0.0000, 0.0000], + [ 20.1357, 106.6139], + [ 110.5519, -168.9587], + [ -62.9899, -13.2544]]], + + + [[[ 0.0000, 0.0000], + [ 70.6642, 36.3191], + [ -42.8270, -47.1361], + [ 6.6860, 70.4299]], + + [[ 0.0000, 0.0000], + [ 0.8102, 39.5820], + [ 84.1018, 8.1256], + [ -4.9704, -58.3407]], + + [[ 0.0000, 0.0000], + [ -7.6887, -68.9838], + [ 21.9173, 136.6280], + [ 11.1650, 48.6844]]]]) + + let t3p1d2 = dsharp.conv2d(t1, t2, padding=1, dilation=2) + let t3p1d2Correct = combo.tensor([[[[ -72.7697, -34.7305], + [ -35.3463, -230.5320]], + + [[ -42.2859, 24.9292], + [ 96.3085, 25.1894]], + + [[-149.3111, 42.9268], + [ 73.8409, -159.8669]]], + + + [[[ -57.9600, -88.2215], + [ 50.7950, -52.7872]], + + [[ -43.4812, 49.7672], + [ -47.4554, 76.3617]], + + [[ -25.4452, -9.8843], + [ 35.7940, 27.9557]]]]) + + let t3p22d23 = dsharp.conv2d(t1, t2, paddings=[2;2], dilations=[2;3]) + let t3p22d23Correct = combo.tensor([[[[-3.2693e+01, -4.3192e+01], + [ 4.7954e+01, 9.6877e+00], + [ 1.7971e+01, -7.0747e+01], + [-4.4577e+01, -1.7964e+01]], + + [[ 9.0977e+00, -2.3489e+01], + [-4.1579e+00, -3.3179e+00], + [ 4.0888e+00, -3.3949e+01], + [ 3.4366e+01, 2.7721e+01]], + + [[ 5.2087e+00, -1.3141e+01], + [-8.3409e+01, -5.3549e+01], + [ 2.7209e+01, -1.1435e+02], + [-2.0424e-02, 8.5139e+00]]], + + + [[[ 4.6776e+01, -8.4654e-01], + [-5.5823e+00, -6.0218e+01], + [ 2.1814e+00, 1.0590e+01], + [-2.5290e+01, 2.5629e+01]], + + [[ 4.2384e+00, -8.4199e+00], + [-3.8285e+01, 1.7978e+01], + [ 2.2481e+01, 6.5141e+01], + [-7.9511e-01, -9.9825e+00]], + + [[-2.6924e+01, -8.0152e+01], + [-1.1862e+01, 2.7242e+01], + [ 3.1457e+01, 4.8352e+01], + [-8.1167e+01, 3.2597e+01]]]]) + + let t3s3p6d3 = dsharp.conv2d(t1, t2, stride=3, padding=6, dilation=3) + let t3s3p6d3Correct = combo.tensor([[[[ 78.0793, 88.7191, -32.2774, 12.5512], + [ 27.0241, -107.5002, 98.7433, -41.9933], + [ 11.7470, -105.7288, -152.6583, 23.1514], + [ -67.0271, 60.8134, 74.5546, 9.3066]], + + [[ -1.9717, 29.6326, 33.0870, 35.4221], + [ -3.6938, -49.7435, -66.3994, -25.3134], + [ 35.9503, 38.2935, 80.4125, -2.5147], + [ 78.7071, -45.5705, 20.5010, -15.2868]], + + [[ -9.2327, 96.5872, 28.3565, 92.0639], + [ 35.3198, 5.5638, -14.6744, -150.4814], + [ 106.6989, -163.4741, 37.9205, 70.2904], + [ -62.9899, 25.6233, 7.3010, -20.2932]]], + + + [[[ -41.3512, -21.4615, 29.8981, -2.3176], + [ 15.9843, -22.6151, 87.3233, 36.7436], + [ 46.3618, 66.0061, 18.5348, 38.1597], + [ 6.6860, 65.4270, -14.5871, -45.0162]], + + [[ -21.3053, -12.6932, 4.7727, -8.6866], + [ -23.4574, -39.6679, -1.5520, -29.9771], + [ -66.3903, -127.3519, -46.1654, -79.1997], + [ -4.9704, -93.0387, -48.5467, -39.6767]], + + [[ -26.7460, -27.8782, -81.2187, -76.9048], + [ -37.5283, -29.9493, 60.9875, -86.3384], + [ 26.8834, -22.3392, 64.3614, 32.6334], + [ 11.1650, 45.6064, -9.0581, 23.5884]]]]) + + let t3b1 = t1[0].unsqueeze(0).conv2d(t2) + let t3b1Correct = t3Correct[0].unsqueeze(0) + let t3b1s2 = t1[0].unsqueeze(0).conv2d(t2, stride = 2) + let t3b1s2Correct = t3s2Correct[0].unsqueeze(0) + + // Assert.That(false) + Assert.That(t3.allclose(t3Correct, 0.01)) + Assert.That(t3p1.allclose(t3p1Correct, 0.01)) + Assert.That(t3p12.allclose(t3p12Correct, 0.01)) + Assert.That(t3s2.allclose(t3s2Correct, 0.01)) + Assert.That(t3s13.allclose(t3s13Correct, 0.01)) + 
Assert.That(t3s2p1.allclose(t3s2p1Correct, 0.01)) + Assert.That(t3s23p32.allclose(t3s23p32Correct, 0.01)) + Assert.That(t3p1d2.allclose(t3p1d2Correct, 0.01)) + Assert.That(t3p22d23.allclose(t3p22d23Correct, 0.01)) + Assert.That(t3s3p6d3.allclose(t3s3p6d3Correct, 0.01)) + Assert.That(t3b1.allclose(t3b1Correct, 0.01)) + Assert.That(t3b1s2.allclose(t3b1s2Correct, 0.01)) + + // check intergral types + for combo in Combos.Integral do + let x = combo.ones([1;1;4;4]) + let y = combo.ones([1;1;4;4]) + let z = dsharp.conv2d(x, y) + let zCorrect = combo.tensor([[[[16]]]]) + Assert.CheckEqual(z, zCorrect) + + // check types must always match + for dtype1 in Dtypes.All do + for dtype2 in Dtypes.All do + if dtype1 <> dtype2 then + let x = dsharp.zeros([1;1;4;4], dtype=dtype1) + let y = dsharp.zeros([1;1;4;4], dtype=dtype2) + isException(fun () -> dsharp.conv2d(x,y, strides=[1;1])) + + for combo in Combos.Bool do + let x = combo.zeros([1;1;4;4]) + let y = combo.zeros([1;1;4;4]) + isInvalidOp(fun () -> dsharp.conv2d(x,y, strides=[1;1])) + + [] + member _.TestTensorConv3D () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([[[[ 2.0403e+00, 5.0188e-01, 4.6880e-01, 8.0736e-01], + [-6.1190e-01, 6.1642e-01, -4.0588e-01, -2.9679e-01], + [-5.6210e-01, 3.6843e-01, -6.6630e-02, -1.3918e+00], + [-1.2988e+00, 9.6719e-01, -3.3539e-01, 8.7715e-01]], + + [[-1.7863e+00, -1.1244e+00, -2.1417e-02, 6.4124e-01], + [ 7.5028e-01, 2.2587e-01, -1.2390e-01, -8.4495e-02], + [-1.1291e+00, 1.5644e+00, -2.0280e+00, -9.2168e-01], + [-9.2567e-01, 3.9768e-01, 1.0377e+00, 5.0193e-01]], + + [[-5.3238e-01, -8.4971e-02, 5.3398e-01, -1.0695e+00], + [ 5.6227e-01, 2.3256e-01, 6.6780e-01, -7.1462e-01], + [-6.6682e-01, -3.5299e-01, -6.0286e-01, -1.0693e+00], + [ 1.2855e+00, -5.9239e-02, -1.6507e-01, -7.1905e-01]], + + [[-4.1638e-01, 7.6894e-01, -8.3663e-01, 8.2333e-01], + [-1.4869e+00, -1.5159e+00, 8.6893e-01, -4.0507e-01], + [ 1.6423e+00, 1.1892e+00, 9.8311e-01, -4.7513e-01], + [ 1.4261e+00, -1.6494e+00, 8.3231e-02, 3.5143e-01]]], + + + [[[ 1.6732e+00, -2.3141e+00, -2.7201e-01, 4.8099e-02], + [ 1.4185e-01, -2.7953e-01, 2.0087e-01, 2.5665e+00], + [ 2.0306e+00, 1.3222e+00, 2.3076e-01, 4.5952e-01], + [ 8.8091e-01, -7.6203e-01, 1.4536e-03, 1.3817e-01]], + + [[-1.8129e-01, 3.7236e-01, 4.3555e-01, 1.0214e+00], + [ 1.7297e-01, -3.5313e-01, 2.8694e+00, -4.7409e-01], + [-6.3609e-01, 3.4134e+00, -4.9251e-01, -3.8600e-01], + [ 6.8581e-02, 1.0088e+00, 3.0463e-01, -5.7993e-01]], + + [[ 7.7506e-01, 1.5062e-01, -2.9680e-02, -1.9979e+00], + [ 6.7832e-01, 1.3433e+00, 1.0491e+00, 9.5303e-02], + [-1.4113e+00, -3.0230e-01, -3.2206e-01, 3.3161e-01], + [-1.0122e+00, 5.1443e-01, 6.5048e-02, -4.2270e-02]], + + [[ 1.2150e+00, -1.4316e+00, -2.9044e-01, -7.3760e-01], + [ 3.5693e-01, 1.0187e+00, 1.1133e+00, -4.1039e-01], + [-1.7768e+00, -2.2549e-01, 2.7584e-01, -1.2234e+00], + [-2.9351e-01, -5.3639e-01, -1.2375e+00, 8.3979e-03]]]]).unsqueeze(0) + let t2 = combo.tensor([[[[-0.5868, -0.6268, 0.2067], + [ 0.0902, -0.2625, 0.4332], + [-2.3743, 0.4579, 1.1151]], + + [[-0.6703, -0.4771, 1.5989], + [-0.8629, 0.0367, -1.7918], + [-0.1023, 0.0615, -1.3259]], + + [[ 0.5963, 0.3167, 0.8568], + [ 1.0630, -0.2076, -1.6126], + [-0.6459, 1.4887, -1.4647]]], + + + [[[-0.6016, 0.8268, 1.3840], + [-0.2750, -0.2897, 0.9044], + [-1.8141, -0.2568, 0.3517]], + + [[ 0.4624, -0.5173, -0.7067], + [-0.3159, 0.7693, 0.0949], + [ 0.2051, 1.2193, -1.5660]], + + [[-0.0875, 0.5780, -0.2825], + [ 0.2239, 0.7976, 1.5523], + [ 0.6226, -0.4116, 1.0639]]]]).unsqueeze(0) + + let t3 
= dsharp.conv3d(t1, t2) + let t3Correct = combo.tensor([[[[ 3.1109, 6.7899], + [ 4.3064, 4.1053]], + + [[ 5.0324, -8.8943], + [-0.1298, 1.2862]]]]).unsqueeze(0) + + let t3p1 = dsharp.conv3d(t1, t2, padding=1) + let t3p1Correct = combo.tensor([[[[ 2.9555, -2.2637, -7.1829, 5.6339], + [ -3.3115, 11.7124, 2.7917, 2.6118], + [ 5.5319, 3.0030, 3.2099, -2.7804], + [ -1.4804, -0.1157, -6.4439, -0.0716]], + + [[ 2.4783, -2.6479, 5.6216, -1.2882], + [-10.3388, 3.1109, 6.7899, -6.1003], + [ -1.3145, 4.3064, 4.1053, 5.3012], + [ 2.6878, -4.5237, -0.6728, 0.6796]], + + [[ -1.4721, -4.1515, 4.6180, -9.2384], + [ 9.8664, 5.0324, -8.8943, 5.2075], + [ -1.5404, -0.1298, 1.2862, -3.2419], + [ 8.5308, 2.7561, -6.2106, 1.8973]], + + [[ 0.9938, -2.9158, -5.2227, -3.0340], + [ 3.2490, 2.0787, 2.2262, -2.4861], + [ -0.0842, 0.3416, -3.8301, -2.1084], + [ 4.0825, -1.9845, -1.1269, 2.3267]]]]).unsqueeze(0) + + let t3p123 = dsharp.conv3d(t1, t2, paddings=[|1; 2; 3|]) + let t3p123Correct = combo.tensor([[[[ 0.0000e+00, -2.9020e+00, 4.5825e+00, -3.1431e+00, -1.0803e+00, + 8.2371e-01, 1.4897e-01, 0.0000e+00], + [ 0.0000e+00, -1.2234e+00, 2.9555e+00, -2.2637e+00, -7.1829e+00, + 5.6339e+00, 5.1473e-01, 0.0000e+00], + [ 0.0000e+00, -6.8862e-01, -3.3115e+00, 1.1712e+01, 2.7917e+00, + 2.6118e+00, -3.8470e-01, 0.0000e+00], + [ 0.0000e+00, 3.3201e+00, 5.5319e+00, 3.0030e+00, 3.2099e+00, + -2.7804e+00, 6.1979e-01, 0.0000e+00], + [ 0.0000e+00, 8.8853e-01, -1.4804e+00, -1.1566e-01, -6.4439e+00, + -7.1598e-02, 2.3270e-01, 0.0000e+00], + [ 0.0000e+00, -3.5118e+00, 2.0512e+00, 1.6275e+00, 1.7109e+00, + 1.5145e-01, -1.7395e-01, 0.0000e+00]], + + [[ 0.0000e+00, 7.1204e+00, 3.0177e-04, -6.9272e+00, 2.8760e+00, + -1.9002e-02, -2.4133e+00, 0.0000e+00], + [ 0.0000e+00, 5.6420e+00, 2.4783e+00, -2.6479e+00, 5.6216e+00, + -1.2882e+00, -5.9195e+00, 0.0000e+00], + [ 0.0000e+00, 7.1537e-02, -1.0339e+01, 3.1109e+00, 6.7899e+00, + -6.1003e+00, 1.2121e+00, 0.0000e+00], + [ 0.0000e+00, 8.9927e-01, -1.3145e+00, 4.3064e+00, 4.1053e+00, + 5.3012e+00, -4.4293e+00, 0.0000e+00], + [ 0.0000e+00, -5.7960e-01, 2.6878e+00, -4.5237e+00, -6.7276e-01, + 6.7965e-01, -6.6988e-01, 0.0000e+00], + [ 0.0000e+00, 8.0942e-01, 6.4290e-01, 1.2871e+00, 5.3531e-01, + -1.0901e+00, -1.6275e+00, 0.0000e+00]], + + [[ 0.0000e+00, -6.6101e-01, -4.8746e+00, 7.4949e+00, 3.0253e+00, + -1.3816e+00, -4.6669e+00, 0.0000e+00], + [ 0.0000e+00, 4.2946e+00, -1.4721e+00, -4.1515e+00, 4.6180e+00, + -9.2384e+00, 3.2005e+00, 0.0000e+00], + [ 0.0000e+00, -2.9133e+00, 9.8664e+00, 5.0324e+00, -8.8943e+00, + 5.2075e+00, 2.1560e+00, 0.0000e+00], + [ 0.0000e+00, -9.4993e+00, -1.5404e+00, -1.2982e-01, 1.2862e+00, + -3.2419e+00, 4.1770e-01, 0.0000e+00], + [ 0.0000e+00, -4.7673e+00, 8.5308e+00, 2.7561e+00, -6.2106e+00, + 1.8973e+00, 2.6808e+00, 0.0000e+00], + [ 0.0000e+00, 3.9791e+00, 5.8774e-01, 3.1007e-01, -4.0616e+00, + -8.0652e-01, 7.2560e-01, 0.0000e+00]], + + [[ 0.0000e+00, -1.6718e+00, 2.1936e+00, 5.2331e-01, -2.4292e+00, + -2.0133e+00, 5.9281e+00, 0.0000e+00], + [ 0.0000e+00, 3.6098e+00, 9.9384e-01, -2.9158e+00, -5.2227e+00, + -3.0340e+00, 1.4565e+00, 0.0000e+00], + [ 0.0000e+00, 2.3582e+00, 3.2490e+00, 2.0787e+00, 2.2262e+00, + -2.4861e+00, 3.0599e+00, 0.0000e+00], + [ 0.0000e+00, -6.6049e+00, -8.4240e-02, 3.4158e-01, -3.8301e+00, + -2.1084e+00, 2.8022e+00, 0.0000e+00], + [ 0.0000e+00, -1.1513e+00, 4.0825e+00, -1.9845e+00, -1.1269e+00, + 2.3267e+00, -1.7839e-01, 0.0000e+00], + [ 0.0000e+00, 1.3527e+00, -3.7297e+00, 1.3533e+00, 1.6894e+00, + -3.2651e-01, 2.1566e-01, 
0.0000e+00]]]]).unsqueeze(0)
+
+            let t3s2 = dsharp.conv3d(t1, t2, stride=2)
+            let t3s2Correct = combo.tensor([[[[3.1109]]]]).unsqueeze(0)
+
+            let t3s132 = dsharp.conv3d(t1, t2, strides=[|1; 3; 2|])
+            let t3s132Correct = combo.tensor([[[[3.1109]],
+                                               [[5.0324]]]]).unsqueeze(0)
+
+            let t3s2p1 = dsharp.conv3d(t1, t2, stride=2, padding=1)
+            let t3s2p1Correct = combo.tensor([[[[ 2.9555, -7.1829],
+                                                [ 5.5319, 3.2099]],
+
+                                               [[-1.4721, 4.6180],
+                                                [-1.5404, 1.2862]]]]).unsqueeze(0)
+
+            let t3s231p321 = dsharp.conv3d(t1, t2, strides=[2; 3; 1], paddings=[3; 2; 1])
+            let t3s231p321Correct = combo.tensor([[[[ 0.0000, 0.0000, 0.0000, 0.0000],
+                                                    [ 0.0000, 0.0000, 0.0000, 0.0000]],
+
+                                                   [[ 4.5825, -3.1431, -1.0803, 0.8237],
+                                                    [ 5.5319, 3.0030, 3.2099, -2.7804]],
+
+                                                   [[-4.8746, 7.4949, 3.0253, -1.3816],
+                                                    [-1.5404, -0.1298, 1.2862, -3.2419]],
+
+                                                   [[-0.1487, -1.5309, 1.1215, 3.0797],
+                                                    [ 1.4189, 1.4221, 4.1597, 1.4329]]]]).unsqueeze(0)
+
+            Assert.That(t3.allclose(t3Correct, 0.01, 0.01))
+            Assert.That(t3p1.allclose(t3p1Correct, 0.01, 0.01))
+            Assert.That(t3p123.allclose(t3p123Correct, 0.01, 0.01))
+            Assert.That(t3s2.allclose(t3s2Correct, 0.01, 0.01))
+            Assert.That(t3s132.allclose(t3s132Correct, 0.01, 0.01))
+            Assert.That(t3s2p1.allclose(t3s2p1Correct, 0.01, 0.01))
+            Assert.That(t3s231p321.allclose(t3s231p321Correct, 0.01, 0.01))
+
+            let t3p1d2 = dsharp.conv3d(t1, t2, padding=1, dilation=2)
+            let t3p1d2Correct = combo.tensor([[[[-0.2568, 0.7812],
+                                                [ 3.7157, 2.1968]],
+
+                                               [[ 7.7515, 1.1481],
+                                                [-1.2951, -2.1536]]]]).unsqueeze(0)
+            Assert.That(t3p1d2.allclose(t3p1d2Correct, 0.01, 0.01))
+
+            let t3p224d234 = dsharp.conv3d(t1, t2, paddings=[2;2;4], dilations=[2;3;4])
+            let t3p224d234Correct =
+                combo.tensor([[[[ 0.5110, 0.8308, 0.8378, 2.1878],
+                                [ 0.5542, 0.8628, 0.0433, 0.7889]],
+
+                               [[ 0.7539, 0.8638, 2.9105, -0.6111],
+                                [-2.2889, 2.2566, -0.4374, -1.2079]],
+
+                               [[ 0.6620, 0.9611, 0.8799, -0.6184],
+                                [-1.5508, -0.7252, -0.3192, 0.4482]],
+
+                               [[-0.0271, 0.7710, 0.0897, -0.1711],
+                                [-0.8259, -1.5293, 0.9234, -0.6048]]]]).unsqueeze(0)
+            Assert.That(t3p224d234.allclose(t3p224d234Correct, 0.01, 0.01))
+
+            let t3s3p6d3 = dsharp.conv3d(t1, t2, stride=3, padding=6, dilation=3)
+            let t3s3p6d3Correct =
+                combo.tensor([[[[-1.2082, 1.2172, 0.9059, -0.4916],
+                                [ 2.1467, -3.7502, 5.0506, 0.3885],
+                                [ 4.7375, 2.0637, 0.0984, 1.4406],
+                                [-1.3617, 0.8104, -0.4940, 0.5110]],
+
+                               [[-3.4229, -2.0909, 2.7974, -1.0638],
+                                [-2.9979, -0.1444, -3.2004, -0.2850],
+                                [ 1.0353, -1.1102, 0.8409, -0.3885],
+                                [-1.3945, 2.0495, 1.7803, -0.3152]],
+
+                               [[ 1.5129, 2.9412, -8.0788, -2.2397],
+                                [ 0.6883, -1.7963, 0.6140, -2.7854],
+                                [-1.1362, 1.5341, -3.5884, -1.6604],
+                                [ 3.4384, 1.9425, -1.4670, -0.8295]],
+
+                               [[-0.0370, 0.1560, -0.6491, -0.6168],
+                                [ 2.4056, 0.5702, -3.0690, -0.5726],
+                                [ 1.9479, 0.2854, -1.4980, -0.0100],
+                                [-0.1114, -1.0524, -0.8736, -0.2113]]]]).unsqueeze(0)
+            Assert.That(t3s3p6d3.allclose(t3s3p6d3Correct, 0.01, 0.01))
+
+    [<Test>]
+    member _.TestTensorConvTranspose1D () =
+        for combo in Combos.FloatingPointExcept16s do
+            let t1 = combo.tensor([[[-1.2531, 0.9667, 0.2120, -1.2948, 0.4470, 1.3539],
+                                    [-0.3736, 0.8294, -0.8978, 0.1512, -1.9213, -0.0488],
+                                    [-0.6830, 0.0080, -0.1773, -1.7092, -0.0818, -0.2670]]])
+            let t2 = combo.tensor([[[ 0.1036, 0.4791, -1.3667],
+                                    [ 1.8627, -1.0295, -0.9342]],
+
+                                   [[-0.1559, 0.4204, -1.0169],
+                                    [ 1.0772, 0.9606, 0.4394]],
+
+                                   [[-0.0849, 0.5367, -1.4039],
+                                    [-0.1863, 0.8559, 0.1834]]])
+
+            let t3 = dsharp.convTranspose1d(t1, t2)
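+            // note: transposed convolution expands the input; the expected sizes below
+            // follow out = (in - 1)*stride - 2*padding + dilation*(k - 1) + 1, so the
+            // length-6 rows with length-3 kernels give length-8 outputs here
+            let t3Correct = combo.tensor([[[-0.0135, -1.1538, 4.0443, -2.5593,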
-0.2493, 3.5484, 1.9425,
+                                             -1.4259],
+                                            [-2.6092, 3.0392, 0.1504, -3.7002, -1.8314, 1.1058, -2.9461,
+                                             -1.3352]]])
+
+            let t3p1 = dsharp.convTranspose1d(t1, t2, padding=1)
+            let t3p1Correct = combo.tensor([[[-1.1538, 4.0443, -2.5593, -0.2493, 3.5484, 1.9425],
+                                             [ 3.0392, 0.1504, -3.7002, -1.8314, 1.1058, -2.9461]]])
+
+            let t3p2 = dsharp.convTranspose1d(t1, t2, padding=2)
+            let t3p2Correct = combo.tensor([[[ 4.0443, -2.5593, -0.2493, 3.5484],
+                                             [ 0.1504, -3.7002, -1.8314, 1.1058]]])
+
+            let t3s2 = dsharp.convTranspose1d(t1, t2, stride=2)
+            let t3s2Correct = combo.tensor([[[-0.0135, -1.1240, 3.0214, 0.8161, -1.9989, -0.3710, 0.8596,
+                                              -1.4742, 4.3680, -0.6374, 1.6282, 0.4848, -1.4259],
+                                             [-2.6092, 0.3466, 3.5738, -0.1917, -1.0763, -1.2325, -2.5556,
+                                              0.0154, -0.2591, -2.3758, 1.2422, -1.6693, -1.3352]]])
+
+            let t3s3 = dsharp.convTranspose1d(t1, t2, stride=3)
+            let t3s3Correct = combo.tensor([[[-0.0135, -1.1240, 3.0512, -0.0298, 0.8161, -2.1758, 0.1770,
+                                              -0.3710, 0.8721, -0.0125, -1.4742, 4.0153, 0.3527, -0.6374,
+                                              1.4576, 0.1705, 0.4848, -1.4259],
+                                             [-2.6092, 0.3466, 0.8812, 2.6926, -0.1917, -0.5372, -0.5391,
+                                              -1.2325, -0.6251, -1.9305, 0.0154, 0.9626, -1.2217, -2.3758,
+                                              -1.2768, 2.5191, -1.6693, -1.3352]]])
+
+            let t3s2p1 = dsharp.convTranspose1d(t1, t2, stride=2, padding=1)
+            let t3s2p1Correct = combo.tensor([[[-1.1240, 3.0214, 0.8161, -1.9989, -0.3710, 0.8596, -1.4742,
+                                                4.3680, -0.6374, 1.6282, 0.4848],
+                                               [ 0.3466, 3.5738, -0.1917, -1.0763, -1.2325, -2.5556, 0.0154,
+                                                -0.2591, -2.3758, 1.2422, -1.6693]]])
+
+            let t3s3p2 = dsharp.convTranspose1d(t1, t2, stride=3, padding=2)
+            let t3s3p2Correct = combo.tensor([[[ 3.0512, -0.0298, 0.8161, -2.1758, 0.1770, -0.3710, 0.8721,
+                                                -0.0125, -1.4742, 4.0153, 0.3527, -0.6374, 1.4576, 0.1705],
+                                               [ 0.8812, 2.6926, -0.1917, -0.5372, -0.5391, -1.2325, -0.6251,
+                                                -1.9305, 0.0154, 0.9626, -1.2217, -2.3758, -1.2768, 2.5191]]])
+
+            let t3d2 = dsharp.convTranspose1d(t1, t2, dilation=2)
+            let t3d2Correct = combo.tensor([[[-0.0135, -0.0298, -0.9470, 0.8036, 3.0329, -3.4795, 0.2347,
+                                              4.5001, 1.4576, -1.4259],
+                                             [-2.6092, 2.6926, -0.1925, -2.1222, -1.5730, 1.9973, -3.0009,
+                                              -0.7067, -1.2768, -1.3352]]])
+
+            let t3p2d3 = dsharp.convTranspose1d(t1, t2, padding=2, dilation=3)
+            let t3p2d3Correct = combo.tensor([[[ 0.1770, -1.1365, 1.1688, -0.2005, 1.5770, -2.8133, 1.3570,
+                                                 4.0153],
+                                               [-0.5391, -1.5840, -1.4133, 1.2866, 0.8965, -2.9130, -2.2944,
+                                                 0.9626]]])
+
+            let t3s3p6d3 = dsharp.convTranspose1d(t1, t2, stride=3, padding=6, dilation=3)
+            let t3s3p6d3Correct = combo.tensor([[[ 4.0443, 0.0000, 0.0000, -2.5593, 0.0000, 0.0000, -0.2493,
+                                                   0.0000, 0.0000, 3.5484],
+                                                 [ 0.1504, 0.0000, 0.0000, -3.7002, 0.0000, 0.0000, -1.8314,
+                                                   0.0000, 0.0000, 1.1058]]])
+
+            Assert.That(t3Correct.allclose(t3, 0.01))
+            Assert.That(t3p1Correct.allclose(t3p1, 0.01))
+            Assert.That(t3p2Correct.allclose(t3p2, 0.01))
+            Assert.That(t3s2Correct.allclose(t3s2, 0.01))
+            Assert.That(t3s3Correct.allclose(t3s3, 0.01))
+            Assert.That(t3s2p1Correct.allclose(t3s2p1, 0.01))
+            Assert.That(t3s3p2Correct.allclose(t3s3p2, 0.01))
+            Assert.That(t3d2Correct.allclose(t3d2, 0.01))
+            Assert.That(t3p2d3Correct.allclose(t3p2d3, 0.01))
+            Assert.That(t3s3p6d3Correct.allclose(t3s3p6d3, 0.01, 0.01))
+
+    [<Test>]
+    member _.TestTensorConvTranspose2D () =
+        for combo in Combos.FloatingPointExcept16s do
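+            // note: the same size rule applies per spatial dim as in the 1D case: the
+            // 4x4 inputs and 3x3 kernels below give 6x6 outputs, with 3 input channels
+            // mapped to 2 output channels
+            let t1 = combo.tensor([[[[-2.0280, -7.4258, -1.1627, -3.6714],
+                                     [ 3.1646, -2.0775, 1.1166, -3.1054],
+                                     [-2.9795, 6.3719, 6.7753, -0.2423],
+                                     [-5.1595, -1.5602, -1.5165, -4.1525]],
+
+                                    [[-4.4974, -1.6737, 0.2967,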
-1.3116], + [ 3.7593, -1.4428, -2.1954, -3.8098], + [-0.2220, 4.3347, 2.6288, 4.9739], + [-2.8094, -3.4588, -1.3126, -2.8789]], + + [[ 1.8656, 3.6751, 3.6202, 0.7065], + [ 2.9986, -2.5643, -3.2444, -0.0339], + [-1.0250, 3.4748, -0.9057, 0.6292], + [ 0.1423, 2.9450, 4.5264, -1.4891]]], + + + [[[-0.5852, -1.6015, -0.2604, 6.8539], + [-1.6572, 0.3233, 2.4716, 0.8160], + [-7.9254, 0.5539, -0.4043, 0.7395], + [ 2.3128, 1.5731, 2.1585, 0.2829]], + + [[ 2.0864, -4.2912, 0.8241, 3.3248], + [ 2.4391, 5.8813, 1.0969, -0.4856], + [ 2.2431, -3.8626, -0.0758, 0.7386], + [-1.3231, 2.5438, -3.1992, 2.7404]], + + [[ 2.1057, 2.1381, 4.3754, -4.7032], + [-0.0310, 1.5864, -4.6051, -3.2207], + [-8.3767, 1.9677, -2.5842, 0.6181], + [-5.3311, 3.3852, -0.9679, 10.0806]]]]) + let t2 = combo.tensor([[[[-0.6207, 0.9829, 1.9519], + [-1.3195, -1.0054, -0.0422], + [-0.7566, -0.5450, 0.0660]], + + [[ 1.2635, -0.5134, -1.5355], + [ 0.0294, -0.7468, 1.5766], + [-0.6810, 0.0306, 0.7619]]], + + + [[[ 1.8189, 0.0156, 1.2304], + [-0.6246, -0.5269, -0.6632], + [ 1.0706, 0.0366, 0.4163]], + + [[ 1.1352, 0.1125, -1.1641], + [-0.4009, 0.2187, 0.6077], + [ 0.0796, -1.0126, -0.2706]]], + + + [[[-0.1754, 0.1714, -0.4221], + [ 0.3765, -2.9587, 1.4150], + [ 0.3446, -0.8976, 2.2664]], + + [[-0.4247, -0.3800, 1.0981], + [-1.4746, 0.9296, 0.3400], + [ 0.1843, 1.0527, 0.3531]]]]) + + let t3 = dsharp.convTranspose2d(t1, t2) + let t3Correct = combo.tensor([[[[ -7.2488, -0.8234, -16.3482, -18.8535, -6.9405, -9.0783], + [ 10.5350, 13.9143, 8.8096, -6.3821, 4.9971, -8.7103], + [ -6.4071, -6.7860, 18.0874, 51.6045, 28.4071, 8.8040], + [ 4.4154, -19.7267, -40.3442, -24.0385, -25.0267, -15.2847], + [ 10.2796, 15.1273, -10.1839, 15.1099, 16.6752, 3.4580], + [ 0.9450, 1.0737, -1.8054, 1.3937, 13.1064, -4.8475]], + + [[ -8.4601, -13.0171, 9.9559, 10.2132, 6.8841, 7.9401], + [ 5.9849, -8.2165, -5.9590, -13.5684, 1.1469, 2.8209], + [ -8.0502, 30.9921, 31.5255, -14.5540, -10.9641, -14.1428], + [ -9.5590, -10.0463, -0.3495, 4.8396, 29.0524, 9.5997], + [ 2.5872, -3.9301, -20.5199, 6.8982, -0.8133, -10.1110], + [ 3.3162, 4.1667, 5.1967, 9.1590, 2.0185, -2.9105]]], + + + [[[ 3.7889, -7.3680, 0.1545, -6.1833, 4.6413, 19.4543], + [ 5.7324, 7.4798, 2.3715, -12.0400, 19.2886, -6.7945], + [ 14.5222, -24.0757, -16.1896, -1.7090, 13.5193, -11.0012], + [ 6.8502, 46.0075, -20.2598, 28.5731, -11.2059, -7.4251], + [ 1.2786, 19.7991, -42.3652, 12.9959, -37.7386, 14.1919], + [ -5.0035, 6.1760, -21.6751, 14.6040, -12.4852, 24.0061]], + + [[ 0.7348, -8.0679, -0.9427, 22.7978, 2.8876, -19.5593], + [ -3.2706, 8.9194, 2.4042, 2.6916, -16.5644, 7.0029], + [ -3.9380, 1.2676, 14.5253, 11.3920, -10.3566, 1.2414], + [ 16.2215, -0.6001, -28.4006, -15.5361, -8.6123, 8.8859], + [ 12.4917, -24.5919, 2.5210, -14.8144, 9.6141, 6.1206], + [ -2.6629, -4.4465, -0.6293, 5.8754, 10.0140, 3.0334]]]]) + + let t1p1 = combo.tensor([[[[ 1.0744, 7.9558, 0.4934, 2.3298, 2.3925, -1.2102], + [-2.1089, 4.0487, 0.9586, 4.5810, 1.0251, 5.6604], + [-3.4522, -4.8244, 0.5531, -6.3983, -5.8461, 3.7263], + [ 7.5891, 4.5920, 1.9801, -5.1166, -3.8933, 2.1153], + [ 0.6262, 2.5322, -6.0730, -3.4204, 2.3583, 0.4224], + [ 0.6814, -0.9715, -1.2208, 9.5117, -1.2802, 2.0687]], + + [[ 3.3219, -0.4099, -0.3930, -1.8511, -2.0642, -1.9206], + [ 2.6994, 1.6932, 1.3649, 3.2362, 2.3385, -0.2216], + [-4.3740, -8.2227, -2.9300, -8.7945, -2.0426, -1.1449], + [ 3.6044, -0.5552, 0.0607, 3.7366, 0.1317, 0.3760], + [ 0.7646, -3.2802, -0.7214, -5.0273, 0.0336, -3.9015], + [-1.3125, 1.8098, -1.9835, 7.9206, -0.8541, 3.2770]], + + 
[[ 3.0539, -3.7408, 1.0175, -3.9080, -1.6320, -0.7949], + [ 0.6580, 3.8309, 5.3238, -6.3294, 5.0700, 4.4601], + [ 4.7103, -1.8846, 3.8602, -3.9283, 4.4575, 1.5436], + [-2.9477, 4.4539, 0.6466, 3.8747, -1.8396, 0.4202], + [ 2.0424, 4.7229, -2.0569, -0.7198, -7.7648, 3.7662], + [ 6.3408, -1.8474, -2.4028, -1.1776, 6.5768, -2.5502]]], + + + [[[ 0.2038, -1.9139, -1.0913, 1.7458, 1.3187, -0.7424], + [-0.6190, -1.4809, -4.1774, 4.1776, -1.6485, -2.8223], + [ 1.3574, -0.9936, 0.4081, -1.2595, -3.1222, -0.1957], + [ 3.2237, -3.5044, -2.2179, 1.1732, 2.7336, -1.0194], + [ 2.8080, -0.6129, 2.4027, -0.8684, -5.8353, 0.5219], + [-5.1602, 0.4612, -1.8575, -1.8444, 1.2732, 5.0051]], + + [[ 0.4338, -0.3004, 3.5642, 0.7867, -0.3105, 0.5667], + [ 0.0962, -0.1167, -1.1296, 1.1743, -0.3805, 0.3942], + [ 3.1247, -0.7838, 7.1643, -3.3606, -2.5899, 0.4827], + [-0.7164, -0.9592, -1.6169, 2.0705, 1.3104, 2.9180], + [ 0.9406, 6.0178, 7.0580, -1.1603, -4.9145, -3.0228], + [-1.2659, -4.5113, -0.4634, 2.0256, 3.4598, 1.6469]], + + [[ 6.1612, -7.6000, 1.1598, 2.3335, -6.1723, 5.6237], + [ 3.0543, -5.6086, 2.6119, -0.5712, -0.5620, 3.4211], + [-0.8446, -1.7392, -4.8108, -0.0792, -4.0653, 2.2177], + [ 0.2648, -1.0341, -3.0084, 0.6107, 3.5405, 3.5716], + [ 8.1575, -5.9643, -5.5036, -1.8790, -2.2454, -1.4370], + [-1.7650, -5.9335, 3.4498, 0.8872, -1.0203, 3.9062]]]]) + + let t3p1 = dsharp.convTranspose2d(t1p1, t2, padding=1) + let t3p1Correct = combo.tensor([[[[-2.5539e+01, 9.8793e+00, 2.3522e+00, 1.6893e+01, 1.4417e+01, + 1.2602e+01], + [-3.1444e+01, -3.5893e+01, -6.7083e+01, 5.7391e+00, -7.6345e+01, + -2.8184e+01], + [ 5.7274e+00, 5.1016e+01, 2.4985e+01, 6.2553e+01, -4.1881e+01, + 1.1302e+00], + [-2.0541e+01, -9.0034e+00, -2.3712e+01, -5.8394e-01, -2.7339e+01, + 8.2359e+00], + [-4.5268e+00, -2.3789e+01, 5.4599e+01, 7.0560e+00, 7.2854e+01, + -3.1187e+01], + [-2.5245e+01, 1.9611e+01, -7.4000e-01, -1.5047e+01, -3.2242e+01, + -7.5167e+00]], + + [[ 1.5225e+01, -8.1117e+00, 2.3894e+01, 3.4110e+00, -1.5564e+01, + -1.6471e+00], + [-2.4827e+01, 2.1827e+00, 2.1729e+01, -1.7261e+01, 2.6620e+01, + 2.1128e+01], + [ 8.8218e+00, -3.1076e+01, -4.0147e+00, -1.8270e+01, 3.3127e+00, + -9.0832e-01], + [-5.8058e+00, 8.6448e+00, 7.2669e+00, 4.1642e+01, 2.9576e+00, + -2.1493e+01], + [-1.4143e+01, 2.1080e+01, 3.9626e+01, -1.6192e+00, -4.8345e+01, + 1.0363e+01], + [ 7.3484e+00, 1.9200e+01, 6.8525e-01, -2.3770e+01, 1.9260e+01, + 3.4740e+00]]], + + + [[[-1.7204e+01, 2.7923e+01, -2.1302e+01, -1.9810e+01, 3.2535e+01, + -3.0405e+01], + [-1.4934e+01, 7.5868e+01, -4.2455e+01, 9.5382e+00, 1.0415e+01, + -3.3989e+01], + [ 1.1103e+00, 1.2644e+01, -1.0561e+01, -3.6917e+00, 3.7296e+01, + -8.4717e+00], + [ 1.7178e+01, 3.1835e+01, 1.5056e+01, -9.7515e+00, -2.2679e+01, + -3.5688e+01], + [-4.6248e+01, 6.6559e+00, 2.6084e+00, -5.3964e+00, 1.5587e+01, + 2.4642e+01], + [ 7.0854e+00, 5.0883e+01, -2.0413e+01, -9.6752e+00, -1.0166e+01, + -2.7874e+01]], + + [[ 1.6488e+01, -6.7920e+00, -2.6865e+00, 1.7668e+01, -2.3272e+01, + 8.2484e+00], + [ 1.6097e+01, -6.7093e+00, -8.1159e+00, -2.9688e+01, 7.8054e+00, + 9.2428e+00], + [-2.1152e+00, -1.7606e-01, 4.4501e+00, 1.2627e+01, -1.0182e+01, + -6.1416e+00], + [-7.5072e-02, 2.9625e+01, -3.5118e+01, -3.8816e+01, -1.1095e+00, + 2.1909e+01], + [ 1.6438e+01, 1.7336e+01, -9.1775e+00, 1.9114e+01, 1.4552e+01, + -2.2556e+01], + [ 2.0026e+01, -3.1022e+01, -1.8629e+01, 1.0793e+00, -8.2290e+00, + 1.6719e+00]]]]) + + let t1p12 = combo.tensor([[[[ 2.0904e-01, -3.0708e+00, -5.8043e-01, 5.2003e-01, 3.1007e+00, + -3.0689e+00, 1.9686e+00, 
2.2593e+00], + [-1.5114e+00, -3.5046e+00, -7.6147e+00, -7.7734e-01, -7.9658e-01, + -2.7403e+00, 3.2388e+00, 4.1308e-01], + [ 7.0679e+00, 2.5667e+00, -3.3968e+00, -2.1805e+00, -4.6535e+00, + -6.6126e+00, -4.5696e+00, -2.7322e+00], + [ 4.8306e+00, 1.1991e+00, 1.6866e-01, 4.3821e-01, -4.4588e-01, + 2.7424e+00, 3.8553e+00, -1.8864e-01], + [ 3.8901e-01, -5.3517e+00, -2.3543e+00, 3.5484e+00, 3.9898e-01, + -4.1207e+00, -1.5045e+00, 1.9773e+00], + [ 4.3314e+00, -3.5333e+00, 2.1335e+00, 5.1173e+00, 5.2105e+00, + -5.9196e+00, -2.3715e+00, 8.5792e-02]], + + [[ 1.4584e+00, 1.0401e+00, 4.0129e+00, 1.2725e+00, -4.3258e-01, + -3.3049e-01, -1.2140e+00, -1.6860e+00], + [-1.7470e+00, -1.6925e+00, -7.9839e-02, 5.8790e-01, -1.4510e+00, + 4.8597e+00, 4.4617e+00, 3.7802e+00], + [ 2.7816e+00, -1.4593e-01, 7.2832e-01, 1.8055e-01, -2.4145e+00, + -3.6923e+00, -2.9494e+00, -6.4016e+00], + [ 3.0402e+00, 6.5263e-01, 7.9575e+00, -2.5088e+00, 4.5268e+00, + 6.6195e+00, 1.6011e+00, 4.3730e+00], + [-1.4767e+00, -2.0553e+00, 1.7944e+00, -6.4128e-02, -3.9420e-01, + -9.2923e-01, 3.8154e+00, -9.5326e-01], + [ 2.3029e+00, -1.4282e+00, 4.1835e+00, -7.0811e-01, 4.0882e+00, + -1.2903e+00, -3.6706e-01, -2.4274e+00]], + + [[ 5.5625e+00, -3.0755e-01, -5.8200e+00, 8.1142e+00, -5.4013e+00, + -3.2303e+00, -5.2555e-01, -7.5444e-01], + [ 8.5872e+00, -1.0552e+01, 1.7941e+00, 4.1905e+00, -7.0491e-02, + 6.0357e+00, 8.2003e-01, 1.2992e+00], + [ 2.3029e+00, -7.7644e+00, 5.2392e+00, 3.0534e+00, -1.3255e+00, + 1.9722e+00, -8.8349e+00, 1.8596e+00], + [ 4.1077e+00, -6.4727e+00, 5.4707e+00, -8.1994e-01, -2.2840e+00, + -7.3100e+00, 1.6094e+00, -1.8923e-01], + [ 3.6762e+00, -8.5700e+00, 7.6150e+00, -7.6913e+00, 4.0187e+00, + -3.4347e+00, 3.4880e+00, 4.4458e-01], + [-9.3896e-02, -1.3383e+00, -1.4096e-02, -2.6950e+00, 4.1328e+00, + -3.0136e-02, -4.4437e+00, 2.1302e+00]]], + + + [[[-2.3001e+00, -6.1048e-01, -5.0777e-01, 6.5870e+00, -6.9482e-01, + 1.8224e+00, -1.5978e+00, -9.7060e-01], + [ 1.0086e+00, -2.5574e+00, -3.5676e+00, -1.6516e+00, -3.0457e-02, + 2.0455e+00, -2.9152e+00, 4.7178e-01], + [ 1.8213e+00, 2.9062e+00, -1.6246e+00, -3.0354e+00, 1.9258e+00, + 1.2320e+00, 1.7550e-01, 1.5679e+00], + [-3.1161e-01, 2.1187e-01, 7.7038e-01, -8.0618e+00, -3.1787e+00, + -7.8896e-01, 2.8006e+00, 1.1497e+00], + [-4.3055e+00, 2.3031e+00, 6.8383e+00, 3.3530e+00, -2.6364e+00, + -2.0941e+00, 2.3572e-01, -1.9117e-02], + [-5.0467e-01, -4.9318e+00, -1.4161e+00, -4.3488e+00, -2.0141e+00, + -9.1710e-01, -1.2912e+00, -4.6389e-01]], + + [[-1.5890e-02, 2.9213e-01, 2.8771e+00, 3.3473e+00, -1.5947e+00, + -8.5990e-02, 5.4676e-01, -5.4066e-01], + [ 4.7811e-01, -9.0797e-01, -3.5322e+00, -5.5444e+00, -9.3019e-01, + -2.8029e+00, -2.3730e+00, 1.3185e+00], + [ 1.1198e+00, 1.3149e+00, 3.9382e+00, 9.9105e-01, 2.3394e+00, + 1.5633e+00, 2.0929e+00, -3.1767e-02], + [ 1.2592e+00, 8.2358e-01, -3.0186e+00, -4.4605e+00, -2.6266e+00, + -7.4811e-01, 1.6034e+00, 5.7280e-01], + [-2.5910e-01, 4.7576e+00, 6.1715e+00, 5.1725e+00, 4.2087e+00, + 1.5060e-02, 2.3843e+00, -1.1196e+00], + [ 3.4526e-02, -3.3107e+00, -2.9831e+00, -4.9749e+00, 1.3786e+00, + -2.0894e+00, -3.8216e-04, -4.6755e-01]], + + [[ 1.9114e+00, -2.1377e+00, 1.1277e+00, 1.8681e+00, 7.3343e+00, + -1.7746e+00, 1.5446e+00, -6.5108e-01], + [ 9.0044e-01, -3.1848e+00, -3.0141e+00, 8.5436e+00, 4.0129e-01, + -9.0136e-01, 4.6455e-01, -1.2833e+00], + [-9.6027e-01, 1.5802e+00, 1.1102e+00, 8.0889e-01, 2.0755e+00, + 2.4087e-01, -2.8644e+00, -8.1120e-01], + [-2.9667e+00, -1.1450e+00, 2.8817e+00, -7.4703e+00, 4.4933e+00, + 1.5010e+00, -1.4258e+00, 
3.4844e-01], + [ 6.3914e-02, -6.0574e+00, 3.2300e+00, -6.4394e+00, 7.5388e+00, + -3.8723e+00, -1.0272e+00, 5.6870e-01], + [-1.0461e+00, -4.1427e+00, 1.4182e-01, 5.8372e+00, -3.7351e-01, + -2.1219e+00, -5.3250e-01, 7.3212e-01]]]]) + + let t3p12 = dsharp.convTranspose2d(t1p12, t2, paddings=[1;2]) + let t3p12Correct = combo.tensor([[[[-3.3253e+00, 5.4371e+00, -5.9816e+01, 3.2848e+01, 4.3607e+00, + 1.6492e+00], + [ 9.7679e+01, 6.7747e+00, -4.1665e+01, 2.0032e+01, -5.2838e+01, + -5.1707e+01], + [ 8.8758e+01, -3.6796e+01, 2.3499e+01, 5.4406e+01, 1.2719e+01, + 9.2254e+01], + [ 2.9644e+01, -5.8237e+01, 8.4483e+00, 1.9658e+01, -4.7019e-01, + -2.9818e+01], + [ 8.3079e+01, -7.1435e+01, 6.8016e+01, 1.2094e+00, 3.2177e+01, + -5.0162e+01], + [ 2.5591e+01, -4.3952e+01, 1.5923e+01, -3.4699e+01, 3.2012e+01, + 6.0155e+00]], + + [[ 2.1227e+01, -2.3985e+01, 2.5212e+01, 6.6007e+00, 1.4629e+01, + 1.4605e+00], + [-2.6661e+01, -3.7247e+01, -3.5895e+00, -1.5200e+01, 4.8951e+00, + 2.6003e+00], + [-2.9117e+00, -1.1533e+01, 5.0551e+00, 2.0305e+01, 1.1223e+01, + -3.9817e+01], + [ 4.3272e-01, 2.2397e+01, 2.0958e+01, -9.2573e+00, 2.9995e+00, + -1.3124e-02], + [-1.6896e+01, 1.5415e+01, -1.4137e+01, -2.0068e+01, -3.1009e+01, + 4.9770e+00], + [ 5.7245e+00, -7.4357e+00, -1.5237e+01, 1.4700e+01, 2.3845e+01, + -2.0412e+01]]], + + + [[[ 4.3162e+00, -3.7015e+01, -2.1557e+01, -4.2872e+01, 1.5128e+01, + -3.9774e+00], + [ 4.5918e+01, 2.2101e+01, -2.0702e+01, 1.0873e+01, 3.8851e+01, + -7.5143e-02], + [-1.0218e+01, -2.4655e+00, -3.6693e+01, -2.1096e+01, -1.4586e+01, + 2.4224e+00], + [-7.7054e-02, 3.7449e+01, 8.2727e+01, 2.5300e+00, 4.3413e+00, + -6.6815e-01], + [-1.6899e+01, -6.8931e+01, 2.8213e+01, -7.6618e+01, 2.3916e+01, + -5.7188e+00], + [ 2.7919e+01, -7.0535e+00, 1.7199e+01, -1.2670e+00, 3.3749e+01, + -6.5397e+00]], + + [[-1.3218e+01, -1.1301e+01, -9.3226e+00, 4.0663e+01, -1.1171e+01, + 8.9378e+00], + [-1.3149e+00, -3.0373e+01, 3.3557e+00, 1.8259e+01, -1.2272e+00, + 7.0654e+00], + [-7.9169e+00, -9.3778e+00, 1.4320e+01, 5.5024e+00, 3.0991e+01, + 1.1212e+01], + [ 2.0909e+01, 9.3709e+00, -2.4690e+01, -3.7275e+01, 1.1494e+01, + -1.2765e+01], + [-2.6079e+01, 1.4229e+01, 1.4370e+00, 3.9834e+01, 3.5829e-01, + -8.2415e+00], + [-1.8959e+01, -2.4770e+01, -1.8573e-01, 6.9171e-01, -8.2630e+00, + -1.0300e+01]]]]) + + let t1s2 = combo.tensor([[[[-4.2302, -2.7939], + [ 4.5052, 3.8188]], + + [[ 5.7356, 8.4538], + [ 3.7488, 6.3469]], + + [[ 8.4869, 10.8920], + [ 6.1609, -5.2561]]], + + + [[[ 4.4355, -3.7308], + [-1.7996, 2.1566]], + + [[ 4.5993, -2.7351], + [ 4.9944, 1.7658]], + + [[-3.0583, -7.1895], + [ 9.4745, 6.8794]]]]) + let t3s2 = dsharp.convTranspose2d(t1s2, t2, stride=2) + let t3s2Correct = combo.tensor([[[[ 11.5695, -2.6138, 10.4181, -0.7474, 0.3506], + [ 5.1947, -23.8791, 10.8908, -33.8715, 9.9235], + [ 15.2073, 0.4402, 57.1628, -4.9930, 45.5023], + [ -5.9665, -24.7330, -4.9407, 8.3677, -11.8079], + [ 2.7278, -7.8481, 17.9155, 2.8690, -9.0182]], + + [[ -2.4383, -0.4080, 10.5790, -1.7535, 6.4095], + [-14.9385, 12.3029, -19.8310, 14.0606, 4.4357], + [ 12.2329, -1.2355, 12.5506, 3.5710, -19.5942], + [-10.4553, 3.1825, 16.7942, -6.3499, 8.0906], + [ -1.6342, 2.8274, 1.5294, -11.8432, -0.6639]]], + + + [[[ 6.1489, 3.9072, 14.2093, -4.9419, -7.6127], + [ -9.8767, 2.1657, -3.6406, 26.4635, -8.2018], + [ 9.0536, 0.4292, -8.0068, 11.7128, -14.2008], + [ 2.8222, -28.8545, 8.8117, -23.4528, 8.4723], + [ 9.9734, -7.3408, 26.0629, -7.2857, 16.4690]], + + [[ 12.1241, -0.5976, -20.2883, 4.3397, 1.0178], + [ 2.7963, -5.1495, 20.3365, -4.4954, 
-9.9885], + [ -3.8460, -9.8555, 11.2138, -8.4357, -2.4537], + [-16.0263, 11.2437, -7.3697, 5.1708, 6.8122], + [ 3.3692, 4.8615, 0.5627, 5.5199, 3.5944]]]]) + + let t1s13 = combo.tensor([[[[-9.8044, -2.9782], + [-2.7887, 4.5641], + [ 0.5278, 4.7393], + [-4.0212, -5.5322]], + + [[ 0.7842, -1.7191], + [-0.1781, -0.0738], + [ 7.6769, -0.2776], + [-5.3948, -1.7661]], + + [[ 6.1815, -2.2200], + [-9.2024, -5.4785], + [-6.2536, 0.4347], + [-2.3570, 4.6716]]], + + + [[[-3.0220, 2.2930], + [-3.3329, 1.0919], + [ 0.4386, -5.8802], + [-3.3151, 0.9038]], + + [[ 2.5312, 4.7056], + [ 0.3190, 0.0251], + [-2.4100, -0.1728], + [ 1.5978, -2.1062]], + + [[-1.8104, -8.8542], + [-2.7608, 3.7158], + [ 1.1023, 0.6211], + [ 0.3481, -3.1282]]]]) + let t3s13 = dsharp.convTranspose2d(t1s13, t2, strides=[1;3]) + let t3s13Correct = combo.tensor([[[[ 6.4277, -8.5650, -20.7816, -0.8889, -3.3346, -6.9913], + [ 17.7955, -13.1661, 6.8624, 2.1615, 14.0141, 9.2549], + [ 25.4467, 29.5152, 14.0191, -11.9140, 19.9407, -5.1138], + [-16.0010, 19.2597, -48.5687, -11.9344, -8.1418, -26.4900], + [ 13.4527, 19.4654, -10.5299, 6.4285, -10.3124, 9.1974], + [ -3.5454, 4.1097, -7.8530, 3.9047, -1.2428, 9.4873]], + + [[-14.1229, 2.7728, 20.9297, -4.7716, 2.1792, 4.1365], + [ -9.5353, 18.1484, -18.4952, 11.8849, -0.4852, -19.4332], + [ 33.4754, 1.8713, -29.7474, 15.2129, -11.8344, -3.7762], + [ -3.8556, -11.7677, 7.9117, -15.4929, -7.8825, 24.6944], + [ 4.6192, -14.7085, -14.3029, -9.5128, 8.9717, -4.3675], + [ 1.8746, 2.8585, -2.4361, 4.4878, 6.5368, -2.0876]]], + + + [[[ 6.7974, -3.2412, -2.0201, 8.6888, 0.8095, 14.0027], + [ 4.8581, 3.3171, -9.0605, -10.5821, 23.1227, -15.1524], + [ 2.6824, 15.2982, -9.8007, 3.4209, -10.9109, -24.7141], + [ 8.1563, -1.3002, -7.8564, 4.7387, 0.5551, 10.2388], + [ 0.9752, 0.1444, 1.0968, 3.4232, 12.0973, -2.1200], + [ 4.3388, 1.5527, 1.2354, -4.0167, 2.2382, -7.9069]], + + [[ -0.1760, 2.5242, -0.2943, 11.9993, 2.7168, -18.7214], + [ -1.1104, 3.9235, -2.1272, 11.0673, -10.8839, 5.8387], + [ 3.1212, -5.4843, -6.2836, -16.1657, -8.6078, 10.2598], + [ -1.3828, -1.4117, -0.3860, -0.9968, 9.3384, -9.3984], + [ -1.5388, 6.7630, -2.7617, 9.5889, -3.3946, -5.1327], + [ 2.4490, -1.3529, -2.8353, -1.3597, -1.1327, 0.1540]]]]) + + let t1s2p1 = combo.tensor([[[[ -3.4577, 3.2779, 2.9547], + [ 2.2602, -3.8841, 1.4702], + [ 0.2794, -2.2958, -3.5196]], + + [[ 0.1823, -0.9480, -0.3327], + [ 0.7481, -2.4933, -3.9782], + [ 3.2706, 2.8311, -4.2914]], + + [[-12.7793, -1.5203, 8.0372], + [ 5.0149, -9.2994, -1.8505], + [ 6.6970, -0.4846, 4.1385]]], + + + [[[ 1.8252, -2.0286, 4.0794], + [ 0.4706, 2.6324, -0.3310], + [ 0.9786, -0.9518, -5.4449]], + + [[ 3.1169, 0.4747, -1.1639], + [ -0.0482, 0.6452, -1.3964], + [ 1.8278, 0.1934, -2.0665]], + + [[ -7.7843, -7.3282, 1.5546], + [ -3.3539, -1.5674, 0.0477], + [ 2.6323, 6.4161, 6.6779]]]]) + let t3s2p1 = dsharp.convTranspose2d(t1s2p1, t2, stride=2, padding=1) + let t3s2p1Correct = combo.tensor([[[[ 4.1190e+01, -2.2363e+01, 1.7022e+00, -2.3258e+00, -2.6575e+01], + [ 1.6455e+01, -3.0412e+01, -5.9070e+00, -1.7994e+01, -7.7708e+00], + [-1.7504e+01, 9.6857e+00, 3.2733e+01, -1.1493e+01, 6.0931e+00], + [-4.2322e+00, 1.7293e+01, 8.0773e+00, -3.5520e+01, -2.1028e+00], + [-2.1819e+01, 8.3739e+00, 2.2502e+00, 6.4162e+00, -6.4449e+00]], + + [[-9.2576e+00, -6.9672e+00, -4.0686e+00, -7.5565e+00, 5.1920e+00], + [-1.6725e+01, -1.2407e+01, 4.7072e+00, -1.5580e+00, 8.3889e+00], + [ 3.1375e+00, 2.0321e+01, -6.2893e+00, -6.4338e+00, -3.6882e+00], + [ 2.2704e+00, 7.6596e+00, -5.7023e+00, -1.8606e+01, 
1.8768e+00], + [ 6.7321e+00, 4.2171e+00, 1.8832e+00, -6.5496e+00, 5.5370e+00]]], + + + [[[ 1.9554e+01, -1.3538e+01, 2.3471e+01, -1.4669e+01, -8.0876e+00], + [ 5.9935e+00, -1.4617e+01, 1.0030e+01, -1.6091e+01, -4.0002e+00], + [ 9.4754e+00, -9.2002e+00, 1.6508e+00, -1.4299e+00, 9.2735e-01], + [ 4.1938e+00, -6.5662e+00, 1.6308e-01, -1.0217e+01, -4.1529e+00], + [-9.7351e+00, 6.0219e+00, -1.8128e+01, 1.9980e+01, -1.3195e+01]], + + [[-7.9177e+00, 1.2681e+01, -5.1935e+00, -7.1072e+00, -1.8560e+00], + [-1.0267e+01, -1.7580e+00, -8.9404e+00, -1.5384e+01, 2.9346e+00], + [-3.4798e+00, 1.7023e+00, -3.2818e+00, 4.4891e+00, -1.3794e-02], + [-4.7645e+00, -7.2907e+00, -4.1505e+00, -2.3790e+00, 1.4794e+00], + [ 2.1159e+00, -6.0181e+00, 6.7175e+00, -8.3804e+00, 9.8221e+00]]]]) + + let t1s23p32 = combo.tensor([[[[ 0.0000, 0.0000, 0.0000], + [-3.2326, -1.2749, -3.3366], + [-1.7567, -0.9686, -2.1970], + [-1.4939, 2.3154, -0.4978], + [ 5.1554, -0.8580, -1.6888]], + + [[ 0.0000, 0.0000, 0.0000], + [-1.6604, -0.3488, 1.1702], + [-2.1695, -0.4674, 4.5114], + [ 0.6170, 0.3235, 4.8016], + [ 3.4517, 0.1421, 1.8764]], + + [[ 0.0000, 0.0000, 0.0000], + [-2.1929, -4.4554, 2.9319], + [ 3.2436, 8.7959, 1.2112], + [ 3.8262, 3.5775, 5.6113], + [-1.9036, -1.5468, 0.0142]]], + + + [[[ 0.0000, 0.0000, 0.0000], + [-1.5589, -0.6350, 0.7208], + [ 4.4022, 0.2401, 4.6891], + [-1.1714, -9.2079, -4.1885], + [ 1.9395, 5.5157, 3.1695]], + + [[ 0.0000, 0.0000, 0.0000], + [ 0.8601, 0.7594, 2.9743], + [ 3.9042, -0.1467, 1.2048], + [ 0.3783, -3.1536, 5.7121], + [ 1.4443, 1.0067, 4.0964]], + + [[ 0.0000, 0.0000, 0.0000], + [ 0.8332, -8.5063, -0.7146], + [-3.2521, -4.7905, -0.4381], + [-0.6507, 4.6023, -2.5422], + [-1.2853, -0.8996, 0.0497]]]]) + let t3s23p32 = dsharp.convTranspose2d(t1s23p32, t2, strides=[2;3], paddings=[3;2]) + let t3s23p32Correct = combo.tensor([[[[ -1.8653, 0.2227, 14.6477, -6.0192, 4.7756], + [-13.3418, -2.7359, 5.2295, -16.5054, 14.1446], + [ 6.1026, 4.8817, -24.8045, 12.7971, 0.5372], + [ 2.5602, 1.7874, -4.4904, 23.0840, 14.9680], + [ 5.0679, -1.9103, -13.0832, 4.7500, -0.2295]], + + [[ -6.8511, 6.6723, -3.2659, -3.7369, -4.8906], + [ 5.9968, -5.4707, -7.2737, 9.2400, 4.7367], + [ -2.9852, -12.8116, 8.7978, 1.1795, -3.6593], + [ 6.1712, 4.0169, 7.1913, 2.4910, 4.5172], + [ -0.6795, -5.3371, 1.6673, 5.0634, -10.2140]]], + + + [[[ 0.6744, -2.8392, 25.4059, -12.5133, -3.0779], + [ 16.9127, -1.2134, 7.4218, -16.6941, 1.7505], + [ -7.3768, -2.0289, 14.0095, -6.6914, -7.1047], + [ -7.0010, -2.8175, -4.1471, -34.6982, 11.0266], + [ -1.1221, 15.8524, -2.6974, 8.9922, 1.0017]], + + [[ -1.6517, 12.2203, -7.2672, -3.4317, -0.1174], + [-16.0018, 1.0966, -8.0625, -9.1513, 7.0926], + [ 8.2073, 7.1299, -4.6647, -1.3393, 0.3008], + [ 1.7930, -18.2269, -2.2634, 21.3948, -0.9061], + [ -1.8381, -5.7929, 10.4651, -14.8689, 1.3356]]]]) + + let t1p1d2 = combo.tensor([[[[ -1.4935, -0.9143, 1.9049, -3.4720], + [ -0.0765, -6.4800, -5.8089, 1.8598], + [ -4.9432, 0.7761, 4.2125, -2.6577], + [ 3.2395, -1.6309, 3.0082, 5.5846]], + + [[ 0.8980, -2.8900, 0.8966, -1.4387], + [ -1.3534, 3.0437, 1.8584, 2.4703], + [ 1.6080, 2.3951, 0.9763, 4.3595], + [ 2.8455, 4.4696, -0.3192, -0.7607]], + + [[ 1.8914, -2.6172, -0.7348, 1.3387], + [ 1.5050, 6.0453, -5.7601, -5.8269], + [ -1.9717, 3.9505, -0.5285, -4.7867], + [ -1.6577, -3.5756, -2.8567, 1.3185]]], + + + [[[ 2.0819, 0.7653, -1.9882, 1.9447], + [ -1.2180, 0.8260, -3.9099, 4.3648], + [ 1.3846, 1.3559, -1.9401, 4.3954], + [ -2.5044, 2.0114, 5.6507, 6.7569]], + + [[ -0.6521, -2.0061, -0.0293, 0.6525], + [ 
-1.3767, -2.5563, -1.3317, -0.2047], + [ -1.4225, 2.7875, 0.7057, -4.1782], + [ -2.0456, 1.1288, 3.3816, -3.9975]], + + [[-10.3514, 6.6914, 7.5311, -4.3119], + [ 5.0292, 12.8169, -0.9108, -7.8711], + [ 2.2663, -4.1982, 0.8442, 5.2652], + [ 2.8034, -1.7984, -8.3519, 4.9279]]]]) + let t3p1d2 = dsharp.convTranspose2d(t1p1d2, t2, padding=1, dilation=2) + let t3p1d2Correct = combo.tensor([[[[ 8.4979, 8.1578, -0.9246, -9.1176, -10.5874, -6.6205], + [ 5.2079, -13.8357, 28.0654, -0.8426, -2.0303, 7.9319], + [ 18.6946, 1.6702, -26.3933, 37.9199, 30.4063, -2.4531], + [ -4.3368, 2.0038, -10.4593, -2.6176, 11.9491, -2.7395], + [ 8.2586, -1.6561, 0.9127, 12.4730, 1.7779, -16.6218], + [ 3.3383, 2.1986, 1.1473, -5.9113, 15.9062, -0.5134]], + + [[ -7.2998, -3.4685, 9.0009, 8.7258, 14.5827, 0.4311], + [ 7.0128, 13.9733, -1.8878, -1.3741, 3.4671, -4.8869], + [ -5.7934, 12.6768, 27.1129, -11.6433, -22.6556, -17.3718], + [ -6.8525, 2.3969, 11.4891, -13.2466, 4.4319, 8.0043], + [ 9.2020, 7.0856, -1.6636, -5.9827, -16.3868, -3.3851], + [ 0.3902, -6.7436, 3.0319, -6.3136, -8.1966, 2.7587]]], + + + [[[ -7.4104, -0.1922, 1.2678, -10.2139, -4.0052, -8.8859], + [ 7.7276, 38.4212, -34.7024, -36.3262, 34.2309, 7.4849], + [ 6.4525, -5.1866, -56.6429, 10.9949, 52.3020, 18.4749], + [ -5.5316, 7.2902, -0.3738, -26.0990, -8.4270, 17.7335], + [ -2.9813, -20.0934, -20.1420, 36.3213, 10.0719, -17.1758], + [ 0.5118, -0.3265, -2.8530, 4.9610, -15.5394, 2.0790]], + + [[ -7.3016, -7.5058, 3.0433, 11.1989, 16.5089, 6.5538], + [ -2.3799, -26.2130, 11.1512, 10.6192, -17.7198, 2.4928], + [-13.2641, 21.5614, 24.2401, 10.6934, -18.6836, -29.0676], + [ 5.6654, -8.2564, -3.2337, 10.4815, 1.4508, -1.1905], + [ 3.8546, 24.1948, 3.2743, -13.9923, -0.1919, 5.1839], + [ -1.4752, 5.4015, -9.5561, 2.3548, 8.7044, -1.3711]]]]) + + let t1p22d23 = combo.tensor([[[[-1.2098e+00, -2.4110e+00, -1.0279e+00, -3.9876e+00], + [ 5.6019e-01, -1.5290e+00, 1.2401e+00, 1.2266e-01], + [ 1.9778e+00, -1.5180e+00, -1.3277e+00, 1.1161e+00], + [ 7.8095e-01, 6.0152e+00, -1.1348e+00, -1.9066e+00], + [ 2.4955e+00, 3.9095e+00, 1.1106e+00, 1.6221e+00], + [-4.0381e+00, -3.6661e+00, -1.3509e+00, -4.5592e+00]], + + [[-1.0937e+00, -2.0893e-01, 1.9642e+00, -6.0165e-01], + [-7.3989e-01, -5.2584e+00, -1.4846e+00, 2.1132e-01], + [ 1.6045e+00, -3.0431e+00, 1.5164e+00, 2.7907e+00], + [ 3.3791e+00, 5.5568e+00, 1.0130e+00, 4.2790e-01], + [ 1.2678e+00, 3.2593e+00, 2.7528e+00, -1.6473e+00], + [-4.9622e+00, -1.8143e+00, -2.2499e+00, 6.0567e-01]], + + [[-3.4310e+00, -2.9905e+00, 6.9098e-01, -3.8573e+00], + [-1.5282e+00, 2.4647e-01, 2.8520e+00, 1.1805e+00], + [ 4.1877e+00, -1.6244e+00, -3.7407e+00, -4.6168e+00], + [-1.7002e+00, 1.5955e+00, 6.4699e+00, 2.2116e+00], + [-5.5796e-01, 1.9423e+00, -1.5028e+00, -1.4009e+00], + [ 2.4800e+00, 6.2988e-01, 1.3072e+00, -6.6665e+00]]], + + + [[[-3.1804e+00, -5.1830e-01, -1.1245e+00, -2.0020e+00], + [ 5.1911e-01, -1.7104e+00, 2.2359e+00, 4.3109e-02], + [-4.8944e+00, 4.8992e+00, 1.6799e+00, -3.3535e+00], + [ 1.4257e+00, 3.6713e+00, -4.5776e-01, 1.3292e+00], + [ 2.8698e+00, -1.7510e+00, 5.5438e-01, 5.5704e-01], + [-1.1954e+00, 6.5019e-01, 1.9188e+00, 8.1933e-02]], + + [[-4.6997e-01, -1.3293e+00, -6.7385e-01, 4.6287e+00], + [-1.6234e+00, -1.0411e+00, 1.0147e+00, 1.0878e-01], + [-5.8939e-01, 1.6040e+00, -7.2406e-01, -1.0665e+00], + [ 1.8123e+00, 1.9490e+00, -4.8444e+00, -1.4087e+00], + [ 5.6853e-01, -2.5669e-01, 3.1855e-01, 3.0923e+00], + [-9.9076e-01, 4.7172e-03, 2.6959e+00, -1.8670e-01]], + + [[-6.2282e+00, 8.8515e-01, -2.2936e+00, 9.4559e-01], + 
[-2.9560e+00, 9.6039e-01, 5.5681e+00, 1.3379e+00], + [-4.0362e+00, 9.9716e+00, 1.6734e+00, -4.0311e+00], + [ 3.0872e+00, -1.5992e+00, -7.6902e-01, 1.6764e+00], + [ 4.4828e-01, 2.8493e+00, 5.6855e-01, -5.2895e+00], + [ 2.8623e+00, 3.1194e+00, -3.9290e+00, -2.4554e+00]]]]) + let t3p22d23 = dsharp.convTranspose2d(t1p22d23, t2, paddings=[2;2], dilations=[2;3]) + let t3p22d23Correct = combo.tensor([[[[ 4.6280e+00, 2.4009e+01, 9.5642e+00, -3.9685e+00, 1.6077e+01, + -1.0013e+01], + [ 1.7769e+00, 6.6014e+00, 9.8511e+00, -8.8935e+00, -5.1148e-01, + 2.1805e+01], + [ 7.0965e+00, -1.6816e+01, 1.6153e+01, 1.2492e+01, 2.1143e+01, + 3.5813e+00], + [-1.7276e+00, 8.6048e+00, -1.6800e+01, -2.2964e+01, -3.5034e+01, + -1.3067e+01], + [-2.4116e+00, -7.3908e+00, -9.2208e+00, 6.0162e+00, 1.5574e+01, + -4.6266e+00], + [ 7.8526e+00, 6.3514e+00, -1.7287e+00, -6.4759e+00, 2.7634e+01, + 8.5755e+00]], + + [[-2.0408e-01, 7.3995e+00, 2.9141e-02, 4.1133e+00, -3.2888e+00, + -8.5523e-01], + [-6.6055e+00, -6.0599e+00, -2.8484e+00, -3.6159e-01, -5.8471e+00, + -1.9475e+01], + [ 1.1019e+01, 7.7522e+00, -6.4308e+00, -3.1367e+00, -1.0815e+01, + -1.5296e+01], + [-1.5233e+01, -7.2742e+00, 5.1823e+00, 1.1571e+01, 2.7042e+01, + 2.2181e+01], + [ 1.4806e+00, 2.1242e+00, 9.2378e-01, -7.1384e+00, -3.4593e+00, + 7.8981e+00], + [ 9.8048e-01, 1.0242e+01, -8.3664e-01, 7.4823e+00, -1.0282e+01, + -3.0258e+00]]], + + + [[[-1.6121e+00, 1.7316e+01, 5.1522e+00, 1.0198e+01, -2.4169e+01, + 9.4833e+00], + [-9.8801e+00, 7.7356e+00, 2.7915e+00, -1.9914e+01, -1.9059e+00, + 1.2361e+01], + [-1.6598e+00, 4.3962e+01, -3.7071e+01, -2.9644e+00, 2.4383e+00, + 9.3214e+00], + [ 9.0554e+00, -9.5691e+00, 1.2193e+00, 3.6328e-01, -1.6113e+01, + -2.1219e+00], + [-2.1859e+00, -2.8941e+00, -1.8096e+01, -4.8515e+00, 9.2880e+00, + 2.7867e+01], + [-1.0800e+01, -1.3079e+01, -1.0380e+01, 9.0377e+00, 1.7603e+01, + 1.8126e+00]], + + [[ 4.2091e+00, -6.5818e+00, -5.2048e+00, -3.0196e+00, 2.8719e+00, + 2.3581e-01], + [-1.4303e+01, -7.8386e+00, 8.8451e-01, 3.7105e+00, -2.3244e+00, + -1.2665e+01], + [-1.0179e+00, 6.6568e+00, 8.0111e+00, -2.0887e+00, -2.1675e+01, + 1.8483e+01], + [ 9.8004e+00, -5.4378e-01, -3.3078e+00, 4.2812e+00, 1.2749e+01, + 8.1681e+00], + [-1.8430e+00, 2.6286e+00, 1.2923e+01, 2.7304e+00, -7.8955e+00, + 4.8717e+00], + [ 4.5538e+00, 7.7849e+00, -1.1294e+00, -4.1387e-01, 1.0196e+00, + 3.7936e+00]]]]) + + let t1s3p6d3 = combo.tensor([[[[-0.4797, 1.2067, 0.8487, -0.9267], + [ 0.0488, 2.9384, -2.8182, -2.7154], + [ 0.9480, -2.3075, -4.5708, -2.2337], + [ 0.1669, 4.3160, 2.9409, -0.7828]], + + [[-0.1887, 0.4049, -1.9126, 0.4331], + [ 0.2998, 0.4966, 1.3509, 2.1225], + [-0.3169, -2.3733, -4.2170, -0.0781], + [-0.1093, 2.5067, 3.0689, 5.2431]], + + [[-2.0482, 1.2449, 0.3645, 0.2970], + [ 1.1837, 8.8906, -0.6150, -0.3658], + [ 1.4408, 2.9900, -8.0328, -0.4368], + [ 0.8015, -0.6401, -0.4330, -0.6978]]], + + + [[[-0.6182, -0.5837, 0.7181, 0.6395], + [-1.5513, -1.4997, 0.8532, 0.0916], + [ 0.0921, -0.2811, 0.0137, -2.8628], + [-0.1444, 4.6484, 1.7724, -2.7309]], + + [[ 0.1016, 1.0336, 0.5866, -0.0869], + [-0.3539, 0.7336, 1.4618, 1.5993], + [ 0.6032, -0.6872, -2.0944, -1.2374], + [-0.0151, 3.2930, -1.2824, 0.5289]], + + [[-0.8863, -0.9437, -1.2007, -0.1748], + [-1.6423, 1.9599, -2.7169, 1.5076], + [-1.4196, 1.2534, -3.9894, 3.1457], + [-0.2654, -2.1439, 1.0330, 0.4360]]]]) + let t3s3p6d3 = dsharp.convTranspose2d(t1s3p6d3, t2, stride=3, padding=6, dilation=3) + let t3s3p6d3Correct = combo.tensor([[[[-38.8444, 0.0000, 0.0000, 8.3644], + [ 0.0000, 0.0000, 0.0000, 0.0000], + [ 
0.0000, 0.0000, 0.0000, 0.0000],
+                                              [ 7.0444, 0.0000, 0.0000, 90.9947]],
+
+                                             [[ -0.5142, 0.0000, 0.0000, 25.4986],
+                                              [ 0.0000, 0.0000, 0.0000, 0.0000],
+                                              [ 0.0000, 0.0000, 0.0000, 0.0000],
+                                              [ 36.8548, 0.0000, 0.0000, -9.3262]]],
+
+
+                                            [[[-12.5651, 0.0000, 0.0000, 2.8805],
+                                              [ 0.0000, 0.0000, 0.0000, 0.0000],
+                                              [ 0.0000, 0.0000, 0.0000, 0.0000],
+                                              [-10.1415, 0.0000, 0.0000, 48.2164]],
+
+                                             [[ -3.6824, 0.0000, 0.0000, -12.8018],
+                                              [ 0.0000, 0.0000, 0.0000, 0.0000],
+                                              [ 0.0000, 0.0000, 0.0000, 0.0000],
+                                              [ 5.7111, 0.0000, 0.0000, -31.2658]]]])
+
+            Assert.That(t3Correct.allclose(t3, 0.01))
+            Assert.That(t3p1Correct.allclose(t3p1, 0.01))
+            Assert.That(t3p12Correct.allclose(t3p12, 0.01))
+            Assert.That(t3s2Correct.allclose(t3s2, 0.01))
+            Assert.That(t3s13Correct.allclose(t3s13, 0.01))
+            Assert.That(t3s2p1Correct.allclose(t3s2p1, 0.01))
+            Assert.That(t3s23p32Correct.allclose(t3s23p32, 0.01))
+            Assert.That(t3p1d2Correct.allclose(t3p1d2, 0.02))
+            Assert.That(t3p22d23Correct.allclose(t3p22d23, 0.01))
+            Assert.That(t3s3p6d3Correct.allclose(t3s3p6d3, 0.01, 0.01))
+
+    [<Test>]
+    member _.TestTensorConvTranspose3D () =
+        for combo in Combos.FloatingPointExcept16s do
+            let t1 = combo.tensor([[[[ 0.9873, 2.7076, -0.9461],
+                                     [-0.0808, 1.5441, -0.8709],
+                                     [-0.8709, 0.3782, 2.0588]],
+
+                                    [[ 1.0087, -0.8291, 0.8613],
+                                     [-0.6963, 0.1493, 0.2307],
+                                     [-0.0230, 1.0297, 1.7398]],
+
+                                    [[ 2.0611, -1.6843, -1.0479],
+                                     [-0.0454, -0.3567, 0.5329],
+                                     [ 1.5642, 0.3775, 1.8207]]]]).unsqueeze(0)
+            let t2 = combo.tensor([[[[-0.6863, 0.6292, 1.2939],
+                                     [ 0.6178, -1.1568, -1.2094],
+                                     [ 0.2491, 1.3155, 0.3311]],
+
+                                    [[-0.1488, 0.1148, -2.6754],
+                                     [ 1.0680, 0.5176, 0.4799],
+                                     [-0.8843, -1.2587, -0.5647]],
+
+                                    [[-0.1586, 0.1037, -0.8961],
+                                     [-0.5436, 0.7449, -1.4694],
+                                     [-0.5542, 0.4589, 0.9205]]],
+
+
+                                   [[[-0.7661, 0.1054, 0.0801],
+                                     [ 0.8272, -0.0132, -2.3537],
+                                     [-0.8411, 0.6373, -0.4968]],
+
+                                    [[ 0.4365, 1.0976, -1.0754],
+                                     [ 0.6496, -0.2016, -0.5867],
+                                     [ 0.7225, -0.6232, 1.1162]],
+
+                                    [[-0.0697, -0.5219, -0.3690],
+                                     [ 1.5946, -0.9011, -0.1317],
+                                     [-0.5122, -1.3610, -0.1057]]]]).unsqueeze(0)
+
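+            // note: a 3x3x3 volume with 3x3x3 kernels expands to 5x5x5 (3 + 3 - 1 per
+            // dim), and the single input channel maps to two output channels
+            let t3 = dsharp.convTranspose3d(t1, t2)
+            let t3Correct = combo.tensor([[[[-0.6776, -1.2371, 3.6305, 2.9081, -1.2242],
+                                            [ 0.6655, -0.5798, -3.4461, -0.7301, 0.0174],
+                                            [ 0.7937, 2.2132, -0.8753, 0.5767, 3.4039],
+                                            [-0.5582, 1.5194, 3.6753, -3.4734, -2.7783],
+                                            [-0.2169, -1.0514, 0.7219, 2.8336, 0.6817]],
+
+                                           [[-0.8392, 0.9142, -1.9974, -7.8834, 3.6458],
+                                            [ 2.1676, 0.9441, 0.6938, -3.0770, 1.1327],
+                                            [-0.9930, -0.8891, -1.5376, 2.0150, -3.1344],
+                                            [-1.0463, -1.5267, 0.7838, -1.4336, -0.5480],
+                                            [ 0.7644, 0.9879, -0.0247, -0.1753, -0.5864]],
+
+                                           [[-1.7213, 2.3650, -1.0495, -3.0462, -2.8125],
+                                            [ 1.9617, -4.6640, 2.4310, -3.3593, 3.9237],
+                                            [-2.5857, -0.1416, 4.5485, -4.4521, -5.1023],
+                                            [ 2.0645, -1.6396, 2.3854, 1.0397, -5.1477],
+                                            [ 0.8926, 0.6609, -3.1227, 1.0417, 1.5156]],
+
+                                           [[-0.4667, 0.7234, -6.6784, 5.2182, 2.0317],
+                                            [ 1.7702, 0.4220, -2.9658, 1.4148, -3.4009],
+                                            [-2.2808, -1.2291, -1.2356, 0.4161, -5.1288],
+                                            [ 2.1092, 0.6063, 2.0487, 0.6804, -1.7714],
+                                            [-1.3705, -2.8840, -3.4814, -0.7586, 0.5735]],
+
+                                           [[-0.3269, 0.4809, -1.8555, 1.4006, 0.9390],
+                                            [-1.1133, 2.5028, -3.7944, 2.0693, 1.0622],
+                                            [-1.3657, 2.1418, -0.4349, -1.2597, -3.3792],
+                                            [-0.8252, 1.1367, -3.5079, 0.7176, -2.1848],
+                                            [-0.8669, 0.5087, 0.6042, 1.1831, 1.6760]]],
+
+
+                                          [[[-0.7564, -1.9702, 1.0893, 0.1172, -0.0758],
+                                            [ 0.8786, 1.0353, -2.3188, -6.3286, 2.1572],
+                                            [-0.2301, -0.7514, -0.1270, -5.3238, 2.6849],
+                                            [-0.6524, -1.0259, 5.5045, -2.2395, -4.4132],
+                                            [ 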
0.7325, -0.8731, -1.0580, 1.1241, -1.0229]], + + [[-0.3418, 3.0070, 0.8309, -3.9259, 1.0865], + [ 1.9740, 1.2583, -2.2057, -2.0378, -0.5173], + [-1.1262, 2.2510, -1.0006, 5.6069, -3.5906], + [-0.0575, 1.8699, 1.8174, -0.7445, -6.3896], + [-0.6098, -0.0648, -0.5161, -0.2638, 1.4335]], + + [[-1.2076, 1.5488, -2.5398, 1.0863, -0.6611], + [ 3.6711, 0.7693, -9.7912, 4.7919, 2.2017], + [-3.2770, 1.9780, -3.2797, 0.7986, -2.1776], + [-0.5332, 2.4850, -1.1911, -2.2108, -5.4925], + [-0.8862, 2.4291, -2.9556, -1.8043, 0.8196]], + + [[ 0.8293, 1.0586, -4.5222, 0.5174, 0.8091], + [ 2.9762, -3.5933, 0.4902, 1.3255, -0.1569], + [ 0.5169, -0.9847, 2.8202, -2.1327, -4.2036], + [ 1.3033, 2.2345, 2.3475, -3.3519, -0.7269], + [ 1.1419, -1.1981, 0.5359, -3.1900, 1.8482]], + + [[-0.1437, -0.9583, 0.1916, 1.1684, 0.3867], + [ 3.2899, -4.4946, -0.2590, 1.0196, -0.0586], + [-1.2372, -3.3131, 2.8871, 0.0815, -0.6312], + [ 2.5176, -0.5630, 2.5744, -2.3779, -0.2962], + [-0.8012, -2.3222, -1.6117, -2.5178, -0.1925]]]]).unsqueeze(0) + + let t3p1 = dsharp.convTranspose3d(t1, t2, padding=1) + let t3p1Correct = combo.tensor([[[[ 0.9441, 0.6938, -3.0770], + [-0.8891, -1.5376, 2.0150], + [-1.5267, 0.7838, -1.4336]], + + [[-4.6640, 2.4310, -3.3593], + [-0.1416, 4.5485, -4.4521], + [-1.6396, 2.3854, 1.0397]], + + [[ 0.4220, -2.9658, 1.4148], + [-1.2291, -1.2356, 0.4161], + [ 0.6063, 2.0487, 0.6804]]], + + + [[[ 1.2583, -2.2057, -2.0378], + [ 2.2510, -1.0006, 5.6069], + [ 1.8699, 1.8174, -0.7445]], + + [[ 0.7693, -9.7912, 4.7919], + [ 1.9780, -3.2797, 0.7986], + [ 2.4850, -1.1911, -2.2108]], + + [[-3.5933, 0.4902, 1.3255], + [-0.9847, 2.8202, -2.1327], + [ 2.2345, 2.3475, -3.3519]]]]).unsqueeze(0) + + let t3p122 = dsharp.convTranspose3d(t1, t2, paddings=[1; 2; 2]) + let t3p122Correct = combo.tensor([[[[-1.5376]], + + [[ 4.5485]], + + [[-1.2356]]], + + + [[[-1.0006]], + + [[-3.2797]], + + [[ 2.8202]]]]).unsqueeze(0) + + let t3s2 = dsharp.convTranspose3d(t1, t2, stride=2) + let t3s2Correct = combo.tensor([[[[-6.7761e-01, 6.2121e-01, -5.8084e-01, 1.7037e+00, 4.1528e+00, + -5.9531e-01, -1.2242e+00], + [ 6.0999e-01, -1.1421e+00, 4.7885e-01, -3.1322e+00, -3.8592e+00, + 1.0945e+00, 1.1442e+00], + [ 3.0137e-01, 1.2479e+00, -1.6300e-01, 4.5334e+00, 3.2566e+00, + -1.7926e+00, -1.4401e+00], + [-4.9924e-02, 9.3474e-02, 1.0517e+00, -1.7862e+00, -2.4055e+00, + 1.0074e+00, 1.0532e+00], + [ 5.7757e-01, -6.5425e-01, -1.0286e+00, 2.2692e+00, -6.2927e-01, + 1.4977e-01, 2.3755e+00], + [-5.3806e-01, 1.0074e+00, 1.2869e+00, -4.3751e-01, 8.1462e-01, + -2.3816e+00, -2.4899e+00], + [-2.1691e-01, -1.1456e+00, -1.9417e-01, 4.9752e-01, 6.3803e-01, + 2.7084e+00, 6.8174e-01]], + + [[-1.4691e-01, 1.1336e-01, -3.0443e+00, 3.1089e-01, -7.1032e+00, + -1.0863e-01, 2.5313e+00], + [ 1.0545e+00, 5.1107e-01, 3.3656e+00, 1.4016e+00, 2.8882e-01, + -4.8976e-01, -4.5402e-01], + [-8.6109e-01, -1.2520e+00, -2.9655e+00, -3.2308e+00, -4.6937e+00, + 1.0909e+00, 2.8642e+00], + [-8.6301e-02, -4.1827e-02, 1.6104e+00, 7.9930e-01, -1.8915e-01, + -4.5081e-01, -4.1791e-01], + [ 2.0104e-01, 1.7161e-03, 9.5375e-01, -1.9002e+00, -1.4199e+00, + 1.3326e+00, -5.0164e+00], + [-9.3011e-01, -4.5080e-01, -1.3973e-02, 1.9577e-01, 2.3804e+00, + 1.0657e+00, 9.8797e-01], + [ 7.7015e-01, 1.0962e+00, 1.5728e-01, -4.7605e-01, -2.0343e+00, + -2.5915e+00, -1.1625e+00]], + + [[-8.4890e-01, 7.3710e-01, 5.6003e-01, -2.4080e-01, -3.9402e+00, + 4.4382e-01, 1.9623e+00], + [ 8.6515e-02, -4.3149e-01, -4.6548e+00, 2.9759e+00, -1.9294e+00, + -1.7011e+00, 3.4855e-01], + [ 1.9484e-01, 1.3335e+00, -1.6401e+00, 
4.0610e-01, 1.7460e+00, + 7.5368e-01, 4.9317e-01], + [-3.8631e-01, 7.4535e-01, 2.1371e-01, 9.7747e-01, -1.8335e+00, + -9.1553e-01, 1.0007e+00], + [ 2.5271e-02, -1.0580e+00, -1.1396e+00, 1.5921e+00, 1.4837e+00, + 1.2120e+00, -3.1902e-01], + [ 4.5918e-01, -6.2202e-01, 1.7381e+00, -9.0946e-01, -1.8453e+00, + -4.7914e-01, -5.1294e+00], + [ 4.7689e-01, -4.2998e-01, -7.6240e-01, 1.5281e+00, -1.8506e-02, + 3.2336e+00, 2.4713e+00]], + + [[-1.5009e-01, 1.1582e-01, -2.5754e+00, -9.5193e-02, 2.0899e+00, + 9.8899e-02, -2.3044e+00], + [ 1.0773e+00, 5.2215e-01, -4.0141e-01, -4.2916e-01, 5.2209e-01, + 4.4587e-01, 4.1333e-01], + [-7.8844e-01, -1.3496e+00, 2.0044e+00, 1.0607e+00, -7.2726e-01, + -1.0577e+00, -1.1035e+00], + [-7.4372e-01, -3.6046e-01, -1.7473e-01, 7.7268e-02, 3.1800e-01, + 1.1941e-01, 1.1070e-01], + [ 6.1924e-01, 8.7386e-01, 1.6961e-01, -6.9657e-02, -3.3020e+00, + -9.0590e-02, -4.7850e+00], + [-2.4604e-02, -1.1925e-02, 1.0887e+00, 5.3302e-01, 2.3523e+00, + 9.0061e-01, 8.3490e-01], + [ 2.0373e-02, 2.8997e-02, -8.9760e-01, -1.2961e+00, -2.1200e+00, + -2.1900e+00, -9.8242e-01]], + + [[-1.5746e+00, 1.4015e+00, 3.0505e+00, -1.1458e+00, -8.5383e-01, + -5.6999e-01, -2.1277e+00], + [ 7.2511e-01, -1.6330e+00, -4.5649e+00, 1.3309e+00, 2.1396e+00, + 1.8538e+00, 1.6567e-03], + [ 9.5978e-02, 3.0736e+00, 2.4374e+00, -2.8052e+00, -3.0569e+00, + -6.2395e-01, 9.2870e-01], + [ 3.5049e-01, -4.6614e-01, 7.7661e-01, 5.2383e-01, 4.1591e-01, + -4.4464e-01, -9.8344e-01], + [-6.9532e-01, 6.0249e-01, 7.9457e-01, -5.6395e-02, -1.9356e+00, + 2.1329e+00, 1.1855e+00], + [ 9.7896e-01, -1.8267e+00, -2.1844e+00, 3.3026e-01, -1.7905e+00, + -8.1025e-01, -4.7584e+00], + [ 4.0237e-01, 2.0471e+00, 2.0140e-02, 9.6920e-01, 5.6216e-01, + 3.1936e+00, 2.2044e+00]], + + [[-3.0669e-01, 2.3666e-01, -5.2638e+00, -1.9339e-01, 4.6621e+00, + -1.2032e-01, 2.8035e+00], + [ 2.2014e+00, 1.0669e+00, -8.0981e-01, -8.7187e-01, -1.9274e+00, + -5.4243e-01, -5.0285e-01], + [-1.8160e+00, -2.5996e+00, 5.0024e-01, 2.0791e+00, 2.7528e+00, + 1.3802e+00, -8.3402e-01], + [-4.8506e-02, -2.3509e-02, -4.0277e-01, -1.8465e-01, 3.9798e-01, + 2.7585e-01, 2.5572e-01], + [-1.9259e-01, 2.3677e-01, -3.9000e+00, 4.9234e-01, -1.5508e+00, + -4.6172e-01, -5.1720e+00], + [ 1.6706e+00, 8.0970e-01, 1.1538e+00, 1.9542e-01, 2.1257e+00, + 9.4246e-01, 8.7370e-01], + [-1.3833e+00, -1.9689e+00, -1.2171e+00, -4.7519e-01, -1.8233e+00, + -2.2917e+00, -1.0281e+00]], + + [[-3.2691e-01, 2.1379e-01, -1.5799e+00, -1.7471e-01, 1.6755e+00, + -1.0869e-01, 9.3903e-01], + [-1.1205e+00, 1.5353e+00, -2.1130e+00, -1.2546e+00, 3.0446e+00, + -7.8053e-01, 1.5398e+00], + [-1.1351e+00, 9.4124e-01, 2.9280e+00, -8.1000e-01, -7.3459e-01, + -4.2564e-01, -1.4421e+00], + [ 2.4689e-02, -3.3828e-02, 2.6065e-01, -2.6570e-01, 2.3445e-01, + 3.9693e-01, -7.8304e-01], + [-2.2292e-01, 1.4141e-01, -1.3057e+00, -1.2455e-01, -1.2508e+00, + 4.3342e-01, -1.1410e+00], + [-8.5033e-01, 1.1651e+00, -2.5037e+00, 2.8120e-01, -1.5445e+00, + 1.3562e+00, -2.6753e+00], + [-8.6687e-01, 7.1788e-01, 1.2307e+00, 1.7326e-01, -6.6148e-01, + 8.3559e-01, 1.6760e+00]]], + + + [[[-7.5636e-01, 1.0406e-01, -1.9952e+00, 2.8539e-01, 9.4179e-01, + -9.9724e-02, -7.5817e-02], + [ 8.1670e-01, -1.3030e-02, -8.4078e-02, -3.5733e-02, -7.1557e+00, + 1.2486e-02, 2.2269e+00], + [-7.6851e-01, 6.2065e-01, -3.9573e+00, 1.8882e+00, 2.4143e-01, + -6.9473e-01, 4.0029e-01], + [-6.6841e-02, 1.0664e-03, 1.4675e+00, -2.0378e-02, -4.3548e+00, + 1.1493e-02, 2.0498e+00], + [ 7.3512e-01, -1.4328e-01, -1.6181e+00, 1.0239e+00, -1.5816e+00, + -3.3798e-01, 5.9767e-01], 
+ [-7.2039e-01, 1.1493e-02, 2.3626e+00, -4.9912e-03, 8.1287e-01, + -2.7170e-02, -4.8459e+00], + [ 7.3248e-01, -5.5497e-01, 1.1458e-01, 2.4101e-01, -1.9196e+00, + 1.3120e+00, -1.0229e+00]], + + [[ 4.3095e-01, 1.0837e+00, 1.2014e-01, 2.9720e+00, -3.3247e+00, + -1.0385e+00, 1.0174e+00], + [ 6.4138e-01, -1.9905e-01, 1.1797e+00, -5.4590e-01, -2.2031e+00, + 1.9075e-01, 5.5506e-01], + [ 6.7800e-01, -7.0397e-01, 3.8190e+00, 7.4930e-03, 2.9795e-01, + -3.6628e-01, -1.1950e-01], + [-5.2492e-02, 1.6291e-02, 1.0505e+00, -3.1132e-01, -1.4716e+00, + 1.7558e-01, 5.1091e-01], + [-4.3850e-01, -9.0554e-01, 2.1269e+00, -5.4716e-01, 1.5862e+00, + 2.8025e+00, -3.1860e+00], + [-5.6574e-01, 1.7558e-01, 7.5659e-01, -7.6251e-02, 1.1156e+00, + -4.1509e-01, -1.2078e+00], + [-6.2916e-01, 5.4272e-01, -6.9879e-01, -2.3569e-01, 1.9095e+00, + -1.2830e+00, 2.2979e+00]], + + [[-8.4161e-01, -4.0897e-01, 1.6287e-01, -1.5005e+00, -1.6594e+00, + 5.8459e-01, 4.1813e-01], + [ 2.4088e+00, -9.0297e-01, 1.1275e+00, -2.4289e+00, 7.9854e-01, + 8.4120e-01, -1.9027e+00], + [-8.1502e-01, -7.3211e-01, -1.5431e+00, -5.0035e+00, -7.8804e-01, + 2.3154e+00, 1.1917e-02], + [-7.0487e-01, 8.2002e-02, 4.2354e+00, -1.3934e+00, -1.7526e+00, + 7.8171e-01, -4.2824e-01], + [ 7.0546e-01, 1.1831e-01, -1.0576e+00, -2.0953e+00, -1.5189e+00, + 4.4112e-01, -6.4277e-01], + [-1.4078e+00, 7.8505e-01, 1.6238e+00, -3.5439e-01, 2.2488e+00, + -1.8782e+00, -4.3663e+00], + [ 4.6543e-01, 1.1706e+00, -9.5626e-01, 1.4146e-01, -3.0695e+00, + -1.6933e+00, -1.0821e+00]], + + [[ 4.4030e-01, 1.1072e+00, -1.4466e+00, -9.1001e-01, 1.2675e+00, + 9.4543e-01, -9.2626e-01], + [ 6.5529e-01, -2.0337e-01, -1.1304e+00, 1.6715e-01, 1.0459e+00, + -1.7366e-01, -5.0531e-01], + [ 4.2479e-01, -1.3930e+00, 1.3409e+00, 6.8051e-01, -3.6292e-01, + -2.8358e-01, 7.1332e-01], + [-4.5237e-01, 1.4039e-01, 5.0549e-01, -3.0095e-02, 6.2284e-02, + -4.6508e-02, -1.3533e-01], + [-5.1313e-01, 4.0867e-01, -1.9516e-01, 1.0372e+00, -1.4634e-02, + 1.7659e+00, -1.6135e+00], + [-1.4965e-02, 4.6446e-03, 6.8244e-01, -2.0760e-01, 5.2616e-01, + -3.5078e-01, -1.0207e+00], + [-1.6643e-02, 1.4356e-02, 7.1819e-01, -6.4171e-01, 2.4062e+00, + -1.0843e+00, 1.9419e+00]], + + [[-1.6494e+00, -3.0921e-01, 1.1411e+00, 2.5517e-01, 9.1365e-01, + -5.5999e-01, -4.0179e-01], + [ 3.3135e+00, -9.3616e-01, -7.6996e+00, 7.6930e-01, 4.5803e+00, + -7.6233e-01, 2.3530e+00], + [-2.1669e+00, 2.9930e-01, 1.2267e+00, -6.0504e-02, 8.5671e-01, + -1.9043e+00, 3.8715e-01], + [-1.1480e+00, 6.2808e-01, 1.4158e-01, -1.2980e-01, 1.6286e+00, + -2.1490e-01, -1.2847e+00], + [-8.0186e-01, 1.0957e+00, 9.2596e-02, -9.2809e-01, -2.2707e+00, + -6.9050e-01, -7.8523e-01], + [ 1.2572e+00, 1.1544e-04, -1.7244e+00, -9.3285e-01, 3.2562e+00, + -1.5918e+00, -4.5146e+00], + [-1.3039e+00, 1.0282e+00, -1.6197e+00, -1.1608e+00, -2.7190e+00, + -1.2076e+00, -1.0886e+00]], + + [[ 8.9968e-01, 2.2624e+00, -2.9517e+00, -1.8488e+00, 1.3539e+00, + -1.1502e+00, 1.1269e+00], + [ 1.3390e+00, -4.1556e-01, -2.3034e+00, 3.3958e-01, 3.0738e-01, + 2.1127e-01, 6.1475e-01], + [ 1.4693e+00, -1.3344e+00, 9.7688e-01, 6.5812e-01, -2.0208e+00, + 1.2380e+00, -1.7427e+00], + [-2.9503e-02, 9.1565e-03, -2.0508e-01, 7.1917e-02, 5.5545e-01, + -1.0744e-01, -3.1263e-01], + [ 6.4996e-01, 1.7452e+00, -1.8257e+00, 6.3668e-01, 3.7559e-01, + 1.6663e+00, -1.3631e+00], + [ 1.0162e+00, -3.1537e-01, -6.7241e-01, -7.6114e-02, 9.6129e-01, + -3.6708e-01, -1.0681e+00], + [ 1.1301e+00, -9.7481e-01, 2.0186e+00, -2.3527e-01, 1.7367e+00, + -1.1346e+00, 2.0322e+00]], + + [[-1.4373e-01, -1.0757e+00, -6.4308e-01, 
8.7907e-01, 6.9455e-01, + 5.4691e-01, 3.8665e-01], + [ 3.2868e+00, -1.8573e+00, -2.9573e+00, 1.5177e+00, -1.4491e+00, + 9.4426e-01, 1.3803e-01], + [-1.0525e+00, -2.7815e+00, 6.8638e-01, 2.4785e+00, 8.0928e-01, + 1.1480e+00, -8.5826e-02], + [-7.2421e-02, 4.0924e-02, -5.6283e-01, 3.2143e-01, 8.9676e-01, + -4.8020e-01, -7.0194e-02], + [-8.5815e-02, -7.5458e-01, -4.1599e-01, 2.8844e-01, -5.0149e-01, + -1.6755e+00, -7.2815e-01], + [ 2.4943e+00, -1.4095e+00, 3.9596e-01, -3.4019e-01, 2.8536e+00, + -1.6406e+00, -2.3982e-01], + [-8.0118e-01, -2.1289e+00, -3.5876e-01, -5.1380e-01, -9.7246e-01, + -2.4779e+00, -1.9252e-01]]]]).unsqueeze(0) + + let t3s132 = dsharp.convTranspose3d(t1, t2, strides=[1;3;2]) + let t3s132Correct = combo.tensor([[[[-6.7761e-01, 6.2121e-01, -5.8084e-01, 1.7037e+00, 4.1528e+00, + -5.9531e-01, -1.2242e+00], + [ 6.0999e-01, -1.1421e+00, 4.7885e-01, -3.1322e+00, -3.8592e+00, + 1.0945e+00, 1.1442e+00], + [ 2.4591e-01, 1.2988e+00, 1.0013e+00, 3.5619e+00, 6.6093e-01, + -1.2446e+00, -3.1330e-01], + [ 5.5458e-02, -5.0842e-02, -1.1643e+00, 9.7157e-01, 2.5957e+00, + -5.4796e-01, -1.1268e+00], + [-4.9924e-02, 9.3474e-02, 1.0517e+00, -1.7862e+00, -2.4055e+00, + 1.0074e+00, 1.0532e+00], + [-2.0126e-02, -1.0630e-01, 3.5784e-01, 2.0313e+00, 2.9439e-01, + -1.1456e+00, -2.8838e-01], + [ 5.9770e-01, -5.4795e-01, -1.3864e+00, 2.3797e-01, -9.2366e-01, + 1.2954e+00, 2.6639e+00], + [-5.3806e-01, 1.0074e+00, 1.2869e+00, -4.3751e-01, 8.1462e-01, + -2.3816e+00, -2.4899e+00], + [-2.1691e-01, -1.1456e+00, -1.9417e-01, 4.9752e-01, 6.3803e-01, + 2.7084e+00, 6.8174e-01]], + + [[-8.3922e-01, 7.4805e-01, -1.1701e+00, -2.1076e-01, -8.7671e+00, + 4.3332e-01, 3.6458e+00], + [ 1.6777e+00, -6.5582e-01, 1.6334e+00, 2.3607e+00, 1.8237e+00, + -1.4862e+00, -1.4957e+00], + [-6.2186e-01, 8.4231e-02, -2.8244e+00, -4.4988e+00, -7.5218e-01, + 2.3240e+00, 8.1946e-01], + [ 4.8995e-01, -4.4742e-01, -1.0170e+00, 2.7122e-01, -3.9667e+00, + 4.5149e-02, 2.6284e+00], + [-5.1653e-01, 7.6371e-01, 2.5448e+00, 6.2662e-01, -2.2715e-01, + -7.1766e-01, -6.9689e-01], + [-1.0198e-01, -8.1433e-01, -1.5133e+00, -1.7472e+00, 5.1406e-03, + 1.3997e+00, 5.6814e-01], + [ 1.4539e-01, -1.1449e-01, 1.5371e+00, 6.9132e-01, -1.1799e+00, + 1.3311e+00, -3.2570e+00], + [-9.4435e-01, -4.2415e-01, 6.5008e-01, -9.9539e-01, 2.2100e+00, + -9.4693e-01, -1.1162e+00], + [ 7.6441e-01, 1.0659e+00, 4.0613e-01, 8.7852e-01, -1.2599e+00, + -3.0272e-01, -5.8641e-01]], + + [[-1.7213e+00, 1.5151e+00, -6.6590e-02, -8.7412e-01, -1.6465e+00, + -6.5858e-01, -2.8125e+00], + [ 1.8141e+00, -1.1268e+00, -6.8575e+00, 3.5361e+00, -1.5526e+00, + 9.5334e-01, 3.0709e+00], + [-9.2582e-01, 1.8949e+00, -1.6511e-01, 7.0500e-02, 1.9044e+00, + -2.8969e+00, -1.7043e+00], + [ 1.4760e-01, -1.1691e-01, 1.8544e+00, -4.7139e-02, -2.5066e+00, + 2.7146e-01, 8.5278e-01], + [-7.2786e-01, -3.6811e-01, -1.0609e+00, 1.6401e+00, -7.1685e-01, + -1.1457e+00, 7.4589e-01], + [ 6.4928e-01, 7.7968e-01, -7.7280e-01, 5.1527e-02, 1.6304e+00, + 1.0984e-02, -7.5546e-01], + [-9.3201e-01, 8.9124e-01, 2.3937e+00, 3.9500e-01, -4.4403e+00, + 1.5589e+00, -4.1439e+00], + [ 1.4152e+00, -2.4701e+00, 5.0425e-01, 3.7800e-01, 1.3457e+00, + 3.2795e-01, -4.3923e+00], + [ 8.9260e-01, 1.6870e+00, -1.2969e+00, -6.2591e-01, -2.3344e+00, + 1.1500e+00, 1.5156e+00]], + + [[-4.6668e-01, 3.4129e-01, -6.0363e+00, -2.7939e-01, 5.2685e+00, + -3.0976e-02, 2.0317e+00], + [ 1.6530e+00, 1.8183e+00, -1.8413e+00, -1.4894e+00, -1.1774e+00, + 9.9144e-02, -1.7685e+00], + [-2.3818e+00, -2.1315e+00, 1.7137e+00, 1.7396e+00, 6.3724e-01, + 1.7143e+00, 
1.3846e+00], + [ 1.1720e-01, -7.7444e-02, 7.7492e-01, -2.5474e-02, 7.0469e-01, + 8.5115e-02, -1.6324e+00], + [ 3.3004e-01, -5.4219e-01, 5.3930e-01, -7.3462e-02, 5.3244e-02, + 4.4767e-01, -8.3234e-02], + [ 4.2607e-01, -2.6242e-01, -3.8263e-01, 5.1750e-01, -2.6028e-01, + -5.6490e-01, -8.8564e-02], + [-2.2910e-01, 1.7721e-01, -4.3838e+00, 1.5015e-01, -2.4796e+00, + 3.8952e-01, -6.4302e+00], + [ 1.6832e+00, 7.9254e-01, 6.2792e-01, 9.6240e-01, -3.3314e-01, + 2.2384e+00, -1.6828e+00], + [-1.3705e+00, -1.9795e+00, -1.8090e+00, -2.6188e-03, -1.8396e+00, + -1.4932e+00, 5.7349e-01]], + + [[-3.2691e-01, 2.1379e-01, -1.5799e+00, -1.7471e-01, 1.6755e+00, + -1.0869e-01, 9.3903e-01], + [-1.1205e+00, 1.5353e+00, -2.1130e+00, -1.2546e+00, 3.0446e+00, + -7.8053e-01, 1.5398e+00], + [-1.1423e+00, 9.4595e-01, 2.8308e+00, -7.7300e-01, -9.6972e-01, + -4.8092e-01, -9.6460e-01], + [ 7.2031e-03, -4.7108e-03, 9.7273e-02, -3.7000e-02, 2.3513e-01, + 5.5275e-02, -4.7754e-01], + [ 2.4689e-02, -3.3828e-02, 2.6065e-01, -2.6570e-01, 2.3445e-01, + 3.9693e-01, -7.8304e-01], + [ 2.5169e-02, -2.0843e-02, 1.5588e-01, -1.6371e-01, -6.2368e-01, + 2.4457e-01, 4.9054e-01], + [-2.4809e-01, 1.6225e-01, -1.4616e+00, 3.9159e-02, -6.2707e-01, + 1.8885e-01, -1.6315e+00], + [-8.5033e-01, 1.1651e+00, -2.5037e+00, 2.8120e-01, -1.5445e+00, + 1.3562e+00, -2.6753e+00], + [-8.6687e-01, 7.1788e-01, 1.2307e+00, 1.7326e-01, -6.6148e-01, + 8.3559e-01, 1.6760e+00]]], + + + [[[-7.5636e-01, 1.0406e-01, -1.9952e+00, 2.8539e-01, 9.4179e-01, + -9.9724e-02, -7.5817e-02], + [ 8.1670e-01, -1.3030e-02, -8.4078e-02, -3.5733e-02, -7.1557e+00, + 1.2486e-02, 2.2269e+00], + [-8.3041e-01, 6.2917e-01, -2.7679e+00, 1.7255e+00, -5.4948e-01, + -6.0293e-01, 4.7008e-01], + [ 6.1903e-02, -8.5168e-03, -1.1894e+00, 1.6275e-01, 7.9091e-01, + -9.1792e-02, -6.9787e-02], + [-6.6841e-02, 1.0664e-03, 1.4675e+00, -2.0378e-02, -4.3548e+00, + 1.1493e-02, 2.0498e+00], + [ 6.7964e-02, -5.1493e-02, -1.2586e+00, 9.8401e-01, -3.4686e-02, + -5.5498e-01, 4.3269e-01], + [ 6.6716e-01, -9.1791e-02, -3.5952e-01, 3.9863e-02, -1.5469e+00, + 2.1700e-01, 1.6498e-01], + [-7.2039e-01, 1.1493e-02, 2.3626e+00, -4.9912e-03, 8.1287e-01, + -2.7170e-02, -4.8459e+00], + [ 7.3248e-01, -5.5497e-01, 1.1458e-01, 2.4101e-01, -1.9196e+00, + 1.3120e+00, -1.0229e+00]], + + [[-3.4182e-01, 1.1900e+00, 8.3611e-01, 2.8846e+00, -4.0510e+00, + -9.4772e-01, 1.0865e+00], + [ 1.4758e+00, -2.1237e-01, -1.8803e+00, -5.3496e-01, 4.6082e-01, + 1.7939e-01, -1.4723e+00], + [-1.3516e-01, 2.7538e-02, 3.2542e+00, -2.2157e+00, 2.0260e+00, + 1.1385e+00, -1.4840e+00], + [ 4.9819e-01, -1.6209e-01, 5.9073e-01, 1.7106e+00, -2.2054e+00, + -9.3160e-01, 9.5501e-01], + [-6.2852e-01, 2.5481e-02, 2.8130e+00, -3.1329e-01, -1.6321e+00, + 1.7254e-01, -3.2043e-02], + [ 5.2732e-01, -3.9340e-01, 1.2458e+00, -8.6716e-01, 8.2612e-01, + 6.8973e-01, -1.0866e+00], + [-3.6248e-01, -9.5832e-01, 3.1090e-01, 5.2366e-01, -7.5841e-01, + 2.4432e+00, -2.0746e+00], + [-5.8480e-01, 1.7588e-01, 1.6626e+00, -8.9840e-02, 1.3114e-01, + -4.3805e-01, -5.3029e+00], + [-6.0978e-01, 5.2804e-01, -1.5534e+00, 4.2050e-01, -6.5459e-02, + -1.7431e-01, 1.4335e+00]], + + [[-1.2076e+00, 8.0916e-01, -5.4423e-01, -2.5007e+00, 1.0022e+00, + 1.3288e+00, -6.6113e-01], + [ 3.9347e+00, -1.1202e+00, -3.1875e+00, -2.2505e+00, 2.2782e+00, + 6.9273e-01, 2.0858e+00], + [-1.5106e+00, -6.5883e-01, -5.7170e-01, -4.2417e+00, 1.6134e+00, + 8.3112e-02, 1.5821e+00], + [-2.6352e-01, -7.2695e-01, 1.0058e+00, -6.7965e-01, -1.0057e+00, + 7.6389e-01, 1.1598e-01], + [-6.1879e-01, 2.1381e-01, 2.7902e+00, 
-1.4168e+00, -2.4942e-01, + 7.3121e-01, -1.2749e+00], + [-4.2349e-01, 5.1499e-01, -1.1291e+00, -2.4219e+00, 3.4505e-01, + 1.3811e+00, 8.4791e-02], + [-1.1477e+00, 5.9410e-01, 6.0532e-01, 9.7264e-01, -1.9956e+00, + 1.0271e+00, -2.4847e+00], + [-1.0974e-01, 7.6874e-01, -1.9692e+00, -5.5339e-01, 4.3769e+00, + -2.2300e+00, -5.5773e+00], + [-8.8625e-01, 2.1964e+00, -4.7814e-01, -9.1585e-01, -4.0720e-01, + -2.7260e+00, 8.1963e-01]], + + [[ 8.2934e-01, 1.7359e+00, -3.2661e+00, -1.4161e+00, 1.5997e+00, + -1.5997e+00, 8.0905e-01], + [ 2.9475e+00, -1.3245e+00, -3.7583e+00, 1.0867e+00, 1.7901e+00, + -5.6489e-01, 5.0130e-01], + [ 9.7242e-01, -2.6574e+00, 1.4017e+00, 2.1780e+00, -2.9905e+00, + -5.1923e-01, -1.2607e+00], + [ 2.8734e-02, 3.1358e-01, 1.3967e-01, -4.6944e-01, 5.4504e-01, + 4.6453e-01, -6.5818e-01], + [-1.1399e+00, 6.3664e-01, 1.2467e-01, -6.2589e-02, 9.0363e-01, + -3.1530e-01, -3.4302e-01], + [ 3.2385e-01, 9.7602e-01, -3.1122e-01, 1.9145e-02, -1.4708e-01, + -6.4605e-01, 5.7041e-01], + [ 6.8437e-01, 1.7290e+00, -1.5806e+00, -1.2304e-01, -1.1253e-01, + 1.0904e+00, -2.5999e+00], + [ 9.7942e-01, -2.9461e-01, 9.7261e-01, -1.0040e+00, 3.6000e+00, + -1.9349e+00, -1.2973e+00], + [ 1.1419e+00, -9.4346e-01, 1.4937e+00, -1.6367e+00, 7.3671e-01, + -3.5025e+00, 1.8482e+00]], + + [[-1.4373e-01, -1.0757e+00, -6.4308e-01, 8.7907e-01, 6.9455e-01, + 5.4691e-01, 3.8665e-01], + [ 3.2868e+00, -1.8573e+00, -2.9573e+00, 1.5177e+00, -1.4491e+00, + 9.4426e-01, 1.3803e-01], + [-1.0557e+00, -2.8052e+00, 6.4474e-01, 2.2923e+00, 7.1482e-01, + 1.4262e+00, 1.1080e-01], + [ 3.1670e-03, 2.3703e-02, 4.1632e-02, 1.8617e-01, 9.4458e-02, + -2.7813e-01, -1.9663e-01], + [-7.2421e-02, 4.0924e-02, -5.6283e-01, 3.2143e-01, 8.9676e-01, + -4.8020e-01, -7.0194e-02], + [ 2.3262e-02, 6.1810e-02, 1.8750e-01, 4.8547e-01, -2.3523e-01, + -7.2527e-01, -5.6349e-02], + [-1.0908e-01, -8.1639e-01, -6.0349e-01, -1.9703e-01, -2.6626e-01, + -9.5024e-01, -6.7180e-01], + [ 2.4943e+00, -1.4095e+00, 3.9596e-01, -3.4019e-01, 2.8536e+00, + -1.6406e+00, -2.3982e-01], + [-8.0118e-01, -2.1289e+00, -3.5876e-01, -5.1380e-01, -9.7246e-01, + -2.4779e+00, -1.9252e-01]]]]).unsqueeze(0) + + let t3s2p1 = dsharp.convTranspose3d(t1, t2, stride=2, padding=1) + let t3s2p1Correct = combo.tensor([[[[ 5.1107e-01, 3.3656e+00, 1.4016e+00, 2.8882e-01, -4.8976e-01], + [-1.2520e+00, -2.9655e+00, -3.2308e+00, -4.6937e+00, 1.0909e+00], + [-4.1827e-02, 1.6104e+00, 7.9930e-01, -1.8915e-01, -4.5081e-01], + [ 1.7161e-03, 9.5375e-01, -1.9002e+00, -1.4199e+00, 1.3326e+00], + [-4.5080e-01, -1.3973e-02, 1.9577e-01, 2.3804e+00, 1.0657e+00]], + + [[-4.3149e-01, -4.6548e+00, 2.9759e+00, -1.9294e+00, -1.7011e+00], + [ 1.3335e+00, -1.6401e+00, 4.0610e-01, 1.7460e+00, 7.5368e-01], + [ 7.4535e-01, 2.1371e-01, 9.7747e-01, -1.8335e+00, -9.1553e-01], + [-1.0580e+00, -1.1396e+00, 1.5921e+00, 1.4837e+00, 1.2120e+00], + [-6.2202e-01, 1.7381e+00, -9.0946e-01, -1.8453e+00, -4.7914e-01]], + + [[ 5.2215e-01, -4.0141e-01, -4.2916e-01, 5.2209e-01, 4.4587e-01], + [-1.3496e+00, 2.0044e+00, 1.0607e+00, -7.2726e-01, -1.0577e+00], + [-3.6046e-01, -1.7473e-01, 7.7268e-02, 3.1800e-01, 1.1941e-01], + [ 8.7386e-01, 1.6961e-01, -6.9657e-02, -3.3020e+00, -9.0590e-02], + [-1.1925e-02, 1.0887e+00, 5.3302e-01, 2.3523e+00, 9.0061e-01]], + + [[-1.6330e+00, -4.5649e+00, 1.3309e+00, 2.1396e+00, 1.8538e+00], + [ 3.0736e+00, 2.4374e+00, -2.8052e+00, -3.0569e+00, -6.2395e-01], + [-4.6614e-01, 7.7661e-01, 5.2383e-01, 4.1591e-01, -4.4464e-01], + [ 6.0249e-01, 7.9457e-01, -5.6395e-02, -1.9356e+00, 2.1329e+00], + [-1.8267e+00, 
-2.1844e+00, 3.3026e-01, -1.7905e+00, -8.1025e-01]], + + [[ 1.0669e+00, -8.0981e-01, -8.7187e-01, -1.9274e+00, -5.4243e-01], + [-2.5996e+00, 5.0024e-01, 2.0791e+00, 2.7528e+00, 1.3802e+00], + [-2.3509e-02, -4.0277e-01, -1.8465e-01, 3.9798e-01, 2.7585e-01], + [ 2.3677e-01, -3.9000e+00, 4.9234e-01, -1.5508e+00, -4.6172e-01], + [ 8.0970e-01, 1.1538e+00, 1.9542e-01, 2.1257e+00, 9.4246e-01]]], + + + [[[-1.9905e-01, 1.1797e+00, -5.4590e-01, -2.2031e+00, 1.9075e-01], + [-7.0397e-01, 3.8190e+00, 7.4930e-03, 2.9795e-01, -3.6628e-01], + [ 1.6291e-02, 1.0505e+00, -3.1132e-01, -1.4716e+00, 1.7558e-01], + [-9.0554e-01, 2.1269e+00, -5.4716e-01, 1.5862e+00, 2.8025e+00], + [ 1.7558e-01, 7.5659e-01, -7.6251e-02, 1.1156e+00, -4.1509e-01]], + + [[-9.0297e-01, 1.1275e+00, -2.4289e+00, 7.9854e-01, 8.4120e-01], + [-7.3211e-01, -1.5431e+00, -5.0035e+00, -7.8804e-01, 2.3154e+00], + [ 8.2002e-02, 4.2354e+00, -1.3934e+00, -1.7526e+00, 7.8171e-01], + [ 1.1831e-01, -1.0576e+00, -2.0953e+00, -1.5189e+00, 4.4112e-01], + [ 7.8505e-01, 1.6238e+00, -3.5439e-01, 2.2488e+00, -1.8782e+00]], + + [[-2.0337e-01, -1.1304e+00, 1.6715e-01, 1.0459e+00, -1.7366e-01], + [-1.3930e+00, 1.3409e+00, 6.8051e-01, -3.6292e-01, -2.8358e-01], + [ 1.4039e-01, 5.0549e-01, -3.0095e-02, 6.2284e-02, -4.6508e-02], + [ 4.0867e-01, -1.9516e-01, 1.0372e+00, -1.4634e-02, 1.7659e+00], + [ 4.6446e-03, 6.8244e-01, -2.0760e-01, 5.2616e-01, -3.5078e-01]], + + [[-9.3616e-01, -7.6996e+00, 7.6930e-01, 4.5803e+00, -7.6233e-01], + [ 2.9930e-01, 1.2267e+00, -6.0504e-02, 8.5671e-01, -1.9043e+00], + [ 6.2808e-01, 1.4158e-01, -1.2980e-01, 1.6286e+00, -2.1490e-01], + [ 1.0957e+00, 9.2596e-02, -9.2809e-01, -2.2707e+00, -6.9050e-01], + [ 1.1544e-04, -1.7244e+00, -9.3285e-01, 3.2562e+00, -1.5918e+00]], + + [[-4.1556e-01, -2.3034e+00, 3.3958e-01, 3.0738e-01, 2.1127e-01], + [-1.3344e+00, 9.7688e-01, 6.5812e-01, -2.0208e+00, 1.2380e+00], + [ 9.1565e-03, -2.0508e-01, 7.1917e-02, 5.5545e-01, -1.0744e-01], + [ 1.7452e+00, -1.8257e+00, 6.3668e-01, 3.7559e-01, 1.6663e+00], + [-3.1537e-01, -6.7241e-01, -7.6114e-02, 9.6129e-01, -3.6708e-01]]]]).unsqueeze(0) + + let t3p1d2 = dsharp.convTranspose3d(t1, t2, padding=1, dilation=2) + let t3p1d2Correct = combo.tensor([[[[-1.0245e-01, -5.9647e-01, 9.3921e-02, -7.5587e-01, 1.9314e-01], + [-1.2189e+00, -1.8433e+00, 1.6070e+00, -1.1514e+00, 2.3350e+00], + [ 9.2224e-02, 9.4806e-01, -1.7268e-01, 5.7531e-01, -1.8053e-01], + [ 4.2969e-01, 2.6431e+00, -2.2818e+00, -5.1769e-01, -1.5198e+00], + [ 3.7179e-02, -8.5859e-01, 1.9636e-01, 7.2871e-02, 4.9428e-02]], + + [[ 1.5057e-02, -2.7401e-01, -4.7146e-02, 3.9273e-01, -4.5927e+00], + [ 1.5358e+00, -4.2029e+00, 3.6310e+00, 4.4393e+00, 2.8129e+00], + [ 1.4288e+00, -5.9017e-01, 1.2119e+00, -1.0511e+00, 1.1724e+00], + [-2.1768e+00, 3.1079e+00, -5.8648e+00, -3.4127e+00, -2.3617e+00], + [-1.4544e+00, 9.4486e-01, -2.4129e+00, 1.8278e+00, -9.9002e-01]], + + [[-2.2211e-02, -1.1428e-01, 1.7139e-02, 1.8895e+00, -3.9936e-01], + [-1.0387e+00, 1.1806e+00, -3.1093e-01, 1.1913e+00, -3.1527e+00], + [ 1.5942e-01, -1.1409e-01, 7.7268e-02, -2.1475e-01, 7.1630e-02], + [ 1.8329e+00, -1.8513e-01, 1.5766e+00, -7.6421e-01, 9.6227e-01], + [-1.3200e-01, 6.7251e-01, -1.8789e-01, 1.0284e-01, -8.4286e-02]], + + [[-1.9183e-01, 4.5235e-02, 1.1921e-01, 1.6477e-01, -4.2937e-01], + [-3.3870e+00, 6.8932e-01, 1.2275e+00, -4.6907e+00, -6.1358e+00], + [-1.2204e+00, 9.5888e-01, 9.6550e-01, -2.7589e-01, -2.4401e+00], + [ 1.8659e-01, 2.9610e-01, 3.8398e+00, 5.1360e+00, 3.0689e+00], + [-5.4028e-01, 3.1447e-02, 1.1577e+00, -1.1192e+00, 
1.6228e+00]], + + [[-2.3675e-02, -1.0882e-01, 1.5483e-02, 6.4794e-01, -1.3376e-01], + [ 2.8738e-01, 4.7778e-03, -5.1073e-01, -6.3953e-01, 2.9550e-01], + [-8.1145e-02, -6.4408e-01, 1.1118e-01, 1.1950e+00, -2.1934e-01], + [-1.0031e-01, -9.7736e-01, 3.8649e-01, 2.6536e+00, -2.2762e+00], + [-8.2723e-02, -4.4742e-01, 6.8506e-02, -5.3513e-01, 1.3741e-01]]], + + + [[[-1.1435e-01, -2.5012e-01, 1.5733e-02, -3.1487e-02, 1.1962e-02], + [-1.4747e+00, -6.3610e-01, 1.1947e-01, -2.2041e+00, 2.0339e+00], + [ 1.2348e-01, 2.0001e-01, -1.9699e-03, 1.6360e+00, -3.5134e-01], + [ 1.5491e+00, 1.3579e+00, -5.4192e-01, 7.8988e-02, -2.0117e+00], + [-1.2555e-01, -6.3778e-01, 9.5123e-02, 4.9298e-01, -7.4163e-02]], + + [[ 9.4726e-01, -8.8186e-01, 1.6573e+00, -8.1649e-01, -1.6891e+00], + [ 2.4154e-01, -2.9949e+00, -6.8751e-02, -1.7125e+00, 1.9995e+00], + [ 7.0803e-01, -1.0804e-01, -3.0661e-01, 3.2285e-01, -6.6276e-02], + [ 3.9308e+00, 3.8945e+00, -2.8420e+00, -3.6102e+00, 2.7485e+00], + [ 1.4156e+00, -1.0560e+00, -1.1896e+00, 8.1470e-01, 1.9007e+00]], + + [[ 6.5155e-02, -6.6365e-01, 1.6384e-01, 1.0020e+00, -1.6052e-01], + [-8.9127e-02, 1.0903e+00, 1.2974e+00, 1.1690e+00, -6.2094e-01], + [ 9.6969e-02, 2.9025e-01, -3.0095e-02, 3.6201e-01, -8.7570e-02], + [ 6.9967e-02, 1.1285e+00, 3.0907e-01, 2.5184e-01, -1.5294e+00], + [ 1.0784e-01, 6.0061e-01, -9.3024e-02, -9.2099e-01, 1.6661e-01]], + + [[-2.6337e-01, 2.8566e-01, -1.1974e+00, 1.1181e+00, -1.8616e-01], + [ 3.3619e+00, -6.7208e-01, -1.8833e+00, -7.1228e-01, 8.5941e-02], + [ 2.2306e+00, -9.6057e-01, -1.3195e+00, 7.1460e-01, 5.8736e-03], + [-1.7553e+00, 2.0345e+00, -3.0523e+00, 1.1117e+00, -2.4375e+00], + [-1.0486e+00, 9.6933e-01, -1.8792e+00, 8.1101e-01, -5.6141e-01]], + + [[-1.0409e-02, 3.4735e-01, -7.7906e-02, 1.3655e-01, -5.5077e-02], + [-1.3938e+00, 3.5525e-01, 2.0966e-01, -1.8086e+00, -2.7074e-01], + [ 2.3803e-01, 9.9532e-01, -1.3451e-01, -1.1614e-01, -1.9662e-02], + [ 2.0666e+00, 9.8112e-01, 2.0048e-01, -2.8437e+00, -4.7968e-02], + [-7.6454e-02, 8.2957e-01, -2.0315e-01, -2.4032e-01, -1.5784e-02]]]]).unsqueeze(0) + + Assert.That(t3.allclose(t3Correct, 0.01, 0.01)) + Assert.That(t3p1.allclose(t3p1Correct, 0.01, 0.01)) + Assert.That(t3p122.allclose(t3p122Correct, 0.01, 0.01)) + Assert.That(t3s2.allclose(t3s2Correct, 0.01, 0.01)) + Assert.That(t3s132.allclose(t3s132Correct, 0.01, 0.01)) + Assert.That(t3s2p1.allclose(t3s2p1Correct, 0.01, 0.01)) + Assert.That(t3p1d2.allclose(t3p1d2Correct, 0.01, 0.01)) + + diff --git a/tests/TensorMath.Tests/TestTensor.MaxPool.fs b/tests/TensorMath.Tests/TestTensor.MaxPool.fs new file mode 100644 index 0000000..970c829 --- /dev/null +++ b/tests/TensorMath.Tests/TestTensor.MaxPool.fs @@ -0,0 +1,1208 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. 
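+// Note: the tests below exercise maxpool1d/2d/3d (and the index-returning maxpool1di/2di/3di variants) together with the matching maxunpool operations. Judging by the expected index tensors, each index records the flat offset of the selected maximum within its source channel (e.g. values up to 63 for an 8x8 plane), which is the layout the maxunpool tests feed back in.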
+ +namespace Tests + +open NUnit.Framework +open TensorMath + +[<TestFixture>] +type TestTensorMaxPool () = + + [<Test>] + member _.TestTensorMaxPool1D () = + for combo in Combos.FloatingPointExcept16s do + let t = combo.tensor([[[-2.1704, -1.1558, 2.5995, 1.3858, -1.3157, -0.3179, 0.9593, -2.1432, 0.7169, -1.7999], + [ 0.4564, -0.2262, 0.3495, 0.4587, -0.3858, 0.2349, 0.2978, 0.6288, 1.1539, 0.2121]], + + [[ 0.6654, 0.7151, 0.9980, 0.1321, -2.0009, -1.1897, 1.0608, -1.8059, -0.2344, 1.6387], + [ 1.1872, -2.2679, -0.0297, -0.2067, -1.5622, -0.3916, 0.6039, -1.1469, 0.4560, 1.2069]]]) + + let tk3, tk3i = dsharp.maxpool1di(t, 3) + let tk3Correct = combo.tensor([[[ 2.5995, 1.3858, 0.9593], + [ 0.4564, 0.4587, 1.1539]], + + [[ 0.9980, 0.1321, 1.0608], + [ 1.1872, -0.2067, 0.6039]]]) + let tk3iCorrect = combo.tensor([[[2, 3, 6], + [0, 3, 8]], + + [[2, 3, 6], + [0, 3, 6]]], dtype=Dtype.Int32) + Assert.CheckEqual(tk3Correct, tk3) + Assert.CheckEqual(tk3iCorrect, tk3i) + + let tk3p1, tk3p1i = dsharp.maxpool1di(t, 3, padding=1) + let tk3p1Correct = combo.tensor([[[-1.1558, 2.5995, 0.9593, 0.7169], + [ 0.4564, 0.4587, 0.6288, 1.1539]], + + [[ 0.7151, 0.9980, 1.0608, 1.6387], + [ 1.1872, -0.0297, 0.6039, 1.2069]]]) + let tk3p1iCorrect = combo.tensor([[[1, 2, 6, 8], + [0, 3, 7, 8]], + + [[1, 2, 6, 9], + [0, 2, 6, 9]]], dtype=Dtype.Int32) + Assert.CheckEqual(tk3p1iCorrect, tk3p1i) + Assert.CheckEqual(tk3p1Correct, tk3p1) + + let tk3s2, tk3s2i = dsharp.maxpool1di(t, 3, stride=2) + let tk3s2Correct = combo.tensor([[[ 2.5995, 2.5995, 0.9593, 0.9593], + [ 0.4564, 0.4587, 0.2978, 1.1539]], + + [[ 0.9980, 0.9980, 1.0608, 1.0608], + [ 1.1872, -0.0297, 0.6039, 0.6039]]]) + let tk3s2iCorrect = combo.tensor([[[2, 2, 6, 6], + [0, 3, 6, 8]], + + [[2, 2, 6, 6], + [0, 2, 6, 6]]], dtype=Dtype.Int32) + Assert.CheckEqual(tk3s2iCorrect, tk3s2i) + Assert.CheckEqual(tk3s2Correct, tk3s2) + + let tk4s3p2, tk4s3p2i = dsharp.maxpool1di(t, 4, stride=3, padding=2) + let tk4s3p2Correct = combo.tensor([[[-1.1558, 2.5995, 0.9593, 0.7169], + [ 0.4564, 0.4587, 0.6288, 1.1539]], + + [[ 0.7151, 0.9980, 1.0608, 1.6387], + [ 1.1872, -0.0297, 0.6039, 1.2069]]]) + let tk4s3p2iCorrect = combo.tensor([[[1, 2, 6, 8], + [0, 3, 7, 8]], + + [[1, 2, 6, 9], + [0, 2, 6, 9]]], dtype=Dtype.Int32) + Assert.CheckEqual(tk4s3p2iCorrect, tk4s3p2i) + Assert.CheckEqual(tk4s3p2Correct, tk4s3p2) + + for combo in Combos.IntegralAndBool do + let x = combo.zeros([1;4;4]) + isInvalidOp(fun () -> dsharp.maxpool1d(x,3)) + + [<Test>] + member _.TestTensorMaxPool2D () = + for combo in Combos.FloatingPointExcept16s do + let t = combo.tensor([[[[ 0.7372, 0.7090, 0.9216, 0.3363, 1.0141, -0.7642, 0.3801, -0.9568], + [-0.3520, -1.2336, 1.8489, 0.9929, -0.8138, 0.0978, -1.3206, -1.5434], + [ 0.6883, -0.2346, 0.1735, 0.6695, -1.9122, 1.1338, -0.1248, 0.2164], + [-1.1349, 0.3008, -0.1635, -1.0362, -0.6487, -0.8422, -0.4334, 1.0604], + [-2.1562, -0.1079, 0.5744, -0.7275, 1.0254, -0.0508, -0.0525, -0.0746], + [-0.7494, 0.6819, -1.7327, -0.4838, -0.6120, 1.6331, 0.1797, -0.6068], + [ 0.6400, 0.1389, 0.3033, 0.3195, 0.9934, 1.2455, -1.0953, 0.9922], + [ 0.2375, 0.6003, -1.1614, 1.0146, 0.2100, -1.0145, -0.1933, 1.1415]], + + [[-0.0819, 0.2091, 0.4351, 1.7527, -1.1970, 2.1048, 1.0200, -0.5153], + [ 1.0867, -1.8738, -0.2754, -0.5089, 0.8850, -0.4751, -0.7820, 1.4476], + [-0.9072, 0.9977, -0.9106, -0.3171, -1.2444, 0.7102, 0.5656, 1.2660], + [ 0.1986, -0.4967, 0.2384, -0.6551, 1.0156, 0.0520, -0.1964, 1.1367], + [ 0.8948, 2.2070, 0.9938, 0.5311, -1.0674, 0.3894, 0.4192, -0.6235], + [ 2.7646, -0.6509,
0.4669, -1.8774, -0.6341, 0.5113, 1.2398, 2.5090], + [ 1.0722, 0.8162, -2.3271, 1.3826, 1.3832, 0.6205, -0.9138, -0.8237], + [-0.0688, -1.6786, 0.1672, -0.7255, -0.1228, -0.1603, -2.1906, -2.6372]]], + + + [[[-1.0461, 0.4063, 0.2085, -0.7598, -1.3893, -0.8866, 1.0594, -0.6184], + [ 2.1120, -0.6475, -0.3964, 0.0378, 0.0138, -0.1672, 0.9265, -1.7734], + [-0.2313, 0.6284, -0.0508, -0.1014, -0.5059, 0.8666, -0.7010, -0.5073], + [ 0.1709, 0.2466, 0.1781, -1.6740, -0.0251, -1.4144, -2.1012, 0.3922], + [ 0.9141, 0.6582, -0.0826, -0.7104, 1.7133, 1.2406, 1.1415, -0.6222], + [-2.1525, -0.2996, -1.3787, 0.0336, -1.4643, 0.6534, 0.3996, 0.3145], + [-0.3298, 0.3855, -0.5100, 1.2770, 0.5306, -0.6604, -0.0489, 0.0609], + [-0.1552, -1.1218, -0.8435, 0.2365, 1.4428, 0.4234, -1.1083, -1.3874]], + + [[ 0.0511, 0.1216, -1.0103, -1.2529, 1.7200, -0.0225, 0.7446, -0.8076], + [ 0.2543, 1.4250, 0.7869, 0.0526, -2.1598, 1.8228, -0.4628, 1.4234], + [ 0.5492, 0.8668, 0.2120, 0.6599, -1.0934, -1.3726, 0.4788, -0.1171], + [ 0.5121, 1.2607, -0.4565, 0.5448, -2.5025, -0.5503, -1.3373, 0.1711], + [-0.3939, -0.6382, -0.0899, -1.4706, 0.4580, 0.3304, 1.8958, 0.1178], + [ 0.1109, 0.2468, 0.3485, -0.0960, -0.0432, -0.3026, -1.9750, 0.4057], + [-1.1117, -0.3422, 1.2130, -1.1206, 0.9506, -0.7723, 0.3162, -0.5487], + [ 0.6304, -0.9149, 0.6075, -0.5371, 1.5875, -0.2979, -0.5832, -3.0311]]]]) + + let tk3, tk3i = dsharp.maxpool2di(t, 3) + let tk3Correct = combo.tensor([[[[1.8489, 1.1338], + [0.6819, 1.6331]], + + [[1.0867, 2.1048], + [2.7646, 1.0156]]], + + + [[[2.1120, 0.8666], + [0.9141, 1.7133]], + + [[1.4250, 1.8228], + [1.2607, 0.5448]]]]) + let tk3iCorrect = combo.tensor([[[[10, 21], + [41, 45]], + + [[ 8, 5], + [40, 28]]], + + + [[[ 8, 21], + [32, 36]], + + [[ 9, 13], + [25, 27]]]], dtype=Dtype.Int32) + Assert.CheckEqual(tk3Correct, tk3) + Assert.CheckEqual(tk3iCorrect, tk3i) + + let tk3p1, tk3p1i = dsharp.maxpool2di(t, 3, padding=1) + let tk3p1Correct = combo.tensor([[[[0.7372, 1.8489, 0.3801], + [0.6883, 1.0254, 1.1338], + [0.6819, 1.0146, 1.6331]], + + [[1.0867, 1.7527, 2.1048], + [2.2070, 1.0156, 1.2660], + [2.7646, 1.3832, 2.5090]]], + + + [[[2.1120, 0.2085, 1.0594], + [0.9141, 1.7133, 1.2406], + [0.3855, 1.4428, 0.6534]], + + [[1.4250, 1.7200, 1.8228], + [1.2607, 0.6599, 1.8958], + [0.6304, 1.5875, 0.4057]]]]) + let tk3p1iCorrect = combo.tensor([[[[ 0, 10, 6], + [16, 36, 21], + [41, 59, 45]], + + [[ 8, 3, 5], + [33, 28, 23], + [40, 52, 47]]], + + + [[[ 8, 2, 6], + [32, 36, 37], + [49, 60, 45]], + + [[ 9, 4, 13], + [25, 19, 38], + [56, 60, 47]]]], dtype=Dtype.Int32) + Assert.CheckEqual(tk3p1iCorrect, tk3p1i) + Assert.CheckEqual(tk3p1Correct, tk3p1) + + let tk3s2, tk3s2i = dsharp.maxpool2di(t, 3, stride=2) + let tk3s2Correct = combo.tensor([[[[1.8489, 1.8489, 1.1338], + [0.6883, 1.0254, 1.1338], + [0.6819, 1.0254, 1.6331]], + + [[1.0867, 1.7527, 2.1048], + [2.2070, 1.0156, 1.0156], + [2.7646, 1.3832, 1.3832]]], + + + [[[2.1120, 0.2085, 1.0594], + [0.9141, 1.7133, 1.7133], + [0.9141, 1.7133, 1.7133]], + + [[1.4250, 1.7200, 1.8228], + [1.2607, 0.6599, 1.8958], + [1.2130, 1.2130, 1.8958]]]]) + let tk3s2iCorrect = combo.tensor([[[[10, 10, 21], + [16, 36, 21], + [41, 36, 45]], + + [[ 8, 3, 5], + [33, 28, 28], + [40, 52, 52]]], + + + [[[ 8, 2, 6], + [32, 36, 36], + [32, 36, 36]], + + [[ 9, 4, 13], + [25, 19, 38], + [50, 50, 38]]]], dtype=Dtype.Int32) + Assert.CheckEqual(tk3s2iCorrect, tk3s2i) + Assert.CheckEqual(tk3s2Correct, tk3s2) + + let tk4s3p2, tk4s3p2i = dsharp.maxpool2di(t, 4, stride=3, padding=2) + let tk4s3p2Correct 
= combo.tensor([[[[0.7372, 1.8489, 1.0141], + [0.6883, 1.8489, 1.1338], + [0.6819, 1.0254, 1.6331]], + + [[1.0867, 1.7527, 2.1048], + [2.2070, 2.2070, 1.4476], + [2.7646, 2.2070, 2.5090]]], + + + [[[2.1120, 0.4063, 1.0594], + [2.1120, 1.7133, 1.7133], + [0.9141, 1.7133, 1.7133]], + + [[1.4250, 1.7200, 1.8228], + [1.4250, 1.4250, 1.8958], + [0.6304, 1.5875, 1.8958]]]]) + let tk4s3p2iCorrect = combo.tensor([[[[ 0, 10, 4], + [16, 10, 21], + [41, 36, 45]], + + [[ 8, 3, 5], + [33, 33, 15], + [40, 33, 47]]], + + + [[[ 8, 1, 6], + [ 8, 36, 36], + [32, 36, 36]], + + [[ 9, 4, 13], + [ 9, 9, 38], + [56, 60, 38]]]], dtype=Dtype.Int32) + Assert.CheckEqual(tk4s3p2iCorrect, tk4s3p2i) + Assert.CheckEqual(tk4s3p2Correct, tk4s3p2) + + for combo in Combos.IntegralAndBool do + let x = combo.zeros([4;4;4;4]) + isInvalidOp(fun () -> dsharp.maxpool2d(x,3)) + + [<Test>] + member _.TestTensorMaxPool3D () = + for combo in Combos.FloatingPointExcept16s do + let t = combo.tensor([[[[ 0.4633, 0.9173, 0.4568, -1.7660, -0.1077], + [-2.1112, 1.5542, 0.5720, -1.0952, -1.8144], + [ 0.3505, -0.9843, -2.5655, -0.9835, 1.2303], + [ 0.8156, 1.5415, 1.3066, -1.1820, 0.2060], + [ 0.0684, 1.5936, 0.2956, -0.5176, -1.6960]], + + [[-1.7281, -0.7697, -2.2310, 0.3580, 0.6299], + [ 0.8558, -0.6180, -1.6077, -0.6779, 1.2910], + [ 0.1885, -0.7006, -0.1863, -1.6729, -0.5761], + [ 0.1940, -0.0399, 0.9329, 1.0687, 0.0955], + [-1.0189, 0.4046, 1.1762, 0.3842, 0.6831]], + + [[ 0.2996, 0.5738, 0.0369, 0.2835, -0.2363], + [ 0.6847, -0.4949, -0.3974, 0.6808, -1.2942], + [ 1.0910, -0.0594, -0.0037, -0.3355, -1.5056], + [-0.0965, 1.1358, 1.2851, -1.7333, -1.1705], + [ 0.0966, -1.2780, 1.2939, 1.3469, -0.2603]], + + [[-0.5270, 1.1442, 0.1259, -1.2813, 0.3536], + [ 0.1579, 0.0828, 1.3531, -0.9110, -0.8747], + [ 0.2473, -0.1507, -0.4880, 0.4575, 1.1186], + [ 2.0900, 1.0479, -0.7209, -1.6928, 1.8761], + [ 2.2015, -0.5097, 0.7364, -1.5177, 0.9212]], + + [[ 1.0358, 1.6584, -1.9654, -1.3971, 1.5641], + [ 0.4032, 0.7737, 0.9351, -0.5245, 0.0783], + [-1.2932, -0.9885, -1.1850, -0.7403, 0.1739], + [-0.5471, 0.5017, -1.0571, 1.7574, -0.0911], + [ 0.6944, -1.2772, 0.7473, -1.0983, 1.1462]]], + + + [[[-1.2563, 0.0688, 1.0405, -0.2582, 0.7333], + [ 2.0711, -0.1815, 0.8876, -0.2907, 1.1195], + [-0.3912, 0.3624, 1.0576, -0.4748, -1.4021], + [ 1.2176, -0.6160, -0.3471, 1.1689, 0.5677], + [-0.0639, 0.3765, -0.2614, 1.8267, 0.0315]], + + [[ 1.2927, 1.0709, -0.8808, 0.8106, -0.5315], + [ 0.7614, -0.3935, 1.2451, -0.0598, -0.5887], + [-0.4089, -0.8598, 0.2478, 0.1282, -0.2745], + [-0.4139, -1.2905, -0.2625, -2.0453, 1.8941], + [-0.2400, -1.2830, -0.3503, -0.8536, -0.5927]], + + [[ 0.8200, 1.8860, -0.5216, -0.9590, -0.9760], + [-1.5796, 2.2379, -0.5714, -1.5612, 1.4035], + [-0.6434, -1.2257, 0.1408, 0.3781, -2.2344], + [ 0.4963, 0.2431, 0.6835, 0.0047, 1.3374], + [-1.5899, 2.5382, 0.9503, 1.9080, 1.8315]], + + [[ 0.5853, 1.9343, -0.7472, 2.1774, -2.1895], + [-0.6187, -0.2870, 1.2485, 2.4069, -0.2632], + [-1.6047, -0.3379, 0.5372, 1.7098, 1.6220], + [ 0.5255, 0.2564, -1.8615, 1.5519, -0.5655], + [-0.9452, -1.1828, -1.8192, 1.1349, 0.9806]], + + [[-1.8198, 0.5455, 1.1761, 1.3070, -0.4654], + [ 1.2673, 0.2608, 0.8385, -1.0407, -0.6288], + [-0.3860, 1.3343, 1.3084, 0.5794, 0.4639], + [ 0.4750, -0.9006, -1.5002, 0.8689, -0.0379], + [ 0.2891, 0.0195, -0.0503, -0.3235, 1.5407]]]]).unsqueeze(0) + + let tk2, tk2i = dsharp.maxpool3di(t, 2) + let tk2Correct = combo.tensor([[[[1.5542, 0.5720], + [1.5415, 1.3066]], + + [[1.1442, 1.3531], + [2.0900, 1.2851]]], + + + [[[2.0711, 1.2451],
+ [1.2176, 1.1689]], + + [[2.2379, 2.4069], + [0.5255, 1.7098]]]]).unsqueeze(0) + let tk2iCorrect = combo.tensor([[[[ 6, 7], + [16, 17]], + + [[76, 82], + [90, 67]]], + + + [[[ 5, 32], + [15, 18]], + + [[56, 83], + [90, 88]]]], dtype=Dtype.Int32).unsqueeze(0) + Assert.CheckEqual(tk2Correct, tk2) + Assert.CheckEqual(tk2iCorrect, tk2i) + + let tk2p1, tk2p1i = dsharp.maxpool3di(t, 2, padding=1) + let tk2p1Correct = combo.tensor([[[[ 0.4633, 0.9173, -0.1077], + [ 0.3505, 1.5542, 1.2303], + [ 0.8156, 1.5936, 0.2060]], + + [[ 0.2996, 0.5738, 0.6299], + [ 1.0910, -0.0037, 1.2910], + [ 0.1940, 1.2939, 1.3469]], + + [[ 1.0358, 1.6584, 1.5641], + [ 0.4032, 1.3531, 1.1186], + [ 2.2015, 1.0479, 1.8761]]], + + + [[[-1.2563, 1.0405, 0.7333], + [ 2.0711, 1.0576, 1.1195], + [ 1.2176, 0.3765, 1.8267]], + + [[ 1.2927, 1.8860, 0.8106], + [ 0.7614, 2.2379, 1.4035], + [ 0.4963, 2.5382, 1.9080]], + + [[ 0.5853, 1.9343, 2.1774], + [ 1.2673, 1.3343, 2.4069], + [ 0.5255, 0.2564, 1.5519]]]]).unsqueeze(0) + let tk2p1iCorrect = combo.tensor([[[[ 0, 1, 4], + [ 10, 6, 14], + [ 15, 21, 19]], + + [[ 50, 51, 29], + [ 60, 62, 34], + [ 40, 72, 73]], + + [[100, 101, 104], + [105, 82, 89], + [ 95, 91, 94]]], + + + [[[ 0, 2, 4], + [ 5, 12, 9], + [ 15, 21, 23]], + + [[ 25, 51, 28], + [ 30, 56, 59], + [ 65, 71, 73]], + + [[ 75, 76, 78], + [105, 111, 83], + [ 90, 91, 93]]]], dtype=Dtype.Int32).unsqueeze(0) + Assert.CheckEqual(tk2p1iCorrect, tk2p1i) + Assert.CheckEqual(tk2p1Correct, tk2p1) + + let tk2s3, tk2s3i = dsharp.maxpool3di(t, 2, stride=3) + let tk2s3Correct = combo.tensor([[[[1.5542, 1.2910], + [1.5936, 1.0687]], + + [[1.6584, 1.5641], + [2.2015, 1.8761]]], + + + [[[2.0711, 1.1195], + [1.2176, 1.8941]], + + [[1.9343, 2.4069], + [0.5255, 1.5519]]]]).unsqueeze(0) + let tk2s3iCorrect = combo.tensor([[[[ 6, 34], + [ 21, 43]], + + [[101, 104], + [ 95, 94]]], + + + [[[ 5, 9], + [ 15, 44]], + + [[ 76, 83], + [ 90, 93]]]], dtype=Dtype.Int32).unsqueeze(0) + Assert.CheckEqual(tk2s3iCorrect, tk2s3i) + Assert.CheckEqual(tk2s3Correct, tk2s3) + + let tk2s3p1, tk2s3p1i = dsharp.maxpool3di(t, 2, stride=3, padding=1) + let tk2s3p1Correct = combo.tensor([[[[ 0.4633, 0.4568], + [ 0.8156, 1.3066]], + + [[ 0.2996, 0.2835], + [ 2.0900, 1.2851]]], + + + [[[-1.2563, 1.0405], + [ 1.2176, 1.1689]], + + [[ 0.8200, 2.1774], + [ 0.5255, 1.7098]]]]).unsqueeze(0) + let tk2s3p1iCorrect = combo.tensor([[[[ 0, 2], + [15, 17]], + + [[50, 53], + [90, 67]]], + + + [[[ 0, 2], + [15, 18]], + + [[50, 78], + [90, 88]]]], dtype=Dtype.Int32).unsqueeze(0) + Assert.CheckEqual(tk2s3p1iCorrect, tk2s3p1i) + Assert.CheckEqual(tk2s3p1Correct, tk2s3p1) + + for combo in Combos.IntegralAndBool do + let x = combo.zeros([4;4;4;4;4]) + isInvalidOp(fun () -> dsharp.maxpool3d(x,3)) + + [<Test>] + member _.TestTensorMaxUnpool1D () = + for combo in Combos.FloatingPointExcept16s do + let tk3 = combo.tensor([[[ 2.5995, 1.3858, 0.9593], + [ 0.4564, 0.4587, 1.1539]], + + [[ 0.9980, 0.1321, 1.0608], + [ 1.1872, -0.2067, 0.6039]]]) + let tk3i = combo.tensor([[[2, 3, 6], + [0, 3, 8]], + + [[2, 3, 6], + [0, 3, 6]]], dtype=Dtype.Int32) + let tk3u = dsharp.maxunpool1d(tk3, tk3i, 3) + let tk3uCorrect = combo.tensor([[[ 0.0000, 0.0000, 2.5995, 1.3858, 0.0000, 0.0000, 0.9593, 0.0000, 0.0000], + [ 0.4564, 0.0000, 0.0000, 0.4587, 0.0000, 0.0000, 0.0000, 0.0000, 1.1539]], + + [[ 0.0000, 0.0000, 0.9980, 0.1321, 0.0000, 0.0000, 1.0608, 0.0000, 0.0000], + [ 1.1872, 0.0000, 0.0000, -0.2067, 0.0000, 0.0000, 0.6039, 0.0000, 0.0000]]]) + Assert.CheckEqual(tk3uCorrect, tk3u) + + let tk3p1 =
combo.tensor([[[-1.1558, 2.5995, 0.9593, 0.7169], + [ 0.4564, 0.4587, 0.6288, 1.1539]], + + [[ 0.7151, 0.9980, 1.0608, 1.6387], + [ 1.1872, -0.0297, 0.6039, 1.2069]]]) + let tk3p1i = combo.tensor([[[1, 2, 6, 8], + [0, 3, 7, 8]], + + [[1, 2, 6, 9], + [0, 2, 6, 9]]], dtype=Dtype.Int32) + let tk3p1u = dsharp.maxunpool1d(tk3p1, tk3p1i, 3, padding=1) + let tk3p1uCorrect = combo.tensor([[[ 0.0000, -1.1558, 2.5995, 0.0000, 0.0000, 0.0000, 0.9593, + 0.0000, 0.7169, 0.0000], + [ 0.4564, 0.0000, 0.0000, 0.4587, 0.0000, 0.0000, 0.0000, + 0.6288, 1.1539, 0.0000]], + + [[ 0.0000, 0.7151, 0.9980, 0.0000, 0.0000, 0.0000, 1.0608, + 0.0000, 0.0000, 1.6387], + [ 1.1872, 0.0000, -0.0297, 0.0000, 0.0000, 0.0000, 0.6039, + 0.0000, 0.0000, 1.2069]]]) + Assert.CheckEqual(tk3p1uCorrect, tk3p1u) + + let tk3s2 = combo.tensor([[[ 2.5995, 2.5995, 0.9593, 0.9593], + [ 0.4564, 0.4587, 0.2978, 1.1539]], + + [[ 0.9980, 0.9980, 1.0608, 1.0608], + [ 1.1872, -0.0297, 0.6039, 0.6039]]]) + let tk3s2i = combo.tensor([[[2, 2, 6, 6], + [0, 3, 6, 8]], + + [[2, 2, 6, 6], + [0, 2, 6, 6]]], dtype=Dtype.Int32) + let tk3s2u = dsharp.maxunpool1d(tk3s2, tk3s2i, 3, stride=2) + let tk3s2uCorrect = combo.tensor([[[ 0.0000, 0.0000, 2.5995, 0.0000, 0.0000, 0.0000, 0.9593, + 0.0000, 0.0000], + [ 0.4564, 0.0000, 0.0000, 0.4587, 0.0000, 0.0000, 0.2978, + 0.0000, 1.1539]], + + [[ 0.0000, 0.0000, 0.9980, 0.0000, 0.0000, 0.0000, 1.0608, + 0.0000, 0.0000], + [ 1.1872, 0.0000, -0.0297, 0.0000, 0.0000, 0.0000, 0.6039, + 0.0000, 0.0000]]]) + Assert.CheckEqual(tk3s2uCorrect, tk3s2u) + + let tk4s3p2 = combo.tensor([[[-1.1558, 2.5995, 0.9593, 0.7169], + [ 0.4564, 0.4587, 0.6288, 1.1539]], + + [[ 0.7151, 0.9980, 1.0608, 1.6387], + [ 1.1872, -0.0297, 0.6039, 1.2069]]]) + let tk4s3p2i = combo.tensor([[[1, 2, 6, 8], + [0, 3, 7, 8]], + + [[1, 2, 6, 9], + [0, 2, 6, 9]]], dtype=Dtype.Int32) + let tk4s3p2u = dsharp.maxunpool1d(tk4s3p2, tk4s3p2i, 4, stride=3, padding=2, outputSize=[2;2;10]) + let tk4s3p2uCorrect = combo.tensor([[[ 0.0000, -1.1558, 2.5995, 0.0000, 0.0000, 0.0000, 0.9593, + 0.0000, 0.7169, 0.0000], + [ 0.4564, 0.0000, 0.0000, 0.4587, 0.0000, 0.0000, 0.0000, + 0.6288, 1.1539, 0.0000]], + + [[ 0.0000, 0.7151, 0.9980, 0.0000, 0.0000, 0.0000, 1.0608, + 0.0000, 0.0000, 1.6387], + [ 1.1872, 0.0000, -0.0297, 0.0000, 0.0000, 0.0000, 0.6039, + 0.0000, 0.0000, 1.2069]]]) + Assert.CheckEqual(tk4s3p2uCorrect, tk4s3p2u) + + [<Test>] + member _.TestTensorMaxUnpool2D () = + for combo in Combos.FloatingPointExcept16s do + let tk3 = combo.tensor([[[[1.8489, 1.1338], + [0.6819, 1.6331]], + + [[1.0867, 2.1048], + [2.7646, 1.0156]]], + + + [[[2.1120, 0.8666], + [0.9141, 1.7133]], + + [[1.4250, 1.8228], + [1.2607, 0.5448]]]]) + let tk3i = combo.tensor([[[[10, 21], + [41, 45]], + + [[ 8, 5], + [40, 28]]], + + + [[[ 8, 21], + [32, 36]], + + [[ 9, 13], + [25, 27]]]], dtype=Dtype.Int32) + let tk3u = dsharp.maxunpool2d(tk3, tk3i, 3, outputSize=[2;2;8;8]) + let tk3uCorrect = combo.tensor([[[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.8489, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.1338, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.6819, 0.0000, 0.0000, 0.0000, 1.6331, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 2.1048,
0.0000, 0.0000], + [1.0867, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 1.0156, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [2.7646, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]], + + + [[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [2.1120, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.8666, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.9141, 0.0000, 0.0000, 0.0000, 1.7133, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 1.4250, 0.0000, 0.0000, 0.0000, 1.8228, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 1.2607, 0.0000, 0.5448, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]]) + Assert.CheckEqual(tk3uCorrect, tk3u) + + let tk3p1 = combo.tensor([[[[0.7372, 1.8489, 0.3801], + [0.6883, 1.0254, 1.1338], + [0.6819, 1.0146, 1.6331]], + + [[1.0867, 1.7527, 2.1048], + [2.2070, 1.0156, 1.2660], + [2.7646, 1.3832, 2.5090]]], + + + [[[2.1120, 0.2085, 1.0594], + [0.9141, 1.7133, 1.2406], + [0.3855, 1.4428, 0.6534]], + + [[1.4250, 1.7200, 1.8228], + [1.2607, 0.6599, 1.8958], + [0.6304, 1.5875, 0.4057]]]]) + let tk3p1i = combo.tensor([[[[ 0, 10, 6], + [16, 36, 21], + [41, 59, 45]], + + [[ 8, 3, 5], + [33, 28, 23], + [40, 52, 47]]], + + + [[[ 8, 2, 6], + [32, 36, 37], + [49, 60, 45]], + + [[ 9, 4, 13], + [25, 19, 38], + [56, 60, 47]]]], dtype=Dtype.Int32) + let tk3p1u = dsharp.maxunpool2d(tk3p1, tk3p1i, 3, padding=1, outputSize=[2;2;8;8]) + let tk3p1uCorrect = combo.tensor([[[[0.7372, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.3801, 0.0000], + [0.0000, 0.0000, 1.8489, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.6883, 0.0000, 0.0000, 0.0000, 0.0000, 1.1338, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 1.0254, 0.0000, 0.0000, 0.0000], + [0.0000, 0.6819, 0.0000, 0.0000, 0.0000, 1.6331, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 1.0146, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 1.7527, 0.0000, 2.1048, 0.0000, 0.0000], + [1.0867, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.2660], + [0.0000, 0.0000, 0.0000, 0.0000, 1.0156, 0.0000, 0.0000, 0.0000], + [0.0000, 2.2070, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [2.7646, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 2.5090], + [0.0000, 0.0000, 0.0000, 0.0000, 1.3832, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]], + + + [[[0.0000, 0.0000, 0.2085, 0.0000, 0.0000, 0.0000, 1.0594, 
0.0000], + [2.1120, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.9141, 0.0000, 0.0000, 0.0000, 1.7133, 1.2406, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.6534, 0.0000, 0.0000], + [0.0000, 0.3855, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 1.4428, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 1.7200, 0.0000, 0.0000, 0.0000], + [0.0000, 1.4250, 0.0000, 0.0000, 0.0000, 1.8228, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.6599, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 1.2607, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.8958, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.4057], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.6304, 0.0000, 0.0000, 0.0000, 1.5875, 0.0000, 0.0000, 0.0000]]]]) + Assert.CheckEqual(tk3p1uCorrect, tk3p1u) + + let tk3s2 = combo.tensor([[[[1.8489, 1.8489, 1.1338], + [0.6883, 1.0254, 1.1338], + [0.6819, 1.0254, 1.6331]], + + [[1.0867, 1.7527, 2.1048], + [2.2070, 1.0156, 1.0156], + [2.7646, 1.3832, 1.3832]]], + + + [[[2.1120, 0.2085, 1.0594], + [0.9141, 1.7133, 1.7133], + [0.9141, 1.7133, 1.7133]], + + [[1.4250, 1.7200, 1.8228], + [1.2607, 0.6599, 1.8958], + [1.2130, 1.2130, 1.8958]]]]) + let tk3s2i = combo.tensor([[[[10, 10, 21], + [16, 36, 21], + [41, 36, 45]], + + [[ 8, 3, 5], + [33, 28, 28], + [40, 52, 52]]], + + + [[[ 8, 2, 6], + [32, 36, 36], + [32, 36, 36]], + + [[ 9, 4, 13], + [25, 19, 38], + [50, 50, 38]]]], dtype=Dtype.Int32) + let tk3s2u = dsharp.maxunpool2d(tk3s2, tk3s2i, 3, stride=2, outputSize=[2;2;8;8]) + let tk3s2uCorrect = combo.tensor([[[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.8489, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.6883, 0.0000, 0.0000, 0.0000, 0.0000, 1.1338, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 1.0254, 0.0000, 0.0000, 0.0000], + [0.0000, 0.6819, 0.0000, 0.0000, 0.0000, 1.6331, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 1.7527, 0.0000, 2.1048, 0.0000, 0.0000], + [1.0867, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 1.0156, 0.0000, 0.0000, 0.0000], + [0.0000, 2.2070, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [2.7646, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 1.3832, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]], + + + [[[0.0000, 0.0000, 0.2085, 0.0000, 0.0000, 0.0000, 1.0594, 0.0000], + [2.1120, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.9141, 0.0000, 0.0000, 0.0000, 1.7133, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 1.7200, 0.0000, 0.0000, 0.0000], 
+ [0.0000, 1.4250, 0.0000, 0.0000, 0.0000, 1.8228, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.6599, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 1.2607, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.8958, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.2130, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]]) + Assert.CheckEqual(tk3s2uCorrect, tk3s2u) + + let tk4s3p2 = combo.tensor([[[[0.7372, 1.8489, 1.0141], + [0.6883, 1.8489, 1.1338], + [0.6819, 1.0254, 1.6331]], + + [[1.0867, 1.7527, 2.1048], + [2.2070, 2.2070, 1.4476], + [2.7646, 2.2070, 2.5090]]], + + + [[[2.1120, 0.4063, 1.0594], + [2.1120, 1.7133, 1.7133], + [0.9141, 1.7133, 1.7133]], + + [[1.4250, 1.7200, 1.8228], + [1.4250, 1.4250, 1.8958], + [0.6304, 1.5875, 1.8958]]]]) + let tk4s3p2i = combo.tensor([[[[ 0, 10, 4], + [16, 10, 21], + [41, 36, 45]], + + [[ 8, 3, 5], + [33, 33, 15], + [40, 33, 47]]], + + + [[[ 8, 1, 6], + [ 8, 36, 36], + [32, 36, 36]], + + [[ 9, 4, 13], + [ 9, 9, 38], + [56, 60, 38]]]], dtype=Dtype.Int32) + let tk4s3p2u = dsharp.maxunpool2d(tk4s3p2, tk4s3p2i, 4, stride=3, padding=2, outputSize=[2;2;8;8]) + let tk4s3p2uCorrect = combo.tensor([[[[0.7372, 0.0000, 0.0000, 0.0000, 1.0141, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.8489, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.6883, 0.0000, 0.0000, 0.0000, 0.0000, 1.1338, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 1.0254, 0.0000, 0.0000, 0.0000], + [0.0000, 0.6819, 0.0000, 0.0000, 0.0000, 1.6331, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 1.7527, 0.0000, 2.1048, 0.0000, 0.0000], + [1.0867, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.4476], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 2.2070, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [2.7646, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 2.5090], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]], + + + [[[0.0000, 0.4063, 0.0000, 0.0000, 0.0000, 0.0000, 1.0594, 0.0000], + [2.1120, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.9141, 0.0000, 0.0000, 0.0000, 1.7133, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 1.7200, 0.0000, 0.0000, 0.0000], + [0.0000, 1.4250, 0.0000, 0.0000, 0.0000, 1.8228, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.8958, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.6304, 0.0000, 0.0000, 0.0000, 1.5875, 0.0000, 0.0000, 0.0000]]]]) + Assert.CheckEqual(tk4s3p2uCorrect, tk4s3p2u) + + + [<Test>] +
member _.TestTensorMaxUnpool3D () = + for combo in Combos.FloatingPointExcept16s do + let tk2 = combo.tensor([[[[1.5542, 0.5720], + [1.5415, 1.3066]], + + [[1.1442, 1.3531], + [2.0900, 1.2851]]], + + + [[[2.0711, 1.2451], + [1.2176, 1.1689]], + + [[2.2379, 2.4069], + [0.5255, 1.7098]]]]).unsqueeze(0) + let tk2i = combo.tensor([[[[ 6, 7], + [16, 17]], + + [[76, 82], + [90, 67]]], + + + [[[ 5, 32], + [15, 18]], + + [[56, 83], + [90, 88]]]], dtype=Dtype.Int32).unsqueeze(0) + let tk2u = dsharp.maxunpool3d(tk2, tk2i, 2, outputSize=[1;2;5;5;5]) + let tk2uCorrect = combo.tensor([[[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 1.5542, 0.5720, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 1.5415, 1.3066, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.2851, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 1.1442, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.3531, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [2.0900, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]], + + + [[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [2.0711, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [1.2176, 0.0000, 0.0000, 1.1689, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.2451, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 2.2379, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 2.4069, 0.0000], + [0.0000, 0.0000, 0.0000, 1.7098, 0.0000], + [0.5255, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]]).unsqueeze(0) + Assert.CheckEqual(tk2uCorrect, tk2u) + + let tk2p1 = combo.tensor([[[[ 0.4633, 0.9173, -0.1077], + [ 0.3505, 1.5542, 1.2303], + [ 0.8156, 1.5936, 0.2060]], + + [[ 0.2996, 0.5738, 0.6299], + [ 1.0910, -0.0037, 1.2910], + [ 0.1940, 1.2939, 1.3469]], + + [[ 1.0358, 1.6584, 1.5641], + [ 0.4032, 1.3531, 1.1186], + [ 2.2015, 1.0479, 1.8761]]], + + + [[[-1.2563, 1.0405, 0.7333], + [ 2.0711, 1.0576, 1.1195], + [ 1.2176, 0.3765, 1.8267]], + + [[ 1.2927, 1.8860, 0.8106], + [ 0.7614, 2.2379, 1.4035], + [ 0.4963, 2.5382, 1.9080]], + + [[ 0.5853, 1.9343, 2.1774], + [ 1.2673, 1.3343, 2.4069], + [ 0.5255, 0.2564, 1.5519]]]]).unsqueeze(0) + let tk2p1i = combo.tensor([[[[ 0, 1, 4], + [ 10, 6, 14], + [ 15, 21, 19]], + + [[ 
50, 51, 29], + [ 60, 62, 34], + [ 40, 72, 73]], + + [[100, 101, 104], + [105, 82, 89], + [ 95, 91, 94]]], + + + [[[ 0, 2, 4], + [ 5, 12, 9], + [ 15, 21, 23]], + + [[ 25, 51, 28], + [ 30, 56, 59], + [ 65, 71, 73]], + + [[ 75, 76, 78], + [105, 111, 83], + [ 90, 91, 93]]]], dtype=Dtype.Int32).unsqueeze(0) + let tk2p1u = dsharp.maxunpool3d(tk2p1, tk2p1i, 2, padding=1, outputSize=[1;2;5;5;5]) + let tk2p1uCorrect = combo.tensor([[[[ 0.4633, 0.9173, 0.0000, 0.0000, -0.1077], + [ 0.0000, 1.5542, 0.0000, 0.0000, 0.0000], + [ 0.3505, 0.0000, 0.0000, 0.0000, 1.2303], + [ 0.8156, 0.0000, 0.0000, 0.0000, 0.2060], + [ 0.0000, 1.5936, 0.0000, 0.0000, 0.0000]], + + [[ 0.0000, 0.0000, 0.0000, 0.0000, 0.6299], + [ 0.0000, 0.0000, 0.0000, 0.0000, 1.2910], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.1940, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[ 0.2996, 0.5738, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 1.0910, 0.0000, -0.0037, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 1.2939, 1.3469, 0.0000]], + + [[ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 1.3531, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 1.1186], + [ 0.0000, 1.0479, 0.0000, 0.0000, 1.8761], + [ 2.2015, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[ 1.0358, 1.6584, 0.0000, 0.0000, 1.5641], + [ 0.4032, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]], + + + [[[-1.2563, 0.0000, 1.0405, 0.0000, 0.7333], + [ 2.0711, 0.0000, 0.0000, 0.0000, 1.1195], + [ 0.0000, 0.0000, 1.0576, 0.0000, 0.0000], + [ 1.2176, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.3765, 0.0000, 1.8267, 0.0000]], + + [[ 1.2927, 0.0000, 0.0000, 0.8106, 0.0000], + [ 0.7614, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[ 0.0000, 1.8860, 0.0000, 0.0000, 0.0000], + [ 0.0000, 2.2379, 0.0000, 0.0000, 1.4035], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.4963, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 2.5382, 0.0000, 1.9080, 0.0000]], + + [[ 0.5853, 1.9343, 0.0000, 2.1774, 0.0000], + [ 0.0000, 0.0000, 0.0000, 2.4069, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.5255, 0.2564, 0.0000, 1.5519, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 1.2673, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 1.3343, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]]).unsqueeze(0) + Assert.CheckEqual(tk2p1uCorrect, tk2p1u) + + let tk2s3 = combo.tensor([[[[1.5542, 1.2910], + [1.5936, 1.0687]], + + [[1.6584, 1.5641], + [2.2015, 1.8761]]], + + + [[[2.0711, 1.1195], + [1.2176, 1.8941]], + + [[1.9343, 2.4069], + [0.5255, 1.5519]]]]).unsqueeze(0) + let tk2s3i = combo.tensor([[[[ 6, 34], + [ 21, 43]], + + [[101, 104], + [ 95, 94]]], + + + [[[ 5, 9], + [ 15, 44]], + + [[ 76, 83], + [ 90, 93]]]], dtype=Dtype.Int32).unsqueeze(0) + let tk2s3u = dsharp.maxunpool3d(tk2s3, tk2s3i, 2, stride=3, outputSize=[1;2;5;5;5]) + let tk2s3uCorrect = combo.tensor([[[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 1.5542, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 1.5936, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 
0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 1.2910], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 1.0687, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 1.8761], + [2.2015, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 1.6584, 0.0000, 0.0000, 1.5641], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]], + + + [[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [2.0711, 0.0000, 0.0000, 0.0000, 1.1195], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [1.2176, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 1.8941], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 1.9343, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 2.4069, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.5255, 0.0000, 0.0000, 1.5519, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]]).unsqueeze(0) + Assert.CheckEqual(tk2s3uCorrect, tk2s3u) + + let tk2s3p1 = combo.tensor([[[[ 0.4633, 0.4568], + [ 0.8156, 1.3066]], + + [[ 0.2996, 0.2835], + [ 2.0900, 1.2851]]], + + + [[[-1.2563, 1.0405], + [ 1.2176, 1.1689]], + + [[ 0.8200, 2.1774], + [ 0.5255, 1.7098]]]]).unsqueeze(0) + let tk2s3p1i = combo.tensor([[[[ 0, 2], + [15, 17]], + + [[50, 53], + [90, 67]]], + + + [[[ 0, 2], + [15, 18]], + + [[50, 78], + [90, 88]]]], dtype=Dtype.Int32).unsqueeze(0) + let tk2s3p1u = dsharp.maxunpool3d(tk2s3p1, tk2s3p1i, 2, stride=3, padding=1, outputSize=[1;2;5;5;5]) + let tk2s3p1uCorrect = combo.tensor([[[[ 0.4633, 0.0000, 0.4568, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.8156, 0.0000, 1.3066, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[ 0.2996, 0.0000, 0.0000, 0.2835, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 1.2851, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 2.0900, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[ 0.0000, 0.0000, 0.0000, 0.0000, 
0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]], + + + [[[-1.2563, 0.0000, 1.0405, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 1.2176, 0.0000, 0.0000, 1.1689, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[ 0.8200, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[ 0.0000, 0.0000, 0.0000, 2.1774, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 1.7098, 0.0000], + [ 0.5255, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], + + [[ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], + [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]]).unsqueeze(0) + Assert.CheckEqual(tk2s3p1uCorrect, tk2s3p1u) + diff --git a/tests/TensorMath.Tests/TestTensor.fs b/tests/TensorMath.Tests/TestTensor.fs new file mode 100644 index 0000000..2fb2c9c --- /dev/null +++ b/tests/TensorMath.Tests/TestTensor.fs @@ -0,0 +1,5431 @@ +// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin ) +// and other contributors, see LICENSE in root of repository. +// +// BSD 2-Clause License. See LICENSE in root of repository. + +namespace Tests + +open NUnit.Framework +open TensorMath +open TensorMath.Util +open System + +[<TestFixture>] +type TestTensor () = + + member _.TestTensorCreateAllTensorTypesGeneric (ofDouble: double -> 'T) = + // Test creating these types of tensors + for combo in Combos.All do + let t0 = combo.tensor(ofDouble 1.)
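+ // a 0-dimensional (scalar) tensor: expected shape is the empty array [||] and dim is 0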
+ let t0ShapeCorrect = [||] + let t0DimCorrect = 0 + + Assert.CheckEqual(t0ShapeCorrect, t0.shape) + Assert.CheckEqual(t0DimCorrect, t0.dim) + Assert.CheckEqual(combo.dtype, t0.dtype) + + let t1 = combo.tensor([ofDouble 1.; ofDouble 2.; ofDouble 3.]) + let t1ShapeCorrect = [|3|] + let t1DimCorrect = 1 + + Assert.CheckEqual(t1ShapeCorrect, t1.shape) + Assert.CheckEqual(t1DimCorrect, t1.dim) + Assert.CheckEqual(combo.dtype, t1.dtype) + + let t2 = combo.tensor([[ofDouble 1.; ofDouble 2.; ofDouble 3.]; [ofDouble 4.; ofDouble 5.; ofDouble 6.]]) + let t2ShapeCorrect = [|2; 3|] + let t2DimCorrect = 2 + Assert.CheckEqual(t2ShapeCorrect, t2.shape) + Assert.CheckEqual(t2DimCorrect, t2.dim) + Assert.CheckEqual(combo.dtype, t2.dtype) + + let t3 = combo.tensor([[[ofDouble 1.; ofDouble 2.; ofDouble 3.]; [ofDouble 4.; ofDouble 5.; ofDouble 6.]]]) + let t3ShapeCorrect = [|1; 2; 3|] + let t3DimCorrect = 3 + + Assert.CheckEqual(t3ShapeCorrect, t3.shape) + Assert.CheckEqual(t3DimCorrect, t3.dim) + Assert.CheckEqual(combo.dtype, t3.dtype) + + let t4 = combo.tensor([[[[ofDouble 1.; ofDouble 2.]]]]) + let t4ShapeCorrect = [|1; 1; 1; 2|] + let t4DimCorrect = 4 + + Assert.CheckEqual(t4ShapeCorrect, t4.shape) + Assert.CheckEqual(t4DimCorrect, t4.dim) + Assert.CheckEqual(combo.dtype, t4.dtype) + + let t5 = combo.tensor([[[[[ofDouble 1.; ofDouble 2.]]]]]) + let t5ShapeCorrect = [|1; 1; 1; 1; 2|] + let t5DimCorrect = 5 + + Assert.CheckEqual(t5ShapeCorrect, t5.shape) + Assert.CheckEqual(t5DimCorrect, t5.dim) + Assert.CheckEqual(combo.dtype, t5.dtype) + + let t6 = combo.tensor([[[[[[ofDouble 1.; ofDouble 2.]]]]]]) + let t6ShapeCorrect = [|1; 1; 1; 1; 1; 2|] + let t6DimCorrect = 6 + + Assert.CheckEqual(t6ShapeCorrect, t6.shape) + Assert.CheckEqual(t6DimCorrect, t6.dim) + Assert.CheckEqual(combo.dtype, t6.dtype) + + [<Test>] + member this.TestTensorCreateAllTensorTypesFromFloat64Data() = + this.TestTensorCreateAllTensorTypesGeneric id + + [<Test>] + member this.TestTensorCreateAllTensorTypesFromFloat32Data() = + this.TestTensorCreateAllTensorTypesGeneric float32 + + [<Test>] + member this.TestTensorCreateAllTensorTypesFromInt32Data() = + this.TestTensorCreateAllTensorTypesGeneric int32 + + [<Test>] + member this.TestTensorCreateAllTensorTypesFromInt8Data() = + this.TestTensorCreateAllTensorTypesGeneric int8 + + [<Test>] + member this.TestTensorCreateAllTensorTypesFromInt16Data() = + this.TestTensorCreateAllTensorTypesGeneric int16 + + [<Test>] + member this.TestTensorCreateAllTensorTypesFromInt64Data() = + this.TestTensorCreateAllTensorTypesGeneric int64 + + [<Test>] + member this.TestTensorCreateAllTensorTypesFromBoolData() = + this.TestTensorCreateAllTensorTypesGeneric (fun i -> abs i >= 1.0) + + let t1 = dsharp.tensor([true, true]) + Assert.CheckEqual(Dtype.Bool, t1.dtype) + + let t2 = dsharp.tensor([true, false]) + Assert.CheckEqual(Dtype.Bool, t2.dtype) + + let t3 = dsharp.tensor([true; false]) + Assert.CheckEqual(Dtype.Bool, t3.dtype) + + let t4 = dsharp.tensor([true; false], dtype=Dtype.Float32) + Assert.CheckEqual(Dtype.Float32, t4.dtype) + + [<Test>] + member _.TestTensorCreateDtypeInferredFromData () = + for combo in Combos.AllDevicesAndBackendsFloat32 do + let dataFloat32 = [1.f;2.f;3.f] + let tensorFloat32 = dsharp.tensor(dataFloat32, ?dtype=None, device=combo.device, backend=combo.backend) + Assert.AreEqual(tensorFloat32.dtype, Dtype.Float32) + + // Exception: If data is double and no dtype is given by the user, prefer a Float32 tensor + let dataFloat64 = [1.;2.;3.]
+ let tensorFloat64 = dsharp.tensor(dataFloat64, ?dtype=None, device=combo.device, backend=combo.backend) + Assert.AreEqual(tensorFloat64.dtype, Dtype.Float32) + + let dataInt64 = [1L;2L;3L] + let tensorInt64 = dsharp.tensor(dataInt64, ?dtype=None, device=combo.device, backend=combo.backend) + Assert.AreEqual(tensorInt64.dtype, Dtype.Int64) + + let dataInt32 = [1;2;3] + let tensorInt32 = dsharp.tensor(dataInt32, ?dtype=None, device=combo.device, backend=combo.backend) + Assert.AreEqual(tensorInt32.dtype, Dtype.Int32) + + let dataInt16 = [1s;2s;3s] + let tensorInt16 = dsharp.tensor(dataInt16, ?dtype=None, device=combo.device, backend=combo.backend) + Assert.AreEqual(tensorInt16.dtype, Dtype.Int16) + + let dataInt8 = [1y;2y;3y] + let tensorInt8 = dsharp.tensor(dataInt8, ?dtype=None, device=combo.device, backend=combo.backend) + Assert.AreEqual(tensorInt8.dtype, Dtype.Int8) + + let dataByte = [1uy;2uy;3uy] + let tensorByte = dsharp.tensor(dataByte, ?dtype=None, device=combo.device, backend=combo.backend) + Assert.AreEqual(tensorByte.dtype, Dtype.Byte) + + let dataBool = [true;true;false] + let tensorBool = dsharp.tensor(dataBool, ?dtype=None, device=combo.device, backend=combo.backend) + Assert.AreEqual(tensorBool.dtype, Dtype.Bool) + + [<Test>] + member _.TestTensorHandle () = + for combo in Combos.Float32 do + if combo.backend = Backend.Reference then + let t1 = combo.tensor([1.0f ; 1.0f ]) + Assert.CheckEqual([| 1.0f ; 1.0f |], (t1.primalRaw.Handle :?> float32[])) + + [<Test>] + member _.TestTensorCreate0 () = + for combo in Combos.AllDevicesAndBackendsFloat32 do + let t0 = combo.tensor(1.) + let t0Shape = t0.shape + let t0Dim = t0.dim + let t0ShapeCorrect = [||] + let t0DimCorrect = 0 + + Assert.CheckEqual(t0DimCorrect, t0Dim) + Assert.CheckEqual(t0ShapeCorrect, t0Shape) + + [<Test>] + member _.TestTensorCreate1 () = + for combo in Combos.AllDevicesAndBackendsFloat32 do + // create from double list + let t1 = combo.tensor([1.; 2.; 3.]) + let t1ShapeCorrect = [|3|] + let t1DimCorrect = 1 + + Assert.CheckEqual(t1ShapeCorrect, t1.shape) + Assert.CheckEqual(t1DimCorrect, t1.dim) + + // create from double[] + let t1Array = combo.tensor([| 1.; 2.; 3. |]) + + Assert.CheckEqual(t1ShapeCorrect, t1Array.shape) + Assert.CheckEqual(t1DimCorrect, t1Array.dim) + + // create from seq<double> + let t1Seq = combo.tensor(seq { 1.; 2.; 3. }) + + Assert.CheckEqual(t1ShapeCorrect, t1Seq.shape) + Assert.CheckEqual(t1DimCorrect, t1Seq.dim) + + [<Test>] + member _.TestTensorCreate2 () = + for combo in Combos.AllDevicesAndBackendsFloat32 do + let t2Values = [[1.; 2.; 3.]; [4.; 5.; 6.]] + let t2ShapeCorrect = [|2; 3|] + let t2DimCorrect = 2 + // let t2DtypeCorrect = Dtype.Float32 + let t2ValuesCorrect = array2D (List.map (List.map float32) t2Values) + + // create from double list list + let t2 = combo.tensor([[1.; 2.; 3.]; [4.; 5.; 6.]]) + Assert.CheckEqual(t2ShapeCorrect, t2.shape) + Assert.CheckEqual(t2DimCorrect, t2.dim) + Assert.CheckEqual(t2ValuesCorrect, t2.toArray() :?> float32[,]) + + // create from double array list + let t2ArrayList = combo.tensor([[|1.; 2.; 3.|]; [|4.; 5.; 6.|]]) + Assert.CheckEqual(t2ShapeCorrect, t2ArrayList.shape) + Assert.CheckEqual(t2DimCorrect, t2ArrayList.dim) + Assert.CheckEqual(t2ValuesCorrect, t2ArrayList.toArray() :?> float32[,]) + + // create from double list array + let t2ListArray = combo.tensor([| [1.; 2.; 3.]; [4.; 5.; 6.]
+            Assert.CheckEqual(t2ShapeCorrect, t2ListArray.shape)
+            Assert.CheckEqual(t2DimCorrect, t2ListArray.dim)
+            Assert.CheckEqual(t2ValuesCorrect, t2ListArray.toArray() :?> float32[,])
+
+            // create from double[][]
+            let t2ArrayArray = combo.tensor([| [| 1.; 2.; 3. |]; [| 4.; 5.; 6.|] |])
+            Assert.CheckEqual(t2ShapeCorrect, t2ArrayArray.shape)
+            Assert.CheckEqual(t2DimCorrect, t2ArrayArray.dim)
+            Assert.CheckEqual(t2ValuesCorrect, t2ArrayArray.toArray() :?> float32[,])
+
+            // create from double[,]
+            let t2Array2D = combo.tensor(array2D [| [| 1.; 2.; 3. |]; [| 4.; 5.; 6.|] |])
+            Assert.CheckEqual(t2ShapeCorrect, t2Array2D.shape)
+            Assert.CheckEqual(t2DimCorrect, t2Array2D.dim)
+            Assert.CheckEqual(t2ValuesCorrect, t2Array2D.toArray() :?> float32[,])
+
+            // create from seq<double[]>
+            let t2ArraySeq = combo.tensor(seq { yield [| 1.; 2.; 3. |]; yield [| 4.; 5.; 6.|] })
+            Assert.CheckEqual(t2ShapeCorrect, t2ArraySeq.shape)
+            Assert.CheckEqual(t2DimCorrect, t2ArraySeq.dim)
+            Assert.CheckEqual(t2ValuesCorrect, t2ArraySeq.toArray() :?> float32[,])
+
+            // create from seq<seq<double>>
+            let t2SeqSeq = combo.tensor(seq { seq { 1.; 2.; 3. }; seq { 4.; 5.; 6.} })
+            Assert.CheckEqual(t2ShapeCorrect, t2SeqSeq.shape)
+            Assert.CheckEqual(t2DimCorrect, t2SeqSeq.dim)
+            Assert.CheckEqual(t2ValuesCorrect, t2SeqSeq.toArray() :?> float32[,])
+
+            // create from (double * double * double) list list
+            let t2TupleListList = combo.tensor([ [ 1., 2., 3. ]; [ 4., 5., 6. ] ])
+            Assert.CheckEqual(t2ShapeCorrect, t2TupleListList.shape)
+            Assert.CheckEqual(t2DimCorrect, t2TupleListList.dim)
+            Assert.CheckEqual(t2ValuesCorrect, t2TupleListList.toArray() :?> float32[,])
+
+            // create from ((double * double * double) list * (double * double * double) list) list
+            let t2TupleListTupleList = combo.tensor([ [ 1., 2., 3. ], [ 4., 5., 6. ] ])
+            Assert.CheckEqual(t2ShapeCorrect, t2TupleListTupleList.shape)
+            Assert.CheckEqual(t2DimCorrect, t2TupleListTupleList.dim)
+            Assert.CheckEqual(t2ValuesCorrect, t2TupleListTupleList.toArray() :?> float32[,])
+
+            // create from (double * double * double)[]
+            let t2TupleArray = combo.tensor([| [ 1., 2., 3. ]; [ 4., 5., 6. ] |])
+            Assert.CheckEqual(t2ShapeCorrect, t2TupleArray.shape)
+            Assert.CheckEqual(t2DimCorrect, t2TupleArray.dim)
+            Assert.CheckEqual(t2ValuesCorrect, t2TupleArray.toArray() :?> float32[,])
+
+            // create from ((double * double * double) [] * (double * double * double) []) []
+            let t2TupleArrayTupleArray = combo.tensor([| [| 1., 2., 3. |], [| 4., 5., 6. |] |])
+            Assert.CheckEqual(t2ShapeCorrect, t2TupleArrayTupleArray.shape)
+            Assert.CheckEqual(t2DimCorrect, t2TupleArrayTupleArray.dim)
+            Assert.CheckEqual(t2ValuesCorrect, t2TupleArrayTupleArray.toArray() :?> float32[,])
+
+            // create from (double * double * double) list seq
+            let t2TupleListSeq = combo.tensor(seq { [ 1., 2., 3. ]; [ 4., 5., 6. ] })
+            Assert.CheckEqual(t2ShapeCorrect, t2TupleListSeq.shape)
+            Assert.CheckEqual(t2DimCorrect, t2TupleListSeq.dim)
+            Assert.CheckEqual(t2ValuesCorrect, t2TupleListSeq.toArray() :?> float32[,])
+
+            let t2TupleOfList = combo.tensor [[2.], [3.], [4.]]
+            Assert.CheckEqual([| 3; 1 |], t2TupleOfList.shape)
+            Assert.CheckEqual(array2D [ [2.f]; [3.f]; [4.f] ], t2TupleOfList.toArray() :?> float32[,])
+
+    [<Test>]
+    member _.TestTensorCreate3 () =
+        for combo in Combos.AllDevicesAndBackendsFloat32 do
+            let t3Values = [[[1.; 2.; 3.]; [4.; 5.; 6.]]]
+            let t3 = combo.tensor(t3Values)
+            let t3ShapeCorrect = [|1; 2; 3|]
+            let t3DimCorrect = 3
+            let t3ValuesCorrect = array3D (List.map (List.map (List.map float32)) t3Values)
+
+            Assert.CheckEqual(t3ShapeCorrect, t3.shape)
+            Assert.CheckEqual(t3DimCorrect, t3.dim)
+            Assert.CheckEqual(t3ValuesCorrect, t3.toArray() :?> float32[,,])
+
+    [<Test>]
+    member _.TestTensorCreate4 () =
+        for combo in Combos.AllDevicesAndBackendsFloat32 do
+            let t4Values = [[[[1.; 2.]]]]
+            let t4 = combo.tensor(t4Values)
+            let t4ShapeCorrect = [|1; 1; 1; 2|]
+            let t4DimCorrect = 4
+            let t4ValuesCorrect = array4D (List.map (List.map (List.map (List.map float32))) t4Values)
+
+            Assert.CheckEqual(t4ShapeCorrect, t4.shape)
+            Assert.CheckEqual(t4DimCorrect, t4.dim)
+            Assert.CheckEqual(t4ValuesCorrect, t4.toArray() :?> float32[,,,])
+
+    [<Test>]
+    member _.TestTensorCreate5 () =
+        for combo in Combos.AllDevicesAndBackendsFloat32 do
+            let t5Values = [[[[[1.; 2.]]]]]
+            let t5 = combo.tensor(t5Values)
+            let t5ShapeCorrect = [|1; 1; 1; 1; 2|]
+            let t5DimCorrect = 5
+            let t5ValuesCorrect = array5D (List.map (List.map (List.map (List.map (List.map float32)))) t5Values)
+
+            Assert.CheckEqual(t5ShapeCorrect, t5.shape)
+            Assert.CheckEqual(t5DimCorrect, t5.dim)
+            Assert.CheckEqual(t5ValuesCorrect, t5.toArray())
+
+    [<Test>]
+    member _.TestTensorCreate6 () =
+        for combo in Combos.AllDevicesAndBackendsFloat32 do
+            let t6Values = [[[[[[1.; 2.]]]]]]
+            let t6 = combo.tensor(t6Values)
+            let t6ShapeCorrect = [|1; 1; 1; 1; 1; 2|]
+            let t6DimCorrect = 6
+            let t6ValuesCorrect = array6D (List.map (List.map (List.map (List.map (List.map (List.map float32))))) t6Values)
+
+            Assert.CheckEqual(t6ShapeCorrect, t6.shape)
+            Assert.CheckEqual(t6DimCorrect, t6.dim)
+            Assert.CheckEqual(t6ValuesCorrect, t6.toArray())
+
+    [<Test>]
+    member this.TestTensorCreateFromTensor4 () =
+        let t4Values = [[[[dsharp.tensor 1.; dsharp.tensor 2.]]]]
+        let t4 = dsharp.tensor(t4Values)
+        let t4ShapeCorrect = [|1; 1; 1; 2|]
+        let t4DimCorrect = 4
+        let t4ValuesCorrect = array4D (List.map (List.map (List.map (List.map float32))) t4Values)
+
+        Assert.AreEqual(t4ShapeCorrect, t4.shape)
+        Assert.AreEqual(t4DimCorrect, t4.dim)
+        // TODO
+        // Assert.AreEqual(t4ValuesCorrect, t4.toArray())
+
+    [<Test>]
+    member this.TestTensorCreateFromTensor5 () =
+        let t5Values = [[[[[dsharp.tensor 1.; dsharp.tensor 2.]]]]]
+        let t5 = dsharp.tensor(t5Values)
+        let t5ShapeCorrect = [|1; 1; 1; 1; 2|]
+        let t5DimCorrect = 5
+        let t5ValuesCorrect = array5D (List.map (List.map (List.map (List.map (List.map float32)))) t5Values)
+
+        Assert.AreEqual(t5ShapeCorrect, t5.shape)
+        Assert.AreEqual(t5DimCorrect, t5.dim)
+        Assert.AreEqual(t5ValuesCorrect, t5.toArray())
+
+    [<Test>]
+    member this.TestTensorCreateFromTensor6 () =
+        let t6Values = [[[[[[dsharp.tensor 1.; dsharp.tensor 2.]]]]]]
+        let t6 = dsharp.tensor(t6Values)
+        let t6ShapeCorrect = [|1; 1; 1; 1; 1; 2|]
+        let t6DimCorrect = 6
+        let t6ValuesCorrect = array6D (List.map (List.map (List.map (List.map (List.map (List.map float32))))) t6Values)
+
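+        // Editor's sketch (an addition, not from the import): scalar tensors nested
+        // inside lists are consumed like plain scalars, so, under the same overloads,
+        //   dsharp.tensor([[dsharp.tensor 1.; dsharp.tensor 2.]])
+        // builds a tensor of the same shape as
+        //   dsharp.tensor([[1.; 2.]])  // shape [|1; 2|]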
+        Assert.AreEqual(t6ShapeCorrect, t6.shape)
+        Assert.AreEqual(t6DimCorrect, t6.dim)
+        Assert.AreEqual(t6ValuesCorrect, t6.toArray())
+
+    [<Test>]
+    member _.TensorCreateDistinguishByteAndInt8 () =
+        let v1 = [|10uy; 25uy; 125uy; 220uy|] // Byte
+        let t1 = dsharp.tensor(v1, dtype=Dtype.Float32)
+        let a1 = t1.toArray1D<float32>()
+        let a1Correct = [|10.f; 25.f; 125.f; 220.f|]
+        Assert.AreEqual(a1Correct, a1)
+
+        let v2 = [|10y; 25y; 125y; -20y|] // Int8
+        let t2 = dsharp.tensor(v2, dtype=Dtype.Float32)
+        let a2 = t2.toArray1D<float32>() // The result becomes [|10.; 25.; 125.; 236.|] when Int8 is confused with Byte
+        let a2Correct = [|10.f; 25.f; 125.f; -20.f|]
+        Assert.AreEqual(a2Correct, a2)
+
+    [<Test>]
+    member _.TestTensorToArray () =
+        for combo in Combos.All do
+            let a = array2D [[1.; 2.]; [3.; 4.]]
+            let t = combo.tensor(a)
+            let tArray = t.toArray()
+            let tArrayCorrect = combo.arrayCreator2D a
+            Assert.CheckEqual(tArrayCorrect, tArray)
+
+            let a1 = [|0.; 1.; 0.|]
+            let t1 = combo.tensor(a1)
+            let t1aFloat32 = t1.toArray1D<float32>()
+            let t1aFloat32Correct = a1 |> Array.map (fun v -> System.Convert.ToSingle(v))
+            Assert.AreEqual(t1aFloat32Correct, t1aFloat32)
+
+            let t1aFloat64 = t1.toArray1D<double>()
+            let t1aFloat64Correct = a1 |> Array.map (fun v -> System.Convert.ToDouble(v))
+            Assert.AreEqual(t1aFloat64Correct, t1aFloat64)
+
+            let t1aInt64 = t1.toArray1D<int64>()
+            let t1aInt64Correct = a1 |> Array.map (fun v -> System.Convert.ToInt64(v))
+            Assert.AreEqual(t1aInt64Correct, t1aInt64)
+
+            let t1aInt32 = t1.toArray1D<int32>()
+            let t1aInt32Correct = a1 |> Array.map (fun v -> System.Convert.ToInt32(v))
+            Assert.AreEqual(t1aInt32Correct, t1aInt32)
+
+            let t1aInt16 = t1.toArray1D<int16>()
+            let t1aInt16Correct = a1 |> Array.map (fun v -> System.Convert.ToInt16(v))
+            Assert.AreEqual(t1aInt16Correct, t1aInt16)
+
+            let t1aInt8 = t1.toArray1D<int8>()
+            let t1aInt8Correct = a1 |> Array.map (fun v -> System.Convert.ToSByte(v))
+            Assert.AreEqual(t1aInt8Correct, t1aInt8)
+
+            let t1aByte = t1.toArray1D<byte>()
+            let t1aByteCorrect = a1 |> Array.map (fun v -> System.Convert.ToByte(v))
+            Assert.AreEqual(t1aByteCorrect, t1aByte)
+
+            let t1aBool = t1.toArray1D<bool>()
+            let t1aBoolCorrect = a1 |> Array.map (fun v -> System.Convert.ToBoolean(v))
+            Assert.AreEqual(t1aBoolCorrect, t1aBool)
+
+
+            let a2 = array2D [[0.; 1.; 0.]; [1.; 0.; 1.]]
+            let t2 = combo.tensor(a2)
+            let t2aFloat32 = t2.toArray2D<float32>()
+            let t2aFloat32Correct = a2 |> Array2D.map (fun v -> System.Convert.ToSingle(v))
+            Assert.AreEqual(t2aFloat32Correct, t2aFloat32)
+
+            let t2aFloat64 = t2.toArray2D<double>()
+            let t2aFloat64Correct = a2 |> Array2D.map (fun v -> System.Convert.ToDouble(v))
+            Assert.AreEqual(t2aFloat64Correct, t2aFloat64)
+
+            let t2aInt64 = t2.toArray2D<int64>()
+            let t2aInt64Correct = a2 |> Array2D.map (fun v -> System.Convert.ToInt64(v))
+            Assert.AreEqual(t2aInt64Correct, t2aInt64)
+
+            let t2aInt32 = t2.toArray2D<int32>()
+            let t2aInt32Correct = a2 |> Array2D.map (fun v -> System.Convert.ToInt32(v))
+            Assert.AreEqual(t2aInt32Correct, t2aInt32)
+
+            let t2aInt16 = t2.toArray2D<int16>()
+            let t2aInt16Correct = a2 |> Array2D.map (fun v -> System.Convert.ToInt16(v))
+            Assert.AreEqual(t2aInt16Correct, t2aInt16)
+
+            let t2aInt8 = t2.toArray2D<int8>()
+            let t2aInt8Correct = a2 |> Array2D.map (fun v -> System.Convert.ToSByte(v))
+            Assert.AreEqual(t2aInt8Correct, t2aInt8)
+
+            let t2aByte = t2.toArray2D<byte>()
+            let t2aByteCorrect = a2 |> Array2D.map (fun v -> System.Convert.ToByte(v))
+            Assert.AreEqual(t2aByteCorrect, t2aByte)
+
+            let t2aBool = t2.toArray2D<bool>()
+            let t2aBoolCorrect = a2 |> Array2D.map (fun v -> System.Convert.ToBoolean(v))
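+            // Editor's sketch (an addition): the typed toArrayND<'T> accessors used
+            // here convert elementwise with the same System.Convert semantics that
+            // the Correct arrays above are built with, so nonzero numerics map to
+            // true when targeting bool:
+            //   combo.tensor([0.; 1.]).toArray1D<bool>()  // [| false; true |]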
+            Assert.AreEqual(t2aBoolCorrect, t2aBool)
+
+
+            let a3 = array3D [[[0.; 1.; 0.]; [1.; 0.; 1.]]]
+            let t3 = combo.tensor(a3)
+            let t3aFloat32 = t3.toArray3D<float32>()
+            let t3aFloat32Correct = a3 |> Array3D.map (fun v -> System.Convert.ToSingle(v))
+            Assert.AreEqual(t3aFloat32Correct, t3aFloat32)
+
+            let t3aFloat64 = t3.toArray3D<double>()
+            let t3aFloat64Correct = a3 |> Array3D.map (fun v -> System.Convert.ToDouble(v))
+            Assert.AreEqual(t3aFloat64Correct, t3aFloat64)
+
+            let t3aInt64 = t3.toArray3D<int64>()
+            let t3aInt64Correct = a3 |> Array3D.map (fun v -> System.Convert.ToInt64(v))
+            Assert.AreEqual(t3aInt64Correct, t3aInt64)
+
+            let t3aInt32 = t3.toArray3D<int32>()
+            let t3aInt32Correct = a3 |> Array3D.map (fun v -> System.Convert.ToInt32(v))
+            Assert.AreEqual(t3aInt32Correct, t3aInt32)
+
+            let t3aInt16 = t3.toArray3D<int16>()
+            let t3aInt16Correct = a3 |> Array3D.map (fun v -> System.Convert.ToInt16(v))
+            Assert.AreEqual(t3aInt16Correct, t3aInt16)
+
+            let t3aInt8 = t3.toArray3D<int8>()
+            let t3aInt8Correct = a3 |> Array3D.map (fun v -> System.Convert.ToSByte(v))
+            Assert.AreEqual(t3aInt8Correct, t3aInt8)
+
+            let t3aByte = t3.toArray3D<byte>()
+            let t3aByteCorrect = a3 |> Array3D.map (fun v -> System.Convert.ToByte(v))
+            Assert.AreEqual(t3aByteCorrect, t3aByte)
+
+            let t3aBool = t3.toArray3D<bool>()
+            let t3aBoolCorrect = a3 |> Array3D.map (fun v -> System.Convert.ToBoolean(v))
+            Assert.AreEqual(t3aBoolCorrect, t3aBool)
+
+
+            let a4 = array4D [[[[0.; 1.; 0.]; [1.; 0.; 1.]]]]
+            let t4 = combo.tensor(a4)
+            let t4aFloat32 = t4.toArray4D<float32>()
+            let t4aFloat32Correct = a4 |> Array4D.map (fun v -> System.Convert.ToSingle(v))
+            Assert.AreEqual(t4aFloat32Correct, t4aFloat32)
+
+            let t4aFloat64 = t4.toArray4D<double>()
+            let t4aFloat64Correct = a4 |> Array4D.map (fun v -> System.Convert.ToDouble(v))
+            Assert.AreEqual(t4aFloat64Correct, t4aFloat64)
+
+            let t4aInt64 = t4.toArray4D<int64>()
+            let t4aInt64Correct = a4 |> Array4D.map (fun v -> System.Convert.ToInt64(v))
+            Assert.AreEqual(t4aInt64Correct, t4aInt64)
+
+            let t4aInt32 = t4.toArray4D<int32>()
+            let t4aInt32Correct = a4 |> Array4D.map (fun v -> System.Convert.ToInt32(v))
+            Assert.AreEqual(t4aInt32Correct, t4aInt32)
+
+            let t4aInt16 = t4.toArray4D<int16>()
+            let t4aInt16Correct = a4 |> Array4D.map (fun v -> System.Convert.ToInt16(v))
+            Assert.AreEqual(t4aInt16Correct, t4aInt16)
+
+            let t4aInt8 = t4.toArray4D<int8>()
+            let t4aInt8Correct = a4 |> Array4D.map (fun v -> System.Convert.ToSByte(v))
+            Assert.AreEqual(t4aInt8Correct, t4aInt8)
+
+            let t4aByte = t4.toArray4D<byte>()
+            let t4aByteCorrect = a4 |> Array4D.map (fun v -> System.Convert.ToByte(v))
+            Assert.AreEqual(t4aByteCorrect, t4aByte)
+
+            let t4aBool = t4.toArray4D<bool>()
+            let t4aBoolCorrect = a4 |> Array4D.map (fun v -> System.Convert.ToBoolean(v))
+            Assert.AreEqual(t4aBoolCorrect, t4aBool)
+
+
+            let a5 = array5D [[[[[0.; 1.; 0.]; [1.; 0.; 1.]]]]]
+            let t5 = combo.tensor(a5)
+            let t5aFloat32 = t5.toArray5D<float32>()
+            let t5aFloat32Correct = a5 |> Array5D.map (fun v -> System.Convert.ToSingle(v))
+            Assert.AreEqual(t5aFloat32Correct, t5aFloat32)
+
+            let t5aFloat64 = t5.toArray5D<double>()
+            let t5aFloat64Correct = a5 |> Array5D.map (fun v -> System.Convert.ToDouble(v))
+            Assert.AreEqual(t5aFloat64Correct, t5aFloat64)
+
+            let t5aInt64 = t5.toArray5D<int64>()
+            let t5aInt64Correct = a5 |> Array5D.map (fun v -> System.Convert.ToInt64(v))
+            Assert.AreEqual(t5aInt64Correct, t5aInt64)
+
+            let t5aInt32 = t5.toArray5D<int32>()
+            let t5aInt32Correct = a5 |> Array5D.map (fun v -> System.Convert.ToInt32(v))
+            Assert.AreEqual(t5aInt32Correct, t5aInt32)
+
+            let t5aInt16 = t5.toArray5D<int16>()
+            let t5aInt16Correct = a5 |> Array5D.map (fun v -> System.Convert.ToInt16(v))
+            Assert.AreEqual(t5aInt16Correct, t5aInt16)
+
+            let t5aInt8 = t5.toArray5D<int8>()
+            let t5aInt8Correct = a5 |> Array5D.map (fun v -> System.Convert.ToSByte(v))
+            Assert.AreEqual(t5aInt8Correct, t5aInt8)
+
+            let t5aByte = t5.toArray5D<byte>()
+            let t5aByteCorrect = a5 |> Array5D.map (fun v -> System.Convert.ToByte(v))
+            Assert.AreEqual(t5aByteCorrect, t5aByte)
+
+            let t5aBool = t5.toArray5D<bool>()
+            let t5aBoolCorrect = a5 |> Array5D.map (fun v -> System.Convert.ToBoolean(v))
+            Assert.AreEqual(t5aBoolCorrect, t5aBool)
+
+
+            let a6 = array6D [[[[[[0.; 1.; 0.]; [1.; 0.; 1.]]]]]]
+            let t6 = combo.tensor(a6)
+            let t6aFloat32 = t6.toArray6D<float32>()
+            let t6aFloat32Correct = a6 |> Array6D.map (fun v -> System.Convert.ToSingle(v))
+            Assert.AreEqual(t6aFloat32Correct, t6aFloat32)
+
+            let t6aFloat64 = t6.toArray6D<double>()
+            let t6aFloat64Correct = a6 |> Array6D.map (fun v -> System.Convert.ToDouble(v))
+            Assert.AreEqual(t6aFloat64Correct, t6aFloat64)
+
+            let t6aInt64 = t6.toArray6D<int64>()
+            let t6aInt64Correct = a6 |> Array6D.map (fun v -> System.Convert.ToInt64(v))
+            Assert.AreEqual(t6aInt64Correct, t6aInt64)
+
+            let t6aInt32 = t6.toArray6D<int32>()
+            let t6aInt32Correct = a6 |> Array6D.map (fun v -> System.Convert.ToInt32(v))
+            Assert.AreEqual(t6aInt32Correct, t6aInt32)
+
+            let t6aInt16 = t6.toArray6D<int16>()
+            let t6aInt16Correct = a6 |> Array6D.map (fun v -> System.Convert.ToInt16(v))
+            Assert.AreEqual(t6aInt16Correct, t6aInt16)
+
+            let t6aInt8 = t6.toArray6D<int8>()
+            let t6aInt8Correct = a6 |> Array6D.map (fun v -> System.Convert.ToSByte(v))
+            Assert.AreEqual(t6aInt8Correct, t6aInt8)
+
+            let t6aByte = t6.toArray6D<byte>()
+            let t6aByteCorrect = a6 |> Array6D.map (fun v -> System.Convert.ToByte(v))
+            Assert.AreEqual(t6aByteCorrect, t6aByte)
+
+            let t6aBool = t6.toArray6D<bool>()
+            let t6aBoolCorrect = a6 |> Array6D.map (fun v -> System.Convert.ToBoolean(v))
+            Assert.AreEqual(t6aBoolCorrect, t6aBool)
+
+    [<Test>]
+    member _.TestTensorSaveAndLoadToSpecificConfiguration () =
+        let fileName = System.IO.Path.GetTempFileName()
+        for combo in Combos.All do
+            let a = combo.tensor([[1,2],[3,4]])
+            a.save(fileName)
+            let b = Tensor.load(fileName, device=combo.device, dtype=combo.dtype, backend=combo.backend)
+            Assert.CheckEqual(a, b)
+
+    [<Test>]
+    member _.TestTensorSaveLoadBackToDefaultConfigurationThenMoveToCombo () =
+        let fileName = System.IO.Path.GetTempFileName()
+        for combo in Combos.All do
+            let a = combo.tensor([[1,2],[3,4]])
+            a.save(fileName)
+            let b = Tensor.load(fileName)
+            let bInCombo = combo.move(b)
+            Assert.CheckEqual(a, bInCombo)
+
+    [<Test>]
+    member _.TestTensorSaveLoadBackToDefaultConfiguration () =
+        let fileName = System.IO.Path.GetTempFileName()
+        for combo in Combos.All do
+            let a = combo.tensor([[1,2],[3,4]])
+            a.save(fileName)
+            let aInDefault = a.move(device=Device.Default, backend=Backend.Default)
+            let b = Tensor.load(fileName, dtype = combo.dtype)
+            Assert.CheckEqual(aInDefault, b)
+
+    [<Test>]
+    member _.TestTensorSaveLoadConfiguration () =
+        let fileName = System.IO.Path.GetTempFileName()
+        let a = dsharp.tensor([[1,2],[3,4]])
+        a.save(fileName)
+        for combo in Combos.All do
+            let aInCombo = combo.move(a)
+            let b = Tensor.load(fileName, device=combo.device, dtype=combo.dtype, backend=combo.backend)
+            Assert.CheckEqual(aInCombo, b)
+
+    [<Test>]
+    member _.TestTensorClone () =
+        for combo in Combos.All do
+            let a = combo.randint(0,100,[10;10])
+            let b = a.clone()
+            Assert.CheckEqual(a, b)
+            Assert.CheckEqual(a.dtype, b.dtype)
+
+    [<Test>]
+    member _.TestTensorFull () =
+        for combo in Combos.IntegralAndFloatingPoint do
+            let t1a = combo.full([2;3], 2)
+            let t1b = combo.ones([2;3]) * 2
+            let t2a = combo.full([], 2)
+            let t2b = combo.ones([]) * 2
+            Assert.CheckEqual(t1a, t1b)
+            Assert.CheckEqual(t2a, t2b)
+
+        for combo in Combos.All do
+            let t1 = combo.full([2], 1)
+            let t1Expected = combo.tensor([1,1])
+            Assert.CheckEqual(t1, t1Expected)
+
+    [<Test>]
+    member _.TestTensorZero () =
+        for combo in Combos.All do
+            let t1 = combo.zero()
+            let t1Expected = combo.tensor(0)
+            Assert.CheckEqual(t1, t1Expected)
+            Assert.CheckEqual(t1.shape, ([| |]: int32[]) )
+            Assert.CheckEqual(t1.dtype, combo.dtype)
+
+    [<Test>]
+    member _.TestTensorZerosDisposal () =
+        for i in 0..1024 do
+            let _ = dsharp.zeros([1024; 1024])
+            // printfn "%A" i
+            System.GC.Collect()
+        Assert.That(true)
+
+    [<Test>]
+    member _.TestTensorZeros () =
+        for combo in Combos.All do
+            let t0 = combo.zeros([])
+            let t0Expected = combo.tensor(0)
+            Assert.CheckEqual(t0.shape, ([| |]: int32[]) )
+            Assert.CheckEqual(t0.dtype, combo.dtype)
+            Assert.CheckEqual(t0, t0Expected)
+
+            let t1 = combo.zeros([2])
+            let t1Expected = combo.tensor([0,0])
+            Assert.CheckEqual(t1.shape, ([| 2 |]: int32[]) )
+            Assert.CheckEqual(t1.dtype, combo.dtype)
+            Assert.CheckEqual(t1, t1Expected)
+
+    [<Test>]
+    member _.TestTensorEmpty () =
+        for combo in Combos.All do
+            // Empty tensor with no data
+            // This is not a scalar, and shape [|0|] signifies the absence of data and dimensions
+            let tvoid = combo.empty()
+            Assert.CheckEqual(tvoid.shape, ([|0|]: int32[]) )
+            Assert.CheckEqual(tvoid.dtype, combo.dtype)
+
+            // Scalar (zero-dimensional) tensor
+            // Shape [||] signifies zero dimensions
+            // Tensor data is not initialized and can be random or zero depending on the backend used
+            let t0 = combo.empty([])
+            Assert.CheckEqual(t0.shape, ([| |]: int32[]) )
+            Assert.CheckEqual(t0.dtype, combo.dtype)
+
+            // Vector
+            // Tensor data is not initialized and can be random or zero depending on the backend used
+            let t1 = combo.empty([2])
+            Assert.CheckEqual(t1.shape, ([| 2 |]: int32[]) )
+            Assert.CheckEqual(t1.dtype, combo.dtype)
+
+    [<Test>]
+    member _.TestTensorOne () =
+        for combo in Combos.All do
+            let t1 = combo.one()
+            let t1Expected = combo.tensor(1)
+            Assert.CheckEqual(t1, t1Expected)
+            Assert.CheckEqual(t1.dtype, combo.dtype)
+            Assert.CheckEqual(t1.shape, ([| |]: int32[]) )
+
+    [<Test>]
+    member _.TestTensorOnes () =
+        for combo in Combos.All do
+            let t0 = combo.ones([])
+            let t0Expected = combo.tensor(1)
+            Assert.CheckEqual(t0.shape, ([| |]: int32[]) )
+            Assert.CheckEqual(t0.dtype, combo.dtype)
+            Assert.CheckEqual(t0, t0Expected)
+
+            let t1 = combo.ones([2])
+            let t1Expected = combo.tensor([1,1])
+            Assert.CheckEqual(t1, t1Expected)
+
+    [<Test>]
+    member _.TestTensorIsTensor () =
+        for combo in Combos.All do
+            let a = 2.
+            let b = combo.tensor(2.)
+            Assert.That(not (dsharp.isTensor(a)))
+            Assert.That(dsharp.isTensor(b))
+
+    [<Test>]
+    member _.TestTensorConvert () =
+        for combo in Combos.IntegralAndFloatingPoint do
+            let v = 2.
+            let t = combo.tensor(v)
+            let tsingle = single t
+            let tdouble = double t
+            let tint16 = int16 t
+            let tint32 = int32 t
+            let tint64 = int64 t
+            let tsingleCorrect = single v
+            let tdoubleCorrect = double v
+            let tint16Correct = int16 v
+            let tint32Correct = int32 v
+            let tint64Correct = int64 v
+            Assert.CheckEqual(tsingleCorrect, tsingle)
+            Assert.CheckEqual(tdoubleCorrect, tdouble)
+            Assert.CheckEqual(tint16Correct, tint16)
+            Assert.CheckEqual(tint32Correct, tint32)
+            Assert.CheckEqual(tint64Correct, tint64)
+
+        for combo in Combos.IntegralAndFloatingPoint do
+            let v = 2.
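+            // Editor's sketch (an addition): two equivalent conversion styles are
+            // exercised in this member; the F# conversion functions used in the
+            // first loop and the to* methods used below agree on scalar tensors:
+            //   let s = combo.tensor(2.)
+            //   single s = s.toSingle()  // true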
+ let t = combo.tensor(v) + let tsingle = t.toSingle() + let tdouble = t.toDouble() + let tint16 = t.toInt16() + let tint32 = t.toInt32() + let tint64 = t.toInt64() + let tsingleCorrect = single v + let tdoubleCorrect = double v + let tint16Correct = int16 v + let tint32Correct = int32 v + let tint64Correct = int64 v + Assert.CheckEqual(tsingleCorrect, tsingle) + Assert.CheckEqual(tdoubleCorrect, tdouble) + Assert.CheckEqual(tint16Correct, tint16) + Assert.CheckEqual(tint32Correct, tint32) + Assert.CheckEqual(tint64Correct, tint64) + + for combo in Combos.Bool do + let v = true + let t = combo.tensor(v) + let tbool = t.toBool() + let tboolCorrect = v + Assert.CheckEqual(tboolCorrect, tbool) + + [] + member _.TestTensorConvertViaIConvertible () = + for combo in Combos.IntegralAndFloatingPoint do + let v = 2. + let t = combo.tensor(v) + let tsingle = Convert.ToSingle t + let tdouble = Convert.ToDouble t + let tint16 = Convert.ToInt16 t + let tint32 = Convert.ToInt32 t + let tint64 = Convert.ToInt64 t + let tsingleCorrect = Convert.ToSingle v + let tdoubleCorrect = Convert.ToDouble v + let tint16Correct = Convert.ToInt16 v + let tint32Correct = Convert.ToInt32 v + let tint64Correct = Convert.ToInt64 v + Assert.CheckEqual(tsingleCorrect, tsingle) + Assert.CheckEqual(tdoubleCorrect, tdouble) + Assert.CheckEqual(tint16Correct, tint16) + Assert.CheckEqual(tint32Correct, tint32) + Assert.CheckEqual(tint64Correct, tint64) + + let t2 = combo.full([4], t) // You can use a scalar tensor as a scalar and the types are used correctly + let t2Correct = combo.tensor([2.; 2.; 2.; 2. ]) + Assert.CheckEqual(t2, t2Correct) + + let t3 = t2 + (t :> scalar) // You can use a scalar tensor as a scalar and the types are used correctly + let t3Correct = combo.tensor([4.; 4.; 4.; 4. ]) + Assert.CheckEqual(t3, t3Correct) + + [] + member _.TestTensorToScalar () = + for combo in Combos.All do + let t = 1. + let t0 = combo.tensor(t) + let t1 = combo.tensor([t]) + let t2 = combo.tensor([[t]]) + let t3 = combo.tensor([[[t]]]) + + let t0s = float t0 + let t1s = float t1 + let t2s = float t2 + let t3s = float t3 + + Assert.CheckEqual(t, t0s) + Assert.CheckEqual(t, t1s) + Assert.CheckEqual(t, t2s) + Assert.CheckEqual(t, t3s) + + [] + member _.TestTensorOnehot () = + for combo in Combos.All do + let t0 = combo.onehot(3, 0) + let t1 = combo.onehot(3, 1) + let t2 = combo.onehot(3, 2) + let t0Correct = combo.tensor([1,0,0]) + let t1Correct = combo.tensor([0,1,0]) + let t2Correct = combo.tensor([0,0,1]) + Assert.CheckEqual(t0Correct, t0) + Assert.CheckEqual(t1Correct, t1) + Assert.CheckEqual(t2Correct, t2) + + [] + // Test the underlying GetItem on the RawPrimal, useful when testing backends + member _.TestTensorGetItemOnPrimal () = + for combo in Combos.IntegralAndFloatingPoint do + let t0 = combo.tensor(2.) 
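+            // Editor's sketch (an addition): primalRaw.GetItem takes one index per
+            // dimension and returns a scalar, so for a rank-2 tensor:
+            //   let r = combo.tensor([[1.; 2.]; [3.; 4.]]).primalRaw
+            //   r.GetItem(1, 0).toDouble()  // 3.0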
+ Assert.CheckEqual(2.0, t0.toDouble()) + + let t1 = combo.tensor([2., 3., 4., 5., 6.]) + Assert.CheckEqual(2.0, t1.primalRaw.GetItem(0).toDouble()) + Assert.CheckEqual(3.0, t1.primalRaw.GetItem(1).toDouble()) + Assert.CheckEqual(4.0, t1.primalRaw.GetItem(2).toDouble()) + Assert.CheckEqual(5.0, t1.primalRaw.GetItem(3).toDouble()) + Assert.CheckEqual(6.0, t1.primalRaw.GetItem(4).toDouble()) + + let t2 = combo.tensor([[2.]; [3.]]) + Assert.CheckEqual(2.0, t2.primalRaw.GetItem(0, 0).toDouble()) + Assert.CheckEqual(3.0, t2.primalRaw.GetItem(1, 0).toDouble()) + + let t2b = combo.tensor([[1.;2.]; [3.;4.]]) + Assert.CheckEqual(1.0, t2b.primalRaw.GetItem(0, 0).toDouble()) + Assert.CheckEqual(2.0, t2b.primalRaw.GetItem(0, 1).toDouble()) + Assert.CheckEqual(3.0, t2b.primalRaw.GetItem(1, 0).toDouble()) + Assert.CheckEqual(4.0, t2b.primalRaw.GetItem(1, 1).toDouble()) + + let t3 = combo.tensor([[[2.; 3.]]]) + Assert.CheckEqual(2.0, t3.primalRaw.GetItem(0, 0, 0).toDouble()) + Assert.CheckEqual(3.0, t3.primalRaw.GetItem(0, 0, 1).toDouble()) + + let t4 = combo.tensor([[[[1.]]]]) + Assert.CheckEqual(1.0, t4.primalRaw.GetItem(0, 0, 0, 0).toDouble()) + + [] + // Test the underlying GetItem on the RawPrimal, useful when testing backends + member _.TestTensorGetSliceOnPrimal () = + for combo in Combos.IntegralAndFloatingPoint do + let t0 = combo.tensor(2.) + Assert.CheckEqual(2.0, t0.toDouble()) + + let t1 = combo.tensor([ 0 .. 10 ]) + let t1slice1 = t1.primalRaw.GetSlice(array2D [ [ 3; 4; 0 ] ]) + let t1slice2 = t1.primalRaw.GetSlice(array2D [ [ 3; 3; 0 ] ]) + + Assert.CheckEqual(3, (t1slice1.GetItem(0) |> Convert.ToInt32)) + Assert.CheckEqual(4, (t1slice1.GetItem(1) |> Convert.ToInt32)) + Assert.CheckEqual(1, t1slice1.Dim) + Assert.CheckEqual(2, t1slice1.Shape[0]) + + Assert.CheckEqual(3, (t1slice2.GetItem(0) |> Convert.ToInt32)) + Assert.CheckEqual(1, t1slice2.Dim) + Assert.CheckEqual(1, t1slice2.Shape[0]) + + // TODO: slicing reducing down to scalar + //let t1slice3 = t1.primalRaw.GetSlice(array2D [ [ 3; 3; 1 ] ]) + //Assert.CheckEqual(3, t1slice3.GetItem(0)) + //Assert.CheckEqual(0, t1slice3.Dim) + + let t2 = combo.tensor([ for i in 0 .. 10 -> [ i*10 .. 
i*10+10 ] ]) + let t2slice1 = t2.primalRaw.GetSlice(array2D [ [ 3; 5; 0 ]; [ 3; 5; 0 ] ]) + + Assert.CheckEqual(33, t2slice1.GetItem(0, 0) |> Convert.ToInt32) + Assert.CheckEqual(34, t2slice1.GetItem(0, 1) |> Convert.ToInt32) + Assert.CheckEqual(35, t2slice1.GetItem(0, 2) |> Convert.ToInt32) + Assert.CheckEqual(43, t2slice1.GetItem(1, 0) |> Convert.ToInt32) + Assert.CheckEqual(44, t2slice1.GetItem(1, 1) |> Convert.ToInt32) + Assert.CheckEqual(45, t2slice1.GetItem(1, 2) |> Convert.ToInt32) + Assert.CheckEqual(53, t2slice1.GetItem(2, 0) |> Convert.ToInt32) + Assert.CheckEqual(54, t2slice1.GetItem(2, 1) |> Convert.ToInt32) + Assert.CheckEqual(55, t2slice1.GetItem(2, 2) |> Convert.ToInt32) + + let t2slice2 = t2.primalRaw.GetSlice(array2D [ [ 3; 5; 0 ]; [ 3; 3; 1 ] ]) + Assert.CheckEqual(33, t2slice2.GetItem(0) |> Convert.ToInt32) + Assert.CheckEqual(43, t2slice2.GetItem(1) |> Convert.ToInt32) + Assert.CheckEqual(53, t2slice2.GetItem(2) |> Convert.ToInt32) + + let t2slice3 = t2.primalRaw.GetSlice(array2D [ [ 3; 3; 1 ]; [ 3; 5; 0 ] ]) + Assert.CheckEqual(33, t2slice3.GetItem(0) |> Convert.ToInt32) + Assert.CheckEqual(34, t2slice3.GetItem(1) |> Convert.ToInt32) + Assert.CheckEqual(35, t2slice3.GetItem(2) |> Convert.ToInt32) + + + [] + // Test cases of indexing where indexing returns a scalar + member _.TestTensorIndexItemAsScalarTensor () = + for combo in Combos.IntegralAndFloatingPoint do + let t0 = combo.tensor(2.) + Assert.CheckEqual(2.0, t0.toDouble()) + + let t1 = combo.tensor([2., 3., 4., 5., 6.]) + let t1_0 = t1[0] + let t1_1 = t1[1] + let t1_0_s = t1_0.toDouble() + let t1_1_s = t1_1.toDouble() + Assert.CheckEqual(2.0, t1_0_s) + Assert.CheckEqual(3.0, t1_1_s) + Assert.CheckEqual(4.0, (t1[2].toDouble())) + Assert.CheckEqual(5.0, (t1[3].toDouble())) + + let t2 = combo.tensor([[2.]; [3.]]) + Assert.CheckEqual(2.0, (t2[0,0].toDouble())) + Assert.CheckEqual(3.0, (t2[1,0].toDouble())) + + let t2b = combo.tensor([[1.;2.]; [3.;4.]]) + Assert.CheckEqual(1.0, (t2b[0,0].toDouble())) + Assert.CheckEqual(2.0, (t2b[0,1].toDouble())) + Assert.CheckEqual(3.0, (t2b[1,0].toDouble())) + Assert.CheckEqual(4.0, (t2b[1,1].toDouble())) + + let t3 = combo.tensor([[[2.; 3.]]]) + Assert.CheckEqual(2.0, (t3[0,0,0].toDouble())) + Assert.CheckEqual(3.0, (t3[0,0,1].toDouble())) + + let t4 = combo.tensor([[[[1.]]]]) + Assert.CheckEqual(1.0, (t4[0,0,0,0].toDouble())) + + [] + member _.TestTensorArange () = + for combo in Combos.All do + let t = combo.arange(5.) 
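+            // Editor's sketch (an addition): arange is endpoint-exclusive, the first
+            // positional argument is the end value, and the overload picks the dtype:
+            //   combo.arange(5.)           // [0.; 1.; 2.; 3.; 4.], default float dtype
+            //   combo.arange(5., 1.5, 0.5) // endValue=5., startValue=1.5, step=0.5
+            //   combo.arange(5)            // [0..4] as Dtype.Int32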
+ let tCorrect = combo.tensor([0.,1.,2.,3.,4.]) + Assert.CheckEqual(tCorrect, t) + + let t2 = combo.arange(5., 1.5, 0.5) + let t2Correct = combo.tensor([1.5,2.,2.5,3.,3.5,4.,4.5]) + Assert.CheckEqual(t2Correct, t2) + + let t3 = combo.arange(5) + let t3Correct = combo.tensor([0,1,2,3,4], dtype=Dtype.Int32) + Assert.CheckEqual(t3Correct, t3) + + [] + member _.TestTensorLinspace () = + for combo in Combos.FloatingPoint do + let t = combo.linspace(0, 5, 5) + let tCorrect = combo.tensor([0.0000, 1.2500, 2.5000, 3.7500, 5.0000]) + Assert.That(tCorrect.allclose(t, 0.1)) + + let t = combo.linspace(-4, 5, 8) + let tCorrect = combo.tensor([-4.0000, -2.7143, -1.4286, -0.1429, 1.1429, 2.4286, 3.7143, 5.0000]) + Assert.That(tCorrect.allclose(t, 0.1)) + + [] + member _.TestTensorLogspace () = + for combo in Combos.FloatingPoint do + let t = combo.logspace(0, 5, 5) + let tCorrect = combo.tensor([1.0000e+00, 1.7783e+01, 3.1623e+02, 5.6234e+03, 1.0000e+05]) + Assert.That(tCorrect.allclose(t, 0.1)) + + let t = combo.logspace(-10., 1., 10, Math.E) + let tCorrect = combo.tensor([4.5400e-05, 1.5412e-04, 5.2320e-04, 1.7761e-03, 6.0294e-03, 2.0468e-02, 6.9483e-02, 2.3588e-01, 8.0074e-01, 2.7183e+00]) + Assert.That(tCorrect.allclose(t, 0.1)) + + let t = combo.logspace(1, 10, 10) + let tCorrect = combo.tensor([1.0000e+01, 1.0000e+02, 1.0000e+03, 1.0000e+04, 1.0000e+05, 1.0000e+06, 1.0000e+07, 1.0000e+08, 1.0000e+09, 1.0000e+10]) + Assert.That(tCorrect.allclose(t, 0.1)) + + [] + member _.TestTensorZeroSize () = + for combo in Combos.AllExcept16s do + let t = combo.tensor([]) + let tshape = t.shape + let tshapeCorrect = [|0|] + let tdtype = t.dtype + let tdtypeCorrect = combo.dtype + Assert.CheckEqual(tshapeCorrect, tshape) + Assert.CheckEqual(tdtypeCorrect, tdtype) + + let t = combo.tensor([||]) + let tshape = t.shape + let tshapeCorrect = [|0|] + let tdtype = t.dtype + let tdtypeCorrect = combo.dtype + Assert.CheckEqual(tshapeCorrect, tshape) + Assert.CheckEqual(tdtypeCorrect, tdtype) + + for combo in Combos.IntegralAndFloatingPointExcept16s do + let t = combo.tensor([]) + + let tAdd = t + 2 + let tAddCorrect = t + Assert.CheckEqual(tAddCorrect, tAdd) + + let tMul = t * 2 + let tMulCorrect = t + Assert.CheckEqual(tMulCorrect, tMul) + + let tSum = t.sum() + let tSumCorrect = tSum.zeroLike() + Assert.CheckEqual(tSumCorrect, tSum) + + let tClone = t.clone() + let tCloneCorrect = t + Assert.CheckEqual(tCloneCorrect, tClone) + + for combo in Combos.IntegralAndFloatingPointExcept16s do + let t = combo.tensor([]) + + let tSub = t - 2 + let tSubCorrect = t + Assert.CheckEqual(tSubCorrect, tSub) + + let tDiv = t / 2 + let tDivCorrect = t.cast(Dtype.divisionType t.dtype t.dtype) + Assert.CheckEqual(tDivCorrect, tDiv) + + let tNeg = -t + let tNegCorrect = t + Assert.CheckEqual(tNegCorrect, tNeg) + + let tAbs = dsharp.abs(t) + let tAbsCorrect = t + Assert.CheckEqual(tAbsCorrect, tAbs) + + let tSign = dsharp.sign(t) + let tSignCorrect = t + Assert.CheckEqual(tSignCorrect, tSign) + + for combo in Combos.FloatingPointExcept16s do + let t = combo.tensor([]) + + let tPow = t ** 2 + let tPowCorrect = t + Assert.CheckEqual(tPowCorrect, tPow) + + [] + member _.TestTensorEye () = + for combo in Combos.All do + let t = combo.eye(3) + let tCorrect = combo.tensor([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + Assert.That(tCorrect.allclose(t)) + + let t = combo.eye(3, 2) + let tCorrect = combo.tensor([[1., 0.], + [0., 1.], + [0., 0.]]) + Assert.That(tCorrect.allclose(t)) + + let t = combo.eye(2, 3) + let tCorrect = combo.tensor([[1., 
0., 0.], + [0., 1., 0.]]) + Assert.That(tCorrect.allclose(t)) + + let t = combo.eye(2, 0) + let tCorrect = combo.tensor([]) + Assert.That(tCorrect.allclose(t)) + + [] + member _.TestTensorMultinomial () = + for combo in Combos.FloatingPoint do + let p1 = combo.tensor([0.2,0.3,0.5]) + let m1 = dsharp.multinomial(p1, numSamples=3000) + let m1dtype = m1.dtype + let m1dtypeCorrect = Dtype.Int32 + let m1mean = m1.float().mean() + let m1stddev = m1.float().std() + let m1meanCorrect = combo.tensor(1.3001).float() + let m1stddevCorrect = combo.tensor(0.7810).float() + Assert.CheckEqual(m1dtypeCorrect, m1dtype) + Assert.That(m1meanCorrect.allclose(m1mean, 0.1)) + Assert.That(m1stddevCorrect.allclose(m1stddev, 0.1)) + + let p2 = combo.tensor([[0.2,0.3,0.5],[0.8,0.1,0.1]]) + let m2 = dsharp.multinomial(p2, numSamples=3000) + let m2dtype = m2.dtype + let m2dtypeCorrect = Dtype.Int32 + let m2mean = m2.float().mean(dim=1) + let m2stddev = m2.float().std(dim=1) + let m2meanCorrect = combo.tensor([1.3001, 0.3001]).float() + let m2stddevCorrect = combo.tensor([0.7810, 0.6404]).float() + Assert.CheckEqual(m2dtypeCorrect, m2dtype) + Assert.That(m2meanCorrect.allclose(m2mean, 0.15)) + Assert.That(m2stddevCorrect.allclose(m2stddev, 0.15)) + + [] + member _.TestTensorBernoulli () = + for combo in Combos.FloatingPointExcept16s do + let p1 = combo.tensor([0.1,0.5,0.9]) + let b1 = dsharp.bernoulli(p1.expand([2500;3])) + let b1mean = b1.mean(dim=0) + let b1meanCorrect = p1 + Assert.That(b1meanCorrect.allclose(b1mean, 0.1, 0.1)) + + let p2 = combo.tensor([[0.2,0.4],[0.9, 0.5]]) + let b2 = dsharp.bernoulli(p2.expand([2500;2;2])) + let b2mean = b2.mean(dim=0) + let b2meanCorrect = p2 + Assert.That(b2meanCorrect.allclose(b2mean, 0.1, 0.1)) + + [] + member _.TestTensorDropout () = + for combo in Combos.FloatingPoint do + for p in [0.; 0.2; 0.8; 1.] do + let t = combo.ones([100;100]) + let d = dsharp.dropout(t, p) + let m = d.mean() |> float + let mCorrect = 1. - p + Assert.That(abs(mCorrect - m) < 0.1) + + [] + member _.TestTensorDropout2d () = + for combo in Combos.FloatingPointExcept16s do + for p in [0.; 0.2; 0.8; 1.] do + let t = combo.ones([100;100;8;8]) + let d = dsharp.dropout2d(t, p) + let m = d.mean() |> float + let mCorrect = 1. - p + Assert.That(abs(mCorrect - m) < 0.1) + + [] + member _.TestTensorDropout3d () = + for combo in Combos.FloatingPointExcept16s do + for p in [0.; 0.2; 0.8; 1.] do + let t = combo.ones([100;100;8;8;8]) + let d = dsharp.dropout3d(t, p) + let m = d.mean() |> float + let mCorrect = 1. - p + Assert.That(abs(mCorrect - m) < 0.1) + + [] + member _.TestTensorToString () = + for combo in Combos.IntegralAndFloatingPoint do + let tempty = combo.tensor([]) + let t0 = combo.tensor(2.) + let t1 = combo.tensor([[2.]; [2.]]) + let t2 = combo.tensor([[[2.; 2.]]]) + let t3 = combo.tensor([[1.;2.]; [3.;4.]]) + let t4 = combo.tensor([[[[1.]]]]) + let temptyString = tempty.ToString() + let t0String = t0.ToString() + let t1String = t1.ToString() + let t2String = t2.ToString() + let t3String = t3.ToString() + let t4String = t4.ToString() + let suffix = + match combo.dtype with + | Bool -> failwith "unexpected bool dtype in test" + | Byte -> "" + | Int8 -> "" + | Int16 -> "" + | Int32 -> "" + | Int64 -> "" + | Float16 + | BFloat16 + | Float32 -> "." + | Float64 -> "." 
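+            // Editor's note (an addition): the expected strings below are assembled
+            // from a numeric suffix ("." for floating-point dtypes, "" for integer
+            // ones, per the match above) plus ",dtype="/",device="/",backend="
+            // fragments that appear only when they differ from the defaults, e.g.
+            //   dsharp.tensor([1.; 2.])  // prints "tensor([1., 2.])" under defaults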
+ let dtypeText = + if combo.dtype = Dtype.Default then + "" + else + sprintf ",dtype=%s" (combo.dtype.ToString()) + let deviceText = + if combo.device = Device.Default then + "" + else + sprintf ",device=%s" (combo.device.ToString()) + let backendText = + if combo.backend = Backend.Default then + "" + else + sprintf ",backend=%s" (combo.backend.ToString()) + + let extraText = dtypeText + deviceText + backendText + let temptyStringCorrect = "tensor([])" + let t0StringCorrect = sprintf "tensor(2%s%s)" suffix extraText + let t1StringCorrect = sprintf "tensor([[2%s],\n [2%s]]%s)" suffix suffix extraText + let t2StringCorrect = sprintf "tensor([[[2%s, 2%s]]]%s)" suffix suffix extraText + let t3StringCorrect = sprintf "tensor([[1%s, 2%s],\n [3%s, 4%s]]%s)" suffix suffix suffix suffix extraText + let t4StringCorrect = sprintf "tensor([[[[1%s]]]]%s)" suffix extraText + Assert.CheckEqual(temptyStringCorrect, temptyString) + Assert.CheckEqual(t0StringCorrect, t0String) + Assert.CheckEqual(t1StringCorrect, t1String) + Assert.CheckEqual(t2StringCorrect, t2String) + Assert.CheckEqual(t3StringCorrect, t3String) + Assert.CheckEqual(t4StringCorrect, t4String) + + let t0Bool = dsharp.tensor([ 0.; 1. ], dtype=Dtype.Bool) + let t0BoolToString = t0Bool.ToString() + let t0BoolToStringCorrect = sprintf "tensor([false, true],dtype=Bool)" + Assert.CheckEqual(t0BoolToStringCorrect, t0BoolToString) + + let t1Bool = dsharp.tensor([ false; true ], dtype=Dtype.Bool) + let t1BoolToString = t1Bool.ToString() + let t1BoolToStringCorrect = sprintf "tensor([false, true],dtype=Bool)" + Assert.CheckEqual(t1BoolToStringCorrect, t1BoolToString) + + [] + member _.TestTensorEqual () = + for combo in Combos.IntegralAndFloatingPoint do + let t1A = combo.tensor(-1.) + let t1B = combo.tensor(1.) + let t1C = combo.tensor(1.) + let t1At1BEqual = t1A = t1B + let t1At1BEqualCorrect = false + let t1Bt1CEqual = t1B = t1C + let t1Bt1CEqualCorrect = true + + Assert.CheckEqual(t1At1BEqualCorrect, t1At1BEqual) + Assert.CheckEqual(t1Bt1CEqualCorrect, t1Bt1CEqual) + + // Systematic testing. The tensors below are listed in expected order of comparison + let t2S = + [ combo.tensor( 0. ) + combo.tensor( 1. ) + combo.tensor([ 1.] ) + combo.tensor([ 2.] ) + combo.tensor([ 1.; 1.] ) + combo.tensor([ 1.; 2. ] ) + combo.tensor([ 2.; 1. ] ) + combo.tensor([ [ 1.; 1.] ]) ] + + // Check the F# generic '=' gives expected results + let equalsResults = [| for a in t2S -> [| for b in t2S -> a = b |] |] + let equalsCorrect = [| for i in 0..t2S.Length-1 -> [| for j in 0..t2S.Length-1 -> (i=j) |] |] + + Assert.CheckEqual(equalsResults, equalsCorrect) + + // Bool + for combo in Combos.Bool do + let t1A = combo.tensor(false) + let t1B = combo.tensor(true) + let t1C = combo.tensor(true) + let t1At1BEqual = t1A = t1B + let t1At1BEqualCorrect = false + let t1Bt1CEqual = t1B = t1C + let t1Bt1CEqualCorrect = true + + Assert.CheckEqual(t1At1BEqualCorrect, t1At1BEqual) + Assert.CheckEqual(t1Bt1CEqualCorrect, t1Bt1CEqual) + + for combo in Combos.All do + for dtype2 in Dtypes.All do + if combo.dtype <> dtype2 then + isInvalidOp (fun () -> combo.tensor(1) = combo.tensor(1, dtype=dtype2)) + + [] + member _.TestTensorHash () = + for combo in Combos.IntegralAndFloatingPoint do + + // Systematic testing. The tensors below are listed in expected order of comparison + let t2S = + [ combo.tensor( 0. ) + combo.tensor( 1. ) + combo.tensor([ 1.] ) + combo.tensor([ 2.] ) + combo.tensor([ 1.; 1.] ) + combo.tensor([ 1.; 2. ] ) + combo.tensor([ 2.; 1. ] ) + combo.tensor([ [ 1.; 1.] 
]) ] + + // Check the F# generic hashes are the same for identical tensors, and different for this small sample of tensors + let hashSameResults = [| for a in t2S -> [| for b in t2S -> hash a = hash b |] |] + let hashSameCorrect = [| for i in 0..t2S.Length-1 -> [| for j in 0..t2S.Length-1 -> (i=j) |] |] + + Assert.CheckEqual(hashSameResults, hashSameCorrect) + + // Check reallocating an identical tensor doesn't change the hash + let t2a = combo.tensor([ 1.] ) + let t2b = combo.tensor([ 1.] ) + Assert.CheckEqual(t2a.GetHashCode(), t2b.GetHashCode()) + + [] + member _.TestTensorCompare () = + for combo in Combos.IntegralAndFloatingPoint do + let t1A = combo.tensor(2.) + let t1B = combo.tensor(3.) + let t1At1BLess = t1A < t1B + let t1At1BLessCorrect = true + + Assert.CheckEqual(t1At1BLessCorrect, t1At1BLess) + + // Bool + for combo in Combos.Bool do + let t1A = combo.tensor(false) + let t1B = combo.tensor(true) + let t1At1BLess = t1A < t1B + let t1At1BLessCorrect = true + + Assert.CheckEqual(t1At1BLessCorrect, t1At1BLess) + + [] + member _.TestTensorMove () = + for combo1 in Combos.All do + for combo2 in Combos.All do + let t1 = combo1.tensor([0, 1, 2, 3]) + let t2 = t1.move(combo2.device, combo2.dtype, combo2.backend) + let t2b = t2.move(combo1.device, combo1.dtype, combo1.backend) + Assert.CheckEqual(combo2.dtype, t2.dtype) + Assert.CheckEqual(combo2.device, t2.device) + Assert.CheckEqual(combo2.backend, t2.backend) + if combo2.dtype <> Dtype.Bool then // Conversion to bool is irreversible for tensor([0, 1, 2, 3]) + Assert.CheckEqual(t1, t2b) + + [] + member _.TestTensorMoveDefaultBackend () = + // Check that device and backend are not changed if not specified in move + for combo1 in Combos.All do + let t1 = combo1.tensor([0, 1, 2, 3]) + let t1b = t1.move(?device=None, dtype=combo1.dtype, ?backend=None) + Assert.CheckEqual(combo1.backend, t1b.backend) + Assert.CheckEqual(combo1.device, t1b.device) + + [] + member _.TestTensorCast () = + for combo in Combos.IntegralAndFloatingPoint do + for dtype2 in Dtypes.IntegralAndFloatingPoint do + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 2.; 3.; 5.], dtype=dtype2) + let t1Cast = t1.cast(dtype2) + let t2Cast = t2.cast(combo.dtype) + + Assert.CheckEqual(t1Cast.dtype, dtype2) + Assert.CheckEqual(t2Cast.dtype, combo.dtype) + Assert.CheckEqual(t1Cast, t2) + Assert.CheckEqual(t1, t2Cast) + + for combo in Combos.IntegralAndFloatingPoint do + let t1Bool = combo.tensor([true; false], dtype=Dtype.Bool) + let t2Bool = combo.tensor([1.; 0.]) + let t1BoolCast = t1Bool.cast(combo.dtype) + let t2BoolCast = t2Bool.cast(Dtype.Bool) + + Assert.CheckEqual(t1BoolCast.dtype, combo.dtype) + Assert.CheckEqual(t2BoolCast.dtype, Dtype.Bool) + Assert.CheckEqual(t1BoolCast, t2Bool) + Assert.CheckEqual(t1Bool, t2BoolCast) + + for combo in Combos.IntegralAndFloatingPoint do + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 2.; 3.; 5.], dtype=Dtype.Int8) + let t1Cast = t1.int8() + + Assert.CheckEqual(t1Cast.dtype, Dtype.Int8) + Assert.CheckEqual(t1Cast, t2) + + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 2.; 3.; 5.], dtype=Dtype.Int16) + let t1Cast = t1.int16() + + Assert.CheckEqual(t1Cast.dtype, Dtype.Int16) + Assert.CheckEqual(t1Cast, t2) + + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 2.; 3.; 5.], dtype=Dtype.Int32) + let t1Cast = t1.int32() + + Assert.CheckEqual(t1Cast.dtype, Dtype.Int32) + Assert.CheckEqual(t1Cast, t2) + + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = 
combo.tensor([1.; 2.; 3.; 5.], dtype=Dtype.Int32) + let t1Cast = t1.int() + + Assert.CheckEqual(t1Cast.dtype, Dtype.Int32) + Assert.CheckEqual(t1Cast, t2) + + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 2.; 3.; 5.], dtype=Dtype.Int64) + let t1Cast = t1.int64() + + Assert.CheckEqual(t1Cast.dtype, Dtype.Int64) + Assert.CheckEqual(t1Cast, t2) + + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 2.; 3.; 5.], dtype=Dtype.Float32) + let t1Cast = t1.float32() + + Assert.CheckEqual(t1Cast.dtype, Dtype.Float32) + Assert.CheckEqual(t1Cast, t2) + + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 2.; 3.; 5.], dtype=Dtype.Float64) + let t1Cast = t1.float64() + + Assert.CheckEqual(t1Cast.dtype, Dtype.Float64) + Assert.CheckEqual(t1Cast, t2) + + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 2.; 3.; 5.], dtype=Dtype.Float64) + let t1Cast = t1.float() + + Assert.CheckEqual(t1Cast.dtype, Dtype.Float64) + Assert.CheckEqual(t1Cast, t2) + + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 2.; 3.; 5.], dtype=Dtype.Float64) + let t1Cast = t1.double() + + Assert.CheckEqual(t1Cast.dtype, Dtype.Float64) + Assert.CheckEqual(t1Cast, t2) + + let t1 = combo.tensor([1.; 0.]) + let t2 = combo.tensor([1.; 0.], dtype=Dtype.Bool) + let t1Cast = t1.bool() + + Assert.CheckEqual(t1Cast.dtype, Dtype.Bool) + Assert.CheckEqual(t1Cast, t2) + + [] + member _.TestTensorBool () = + for tys in Combos.Bool do + let t1 = tys.tensor([1; 0; 1; 0], dtype=Bool) + + Assert.CheckEqual([| true; false; true; false |], t1.toArray() :?> bool[]) + Assert.CheckEqual(Bool, t1.dtype) + + let t2 = tys.tensor([true; false; true; false], dtype=Bool) + + Assert.CheckEqual([| true; false; true; false |], t2.toArray() :?> bool[]) + Assert.CheckEqual(Bool, t2.dtype) + + [] + member _.TestTensorLtTT () = + // Test all non-bool types + for combo in Combos.IntegralAndFloatingPoint do + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 3.; 5.; 4.]) + let t1t2Lt = t1.lt(t2) + let t1t2LtCorrect = combo.tensor([0.; 1.; 1.; 0.], dtype=Dtype.Bool) + + Assert.CheckEqual(t1t2LtCorrect, t1t2Lt) + Assert.CheckEqual(Dtype.Bool, t1t2Lt.dtype) + + for combo in Combos.Bool do + // Test bool type separately + let t1Bool = combo.tensor([true; true; false; false ]) + let t2Bool = combo.tensor([true; false; true; false ]) + let t1Boolt2BoolLt = t1Bool.lt(t2Bool) + let t1Boolt2BoolLtCorrect = combo.tensor([false; false; true; false ], dtype=Dtype.Bool) + + Assert.CheckEqual(t1Boolt2BoolLtCorrect, t1Boolt2BoolLt) + + [] + member _.TestTensorLeTT () = + // Test all non-bool types + for combo in Combos.IntegralAndFloatingPoint do + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 3.; 5.; 4.]) + let t1t2Le = t1.le(t2) + let t1t2LeCorrect = combo.tensor([1.; 1.; 1.; 0.], dtype=Dtype.Bool) + + Assert.CheckEqual(t1t2LeCorrect, t1t2Le) + Assert.CheckEqual(Dtype.Bool, t1t2Le.dtype) + + // Test bool type separately + for combo in Combos.Bool do + let t1Bool = combo.tensor([true; true; false; false ]) + let t2Bool = combo.tensor([true; false; true; false ]) + let t1Boolt2BoolLe = t1Bool.le(t2Bool) + let t1Boolt2BoolLeCorrect = combo.tensor([true; false; true; true ], dtype=Dtype.Bool) + + Assert.CheckEqual(t1Boolt2BoolLeCorrect, t1Boolt2BoolLe) + + [] + member _.TestTensorGtTT () = + // Test all non-bool types + for combo in Combos.IntegralAndFloatingPoint do + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 3.; 5.; 4.]) + 
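+            // Editor's sketch (an addition): like lt/le above, gt compares
+            // elementwise and always yields a Dtype.Bool tensor, whatever the
+            // operand dtype:
+            //   combo.tensor([1.; 5.]).gt(combo.tensor([2.; 4.]))  // [false; true]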
let t1t2Gt = t1.gt(t2) + let t1t2GtCorrect = combo.tensor([0.; 0.; 0.; 1.], dtype=Dtype.Bool) + + Assert.CheckEqual(t1t2GtCorrect, t1t2Gt) + Assert.CheckEqual(Dtype.Bool, t1t2Gt.dtype) + + // Test bool type separately + for combo in Combos.Bool do + let t1Bool = combo.tensor([true; true; false; false ]) + let t2Bool = combo.tensor([true; false; true; false ]) + let t1Boolt2BoolGt = t1Bool.gt(t2Bool) + let t1Boolt2BoolGtCorrect = combo.tensor([false; true; false; false ], dtype=Dtype.Bool) + + Assert.CheckEqual(t1Boolt2BoolGtCorrect, t1Boolt2BoolGt) + + [] + member _.TestTensorGeTT () = + // Test all non-bool types + for combo in Combos.IntegralAndFloatingPoint do + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 3.; 5.; 4.]) + let t1t2Ge = t1.ge(t2) + let t1t2GeCorrect = combo.tensor([1.; 0.; 0.; 1.], dtype=Dtype.Bool) + + Assert.CheckEqual(t1t2GeCorrect, t1t2Ge) + Assert.CheckEqual(Dtype.Bool, t1t2Ge.dtype) + + // Test bool type separately + for combo in Combos.Bool do + // Test bool type separately + let t1Bool = combo.tensor([true; true; false; false ]) + let t2Bool = combo.tensor([true; false; true; false ]) + let t1Boolt2BoolGe = t1Bool.ge(t2Bool) + let t1Boolt2BoolGeCorrect = combo.tensor([true; true; false; true ], dtype=Dtype.Bool) + + Assert.CheckEqual(t1Boolt2BoolGeCorrect, t1Boolt2BoolGe) + + [] + member _.TestTensorEqTT () = + // Test all non-bool types + for combo in Combos.IntegralAndFloatingPoint do + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 2.; 5.; 4.]) + let t1t2Eq = t1.eq(t2) + let t1t2EqCorrect = combo.tensor([1.; 1.; 0.; 0.], dtype=Dtype.Bool) + + Assert.CheckEqual(t1t2EqCorrect, t1t2Eq) + Assert.CheckEqual(Dtype.Bool, t1t2Eq.dtype) + + // Test bool type separately + for combo in Combos.Bool do + // Test bool type separately + let t1Bool = combo.tensor([true; true; false; false ]) + let t2Bool = combo.tensor([true; false; true; false ]) + let t1Boolt2BoolEq = t1Bool.eq(t2Bool) + let t1Boolt2BoolEqCorrect = combo.tensor([true; false; false; true ], dtype=Dtype.Bool) + + Assert.CheckEqual(t1Boolt2BoolEqCorrect, t1Boolt2BoolEq) + + [] + member _.TestTensorNeq () = + // Test all non-bool types + for combo in Combos.IntegralAndFloatingPoint do + let t1 = combo.tensor([1.; 2.; 3.; 5.]) + let t2 = combo.tensor([1.; 2.; 5.; 4.]) + let t1t2Neq = t1.ne(t2) + let t1t2NeqCorrect = combo.tensor([0.; 0.; 1.; 1.], dtype=Dtype.Bool) + + Assert.CheckEqual(t1t2NeqCorrect, t1t2Neq) + Assert.CheckEqual(Dtype.Bool, t1t2Neq.dtype) + + // Test bool type separately + for combo in Combos.Bool do + // Test bool type separately + let t1Bool = combo.tensor([true; true; false; false ]) + let t2Bool = combo.tensor([true; false; true; false ]) + let t1Boolt2BoolNeq = t1Bool.ne(t2Bool) + let t1Boolt2BoolNeqCorrect = combo.tensor([false; true; true; false ], dtype=Dtype.Bool) + + Assert.CheckEqual(t1Boolt2BoolNeqCorrect, t1Boolt2BoolNeq) + + [] + member _.TestTensorIsinf () = + // isinf always returns bool tensor + for combo in Combos.FloatingPoint do + let t = combo.tensor([1.; infinity; 3.; -infinity]) + let i = dsharp.isinf(t) + let iCorrect = combo.tensor([0.; 1.; 0.; 1.], dtype=Dtype.Bool) + Assert.CheckEqual(iCorrect, i) + + // Integer tensors always return 0 for isinf + for combo in Combos.IntegralAndBool do + let t = combo.tensor([1.; 0.; 1.]) + let i = dsharp.isinf(t) + let iCorrect = combo.tensor([0.; 0.; 0.], dtype=Dtype.Bool) + Assert.CheckEqual(iCorrect, i) + + [] + member _.TestTensorIsnan () = + // isnan always returns bool tensor + for 
combo in Combos.FloatingPoint do + let t = combo.tensor([1.; nan; 3.; nan]) + let i = dsharp.isnan(t) + let iCorrect = combo.tensor([false; true; false; true], dtype=Dtype.Bool) + Assert.CheckEqual(iCorrect, i) + + // Integer and bool tensors always return false for isnan + for combo in Combos.IntegralAndBool do + let t = combo.tensor([1.; 0.; 1.]) + let i = dsharp.isnan(t) + let iCorrect = combo.tensor([0.; 0.; 0.], dtype=Dtype.Bool) + Assert.CheckEqual(iCorrect, i) + + [] + member _.TestTensorOnesLike () = + for combo in Combos.All do + let t = combo.tensor([1.; 2.; 3.; 4.]) + let i = t.onesLike([2]) + let iCorrect = combo.tensor([1.; 1.]) + Assert.CheckEqual(iCorrect, i) + + [] + member _.TestTensorZerosLike () = + for combo in Combos.All do + let t = combo.tensor([1.; 2.; 3.; 4.]) + let i = t.zerosLike([2]) + let iCorrect = combo.tensor([0.; 0.]) + Assert.CheckEqual(iCorrect, i) + + [] + member _.TestTensorFullLike () = + for combo in Combos.All do + let t = combo.tensor([1.; 2.; 3.; 4.]) + let i = t.fullLike(4.0, [2]) + let iCorrect = combo.tensor([4.; 4.]) + Assert.CheckEqual(iCorrect, i) + + [] + member _.TestTensorZeroLike () = + for combo in Combos.All do + let t = combo.tensor([1.; 2.; 3.; 4.]) + let i = t.zeroLike() + let iCorrect = combo.tensor(0.) + Assert.CheckEqual(iCorrect, i) + + [] + member _.TestTensorOneLike () = + for combo in Combos.All do + let t = combo.tensor([1.; 2.; 3.; 4.]) + let i = t.oneLike() + let iCorrect = combo.tensor(1.) + Assert.CheckEqual(iCorrect, i) + + [] + member _.TestTensorRandLike() = + for combo in Combos.FloatingPoint do + let t = combo.tensor([1.; 2.; 3.; 4.]) + let i = t.randLike([2]) + Assert.CheckEqual(i.shape, [|2|]) + Assert.CheckEqual(i.dtype, t.dtype) + Assert.CheckEqual(i.dtype, combo.dtype) + + for combo in Combos.Bool do + let t = combo.tensor([1.; 2.; 3.; 4.]) + isInvalidOp(fun () -> t.randLike([2])) + + [] + member _.TestTensorRandnLike() = + for combo in Combos.FloatingPointExcept16s do + let t = combo.tensor([1.; 2.; 3.; 4.]) + let i = t.randnLike([2]) + Assert.CheckEqual(i.shape, [|2|]) + Assert.CheckEqual(i.dtype, t.dtype) + Assert.CheckEqual(i.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + let t = combo.tensor([1.; 2.; 3.; 4.]) + isInvalidOp(fun () -> t.randnLike([2])) + + [] + member _.TestTensorHasinf () = + for combo in Combos.FloatingPoint do + let t1 = combo.tensor([1.; infinity; 3.; -infinity]) + let t1i = dsharp.hasinf(t1) + let t1iCorrect = true + let t2 = combo.tensor([1.; 2.; 3.; 4.]) + let t2i = dsharp.hasinf(t2) + let t2iCorrect = false + Assert.CheckEqual(t1iCorrect, t1i) + Assert.CheckEqual(t2iCorrect, t2i) + + for combo in Combos.IntegralAndBool do + let t = combo.tensor([1.; 0.; 1.]) + let i = dsharp.hasinf(t) + let iCorrect = false + Assert.CheckEqual(iCorrect, i) + + [] + member _.TestTensorHasnan () = + for combo in Combos.FloatingPoint do + let t1 = combo.tensor([1.; nan; 3.; nan]) + let t1i = dsharp.hasnan(t1) + let t1iCorrect = true + let t2 = combo.tensor([1.; 2.; 3.; 4.]) + let t2i = dsharp.hasnan(t2) + let t2iCorrect = false + Assert.CheckEqual(t1iCorrect, t1i) + Assert.CheckEqual(t2iCorrect, t2i) + + for combo in Combos.IntegralAndBool do + let t = combo.tensor([1.; 0.; 1.]) + let i = dsharp.hasnan(t) + let iCorrect = false + Assert.CheckEqual(iCorrect, i) + + [] + member _.TestTensorAddTT () = + // Test all pairs of non-bool types + for combo in Combos.IntegralAndFloatingPoint do + for dtype2 in Dtypes.IntegralAndFloatingPoint do + match Dtype.widen combo.dtype dtype2 with + | None 
-> () + | Some dtypeRes -> + let t1 = combo.tensor([1.; 2.]) + combo.tensor([3.; 4.], dtype=dtype2) + let t1Correct = combo.tensor([4.; 6.], dtype=dtypeRes) + + let t2 = combo.tensor([1.; 2.]) + combo.tensor(5., dtype=dtype2) + let t2Correct = combo.tensor([6.; 7.], dtype=dtypeRes) + + Assert.CheckEqual(t1Correct, t1) + Assert.CheckEqual(t2Correct, t2) + Assert.CheckEqual(t1.dtype, dtypeRes) + Assert.CheckEqual(t2.dtype, dtypeRes) + + [] + member _.TestTensorAddTTScalarBroadcasting () = + // Test scalar broadcasting + for combo in Combos.IntegralAndFloatingPoint do + let t3 = combo.tensor([1; 2]) + 5 + let t3Correct = combo.tensor([6; 7]) + + let t4 = combo.tensor([1; 2]) + 5 + let t4Correct = combo.tensor([6; 7]) + + let t5 = combo.tensor([1; 2]) + 5 + let t5Correct = combo.tensor([6; 7]) + + Assert.CheckEqual(t3Correct, t3) + Assert.CheckEqual(t4Correct, t4) + Assert.CheckEqual(t5Correct, t5) + Assert.CheckEqual(t3.dtype, combo.dtype) + Assert.CheckEqual(t4.dtype, combo.dtype) + Assert.CheckEqual(t5.dtype, combo.dtype) + + // Bool tensors support addition returning bool + // + // t = torch.tensor([[True]], dtype=torch.bool) + // t + t + // + // tensor([[True]]) + + for combo in Combos.Bool do + let t5a = combo.tensor([true; false]) + let t5b = combo.tensor([true; true]) + let t5 = t5a + t5b + let t5Correct = combo.tensor([true; true]) + Assert.CheckEqual(t5, t5Correct) + + [] + member _.TestTensorAddTT_BroadcastingSystematic () = + for combo in Combos.IntegralAndFloatingPointExcept16s do + + // Check all broadcasts into 2x2 + // 2x2 * 1 (broadcast --> 2x2) + // 2x2 * 2 (broadcast --> 2x2) + // 2x2 * 2x1 (broadcast --> 2x2) + // 2x2 * 1x2 (broadcast --> 2x2) + let t6a = combo.tensor([ [1.; 2.]; [3.; 4.] ]) + for t6b in [ combo.tensor([ 5.0 ]) + combo.tensor([ 5.0; 5.0 ]) + combo.tensor([ [5.0]; [5.0] ]) + combo.tensor([ [5.0; 5.0] ]) ] do + let t6 = t6a + t6b + let t6Commute = t6b + t6a + let t6Correct = combo.tensor([ [6.; 7.]; [8.; 9.] ]) + + Assert.CheckEqual(t6Correct, t6) + Assert.CheckEqual(t6Correct, t6Commute) + + // Systematically do all allowed broadcasts into 2x3x4 + // 2x3x4 + 1 (broadcast --> 2x3x4) + // 2x3x4 + 4 (broadcast --> 2x3x4) + // 2x3x4 + 1x1 (broadcast --> 2x3x4) + // 2x3x4 + 3x1 (broadcast --> 2x3x4) + // 2x3x4 + 1x4 (broadcast --> 2x3x4) + // etc. + let t7a = combo.tensor([ [ [1.; 2.; 3.; 4.]; [5.; 6.; 7.; 8.]; [9.; 10.; 11.; 12.] ]; + [ [13.; 14.; 15.; 16.]; [17.; 18.; 19.; 20.]; [21.; 22.; 23.; 24.] 
] ]) + let t7Shapes = + [ for i1 in [0;1;2] do + for i2 in [0;1;3] do + for i3 in [0;1;4] do + if i1 <> 2 || i2 <> 3 || i3 <> 4 then + [| if i1 <> 0 && i2 <> 0 && i3 <> 0 then yield i1 + if i2 <> 0 && i3 <> 0 then yield i2 + if i3 <> 0 then yield i3 |] ] + |> List.distinct + + let t7Results, t7CommuteResults = + [| for shape in t7Shapes do + let t7b = combo.tensor(ArrayND.init shape (fun is -> double (Array.sum is) + 2.0)) + let t7 = t7a + t7b + let t7Commute = t7b + t7a + yield (t7b, t7), (t7b, t7Commute) |] + |> Array.unzip + + let t7Expected = + [|(combo.tensor 2., combo.tensor [[[3., 4., 5., 6.], [7., 8., 9., 10.], [11., 12., 13., 14.]], [[15., 16., 17., 18.], [19., 20., 21., 22.], [23., 24., 25., 26.]]]); + (combo.tensor [2.], combo.tensor [[[3., 4., 5., 6.], [7., 8., 9., 10.], [11., 12., 13., 14.]], [[15., 16., 17., 18.], [19., 20., 21., 22.], [23., 24., 25., 26.]]]); + (combo.tensor [2., 3., 4., 5.], combo.tensor [[[3., 5., 7., 9.], [7., 9., 11., 13.], [11., 13., 15., 17.]], [[15., 17., 19., 21.], [19., 21., 23., 25.], [23., 25., 27., 29.]]]); + (combo.tensor [[2.]], combo.tensor [[[3., 4., 5., 6.], [7., 8., 9., 10.], [11., 12., 13., 14.]], [[15., 16., 17., 18.], [19., 20., 21., 22.], [23., 24., 25., 26.]]]); + (combo.tensor [[2., 3., 4., 5.]], combo.tensor [[[3., 5., 7., 9.], [7., 9., 11., 13.], [11., 13., 15., 17.]], [[15., 17., 19., 21.], [19., 21., 23., 25.], [23., 25., 27., 29.]]]); + (combo.tensor [[2.], [3.], [4.]], combo.tensor [[[3., 4., 5., 6.], [8., 9., 10., 11.], [13., 14., 15., 16.]], [[15., 16., 17., 18.], [20., 21., 22., 23.], [25., 26., 27., 28.]]]); + (combo.tensor [[2., 3., 4., 5.], [3., 4., 5., 6.], [4., 5., 6., 7.]], combo.tensor [[[3., 5., 7., 9.], [8., 10., 12., 14.], [13., 15., 17., 19.]], [[15., 17., 19., 21.], [20., 22., 24., 26.], [25., 27., 29., 31.]]]); + (combo.tensor [[[2.]]], combo.tensor [[[3., 4., 5., 6.], [7., 8., 9., 10.], [11., 12., 13., 14.]], [[15., 16., 17., 18.], [19., 20., 21., 22.], [23., 24., 25., 26.]]]); + (combo.tensor [[[2., 3., 4., 5.]]], combo.tensor [[[3., 5., 7., 9.], [7., 9., 11., 13.], [11., 13., 15., 17.]], [[15., 17., 19., 21.], [19., 21., 23., 25.], [23., 25., 27., 29.]]]); + (combo.tensor [[[2.], [3.], [4.]]], combo.tensor [[[3., 4., 5., 6.], [8., 9., 10., 11.], [13., 14., 15., 16.]], [[15., 16., 17., 18.], [20., 21., 22., 23.], [25., 26., 27., 28.]]]); + (combo.tensor [[[2., 3., 4., 5.], [3., 4., 5., 6.], [4., 5., 6., 7.]]], combo.tensor [[[3., 5., 7., 9.], [8., 10., 12., 14.], [13., 15., 17., 19.]], [[15., 17., 19., 21.], [20., 22., 24., 26.], [25., 27., 29., 31.]]]); + (combo.tensor [[[2.]], [[3.]]], combo.tensor [[[3., 4., 5., 6.], [7., 8., 9., 10.], [11., 12., 13., 14.]], [[16., 17., 18., 19.], [20., 21., 22., 23.], [24., 25., 26., 27.]]]); + (combo.tensor [[[2., 3., 4., 5.]], [[3., 4., 5., 6.]]], combo.tensor [[[3., 5., 7., 9.], [7., 9., 11., 13.], [11., 13., 15., 17.]], [[16., 18., 20., 22.], [20., 22., 24., 26.], [24., 26., 28., 30.]]]); + (combo.tensor [[[2.], [3.], [4.]], [[3.], [4.], [5.]]], combo.tensor [[[3., 4., 5., 6.], [8., 9., 10., 11.], [13., 14., 15., 16.]], [[16., 17., 18., 19.], [21., 22., 23., 24.], [26., 27., 28., 29.]]])|] + + + Assert.CheckEqual(t7Expected, t7Results) + Assert.CheckEqual(t7Expected, t7CommuteResults) + + + + [] + member _.TestTensorStackTs () = + for combo in Combos.All do + let t0a = combo.tensor(1.) + let t0b = combo.tensor(3.) + let t0c = combo.tensor(5.) 
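+            // Editor's sketch (an addition): Tensor.stack joins along a *new*
+            // dimension, while Tensor.cat (tested further below) joins along an
+            // *existing* one:
+            //   Tensor.stack([a; b]) with a, b of shape [|2|] -> shape [|2; 2|]
+            //   Tensor.cat([a; b])   with a, b of shape [|2|] -> shape [|4|]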
+ let t0 = Tensor.stack([t0a;t0b;t0c]) + let t0Correct = combo.tensor([1.;3.;5.]) + + let t1a = combo.tensor([1.; 2.]) + let t1b = combo.tensor([3.; 4.]) + let t1c = combo.tensor([5.; 6.]) + let t1 = Tensor.stack([t1a;t1b;t1c]) + + let t2a = combo.tensor([ [1.; 2.] ]) + let t2b = combo.tensor([ [3.; 4.] ]) + let t2c = combo.tensor([ [5.; 6.] ]) + let t2_dim0 = Tensor.stack([t2a;t2b;t2c], dim=0) + let t2_dim1 = Tensor.stack([t2a;t2b;t2c], dim=1) + let t2_dim2 = Tensor.stack([t2a;t2b;t2c], dim=2) + let t2Correct_dim0 = combo.tensor([[[1.;2.]];[[3.;4.]];[[5.;6.]]]) + let t2Correct_dim1 = combo.tensor([[[1.;2.];[3.;4.];[5.;6.]]]) + let t2Correct_dim2 = combo.tensor([[[1.;3.;5.];[2.;4.;6.]]]) + + let t1Correct = combo.tensor([[1.;2.];[3.;4.];[5.;6.]]) + + Assert.CheckEqual(t0Correct, t0) + Assert.CheckEqual(t1Correct, t1) + Assert.CheckEqual(t0.dtype, combo.dtype) + Assert.CheckEqual(t1.dtype, combo.dtype) + + Assert.CheckEqual(t2Correct_dim0, t2_dim0) + Assert.CheckEqual(t2Correct_dim1, t2_dim1) + Assert.CheckEqual(t2Correct_dim2, t2_dim2) + + [] + member _.TestTensorUnstackT () = + for combo in Combos.All do + let t0a = combo.tensor(1.) + let t0b = combo.tensor(3.) + let t0c = combo.tensor(5.) + let t0Correct = [t0a;t0b;t0c] + let t0 = Tensor.stack(t0Correct).unstack() + + let t1a = combo.tensor([1.; 2.]) + let t1b = combo.tensor([3.; 4.]) + let t1c = combo.tensor([5.; 6.]) + let t1Correct = [t1a;t1b;t1c] + let t1 = Tensor.stack(t1Correct).unstack() + + // 3x1x2 + let t2a = combo.tensor([[[1.;2.]];[[3.;4.]];[[5.;6.]]]) + let t2 = t2a.unstack() + let t2_dim1 = t2a.unstack(dim=1) + let t2_dim2 = t2a.unstack(dim=2) + // 3 of 1x2 + let t2Correct = [combo.tensor [[1.;2.]]; combo.tensor [[3.;4.]]; combo.tensor [[5.;6.]]] + // 1 of 3x2 + let t2Correct_dim1 = [combo.tensor [[1.;2.];[3.;4.];[5.;6.]]] + // 2 of 3x1 + let t2Correct_dim2 = [combo.tensor [[1.];[3.];[5.]]; combo.tensor [[2.];[4.];[6.]]] + + Assert.CheckEqual(t0Correct, Seq.toList t0) + Assert.CheckEqual(t1Correct, Seq.toList t1) + for t in t1 do + Assert.CheckEqual(t.dtype, combo.dtype) + Assert.CheckEqual(t2Correct, Array.toList t2) + Assert.CheckEqual(t2Correct_dim1, Array.toList t2_dim1) + Assert.CheckEqual(t2Correct_dim2, Array.toList t2_dim2) + + [] + member _.TestTensorCatTs () = + for combo in Combos.All do + + let t0a = combo.tensor([1.; 2.]) + let t0 = Tensor.cat([t0a]) + let t0Correct = combo.tensor([1.;2.]) + + Assert.CheckEqual(t0Correct, t0) + + let t1a = combo.tensor([1.; 2.]) // 2 + let t1b = combo.tensor([3.; 4.]) // 2 + let t1c = combo.tensor([5.; 6.]) // 2 + let t1 = Tensor.cat([t1a;t1b;t1c]) // 6 + let t1_dim0 = Tensor.cat([t1a;t1b;t1c],dim=0) // 6 + let t1Correct = combo.tensor([1.;2.;3.;4.;5.;6.]) + + Assert.CheckEqual(t1Correct, t1) + Assert.CheckEqual(t1Correct, t1_dim0) + + let t2a = combo.tensor([ [1.; 2.] ]) // 1x2 + let t2b = combo.tensor([ [3.; 4.] ]) // 1x2 + let t2c = combo.tensor([ [5.; 6.] ]) // 1x2 + let t2 = Tensor.cat([t2a;t2b;t2c]) // 3x2 + let t2_dim0 = Tensor.cat([t2a;t2b;t2c], dim=0) // 3x2 + let t2_dim1 = Tensor.cat([t2a;t2b;t2c], dim=1) // 1x6 + let t2Correct_dim0 = combo.tensor([[1.;2.];[3.;4.];[5.;6.]]) // 3x2 + let t2Correct_dim1 = combo.tensor([[1.;2.;3.;4.;5.;6.]]) // 1x6 + + Assert.CheckEqual(t2Correct_dim0, t2) + Assert.CheckEqual(t2Correct_dim0, t2_dim0) + Assert.CheckEqual(t2Correct_dim1, t2_dim1) + + // irregular sizes dim0 + let t3a = combo.tensor([ [1.; 2.] ]) // 1x2 + let t3b = combo.tensor([ [3.; 4.];[5.; 6.] ]) // 2x2 + let t3c = combo.tensor([ [7.; 8.] 
]) // 1x2 + let t3 = Tensor.cat([t3a;t3b;t3c]) // 4x2 + let t3Correct = combo.tensor([[1.;2.];[3.;4.];[5.;6.];[7.;8.]]) // 4x2 + + Assert.CheckEqual(t3Correct, t3) + + // irregular sizes dim1 + let t4a = combo.tensor([ [1.]; [2.] ]) // 2x1 + let t4b = combo.tensor([ [3.; 4.];[5.; 6.] ]) // 2x2 + let t4c = combo.tensor([ [7.]; [8.] ]) // 2x1 + let t4_dim1 = Tensor.cat([t4a;t4b;t4c],dim=1) // 2x4 + let t4Correct_dim1 = combo.tensor([[1.;3.;4.;7.];[2.;5.;6.;8.]]) // 2x4 + + Assert.CheckEqual(t4Correct_dim1, t4_dim1) + + [] + member _.TestTensorSplitT_Basics () = + + for combo in Combos.All do + //6 --> 2;2;2 + let t1in = combo.tensor([1.;2.;3.;4.;5.;6.]) // 6 + let t1 = t1in.split([2;2;2]) |> Seq.toList // 3 of 2 + let t1Correct = [combo.tensor([1.; 2.]);combo.tensor([3.; 4.]);combo.tensor([5.; 6.])] + + Assert.CheckEqual(t1Correct, t1) + + // 3x1x2 + let t2in = combo.tensor([[[1.;2.]];[[3.;4.]];[[5.;6.]]]) + let t2 = t2in.split(sizes=[1;1;1], dim=0) |> Seq.toList // 3 of 1x1x2 + let t2Correct = [combo.tensor [[[1.;2.]]]; combo.tensor [[[3.;4.]]]; combo.tensor [[[5.;6.]]]] + + Assert.CheckEqual(t2Correct, t2) + + let t3in = combo.tensor([[[1.;2.]];[[3.;4.]];[[5.;6.]]]) + let t3 = t3in.split(sizes=[1;2], dim=0) |> Seq.toList // 2 of 1x1x2 and 2x1x2 + let t3Correct = [combo.tensor [[[1.;2.]]]; combo.tensor [[[3.;4.]];[[5.;6.]]]] + + Assert.CheckEqual(t3Correct, t3) + + let t4in = combo.tensor([[[1.;2.]];[[3.;4.]];[[5.;6.]]]) + let t4 = t4in.split(sizes=[1], dim=1) |> Seq.toList // 1 of 3x1x2 + let t4Correct = [combo.tensor [[[1.;2.]];[[3.;4.]];[[5.;6.]]]] // 1 of 3x1x2 + + Assert.CheckEqual(t4Correct, t4) + + let t5in = combo.tensor([[[1.;2.]];[[3.;4.]];[[5.;6.]]]) + let t5 = t5in.split(sizes=[1;1], dim=2) |> Seq.toList // 2 of 3x1x1 + let t5Correct = [combo.tensor [[[1.]];[[3.]];[[5.]]]; combo.tensor [[[2.]];[[4.]];[[6.]]]] // 2 of 3x1x1 + + Assert.CheckEqual(t5Correct, t5) + + //systematic split of 6 + let t6vs = [1..6] + let t6in = combo.tensor(t6vs) // 6 + for p1 in 0..6 do + for p2 in 0..6 do + for p3 in 0..6 do + if p1+p2+p3 = 6 then + let t6 = + t6in.split([if p1 > 0 then p1 + if p2 > 0 then p2 + if p3 > 0 then p3]) + |> Seq.toList + let t6Correct = + [if p1 > 0 then combo.tensor(t6vs[0..p1-1]); + if p2 > 0 then combo.tensor(t6vs[p1..p1+p2-1]); + if p3 > 0 then combo.tensor(t6vs[p1+p2..])] + + Assert.CheckEqual(t6Correct, t6) + + + //systematic split of 2x6 along dim1 + let t7vs1 = [1..6] + let t7vs2 = [7..12] + let t7in = combo.tensor([ t7vs1; t7vs2] ) // 2x6 + for p1 in 0..6 do + for p2 in 0..6 do + for p3 in 0..6 do + if p1+p2+p3 = 6 then + let sizes = + [if p1 > 0 then p1 + if p2 > 0 then p2 + if p3 > 0 then p3] + let t7 = t7in.split(sizes,dim=1) |> Seq.toList + let t7Correct = + [if p1 > 0 then combo.tensor([ t7vs1[0..p1-1]; t7vs2[0..p1-1] ]); + if p2 > 0 then combo.tensor([ t7vs1[p1..p1+p2-1]; t7vs2[p1..p1+p2-1] ]); + if p3 > 0 then combo.tensor([ t7vs1[p1+p2..]; t7vs2[p1+p2..] 
])] + + Assert.CheckEqual(t7Correct, t7) + + + + [] + member _.TestTensorAddT2T1 () = + // Test all non-bool types + for combo in Combos.IntegralAndFloatingPoint do + let t1 = combo.tensor([[1.; 2.]; [3.; 4.]]) + combo.tensor([5.; 6.]) + let t1Correct = combo.tensor([[6.; 8.]; [8.; 10.]]) + + Assert.CheckEqual(t1Correct, t1) + Assert.CheckEqual(t1.dtype, combo.dtype) + + for combo in Combos.Bool do + // check broadcast for bool tensor 0 --> [2] + let t6a = combo.tensor([true; false]) + let t6b = combo.tensor(true) + let t6 = t6a + t6b + let t6Correct = combo.tensor([true; true]) + Assert.CheckEqual(t6, t6Correct) + + // check broadcast for bool tensor [1] --> [2] + let t7a = combo.tensor([true; false]) + let t7b = combo.tensor([true]) + let t7 = t7a + t7b + let t7Correct = combo.tensor([true; true]) + Assert.CheckEqual(t7, t7Correct) + + + [] + member _.TestTensorSubTT () = + // Test all pairs of non-bool types, for widening + for combo in Combos.IntegralAndFloatingPoint do + for dtype2 in Dtypes.IntegralAndFloatingPoint do + match Dtype.widen combo.dtype dtype2 with + | None -> () + | Some dtypeRes -> + + let t1 = combo.tensor([1.; 2.]) - combo.tensor([3.; 4.], dtype=dtype2) + let t1Correct = combo.tensor([-2.; -2.], dtype=dtypeRes) + + Assert.CheckEqual(t1Correct, t1) + Assert.CheckEqual(t1.dtype, dtypeRes) + + let t2 = combo.tensor([1.; 2.]) - combo.tensor(5., dtype=dtype2) + let t2Correct = combo.tensor([-4.; -3.], dtype=dtypeRes) + + Assert.CheckEqual(t2Correct, t2) + Assert.CheckEqual(t2.dtype, dtypeRes) + + // Test scalar broadcast + for combo in Combos.IntegralAndFloatingPoint do + let t3 = combo.tensor([1; 2]) - 5 + let t3Correct = combo.tensor([-4; -3]) + + Assert.CheckEqual(t3Correct, t3) + Assert.CheckEqual(t3.dtype, combo.dtype) + + let t4 = 5 - combo.tensor([1; 2]) + let t4Correct = combo.tensor([4; 3]) + + Assert.CheckEqual(t4Correct, t4) + Assert.CheckEqual(t4.dtype, combo.dtype) + + let t5 = combo.tensor([1; 2]) - 5 + let t5Correct = combo.tensor([-4; -3]) + + Assert.CheckEqual(t5Correct, t5) + Assert.CheckEqual(t5.dtype, combo.dtype) + + for combo in Combos.Bool do + // Bool tensors do not support subtraction + // + // torch.tensor([[True]], dtype=torch.bool) - torch.tensor([[True]], dtype=torch.bool) + // + // RuntimeError: Subtraction, the `-` operator, with two bool tensors is not supported. Use the `^` or `logical_xor()` operator instead. + + let t5a = combo.tensor([true; false]) + let t5b = combo.tensor([true; true]) + isInvalidOp(fun () -> t5a - t5b) + + [] + member _.TestTensorMulTT () = + // Test all pairs of non-bool types + for combo in Combos.IntegralAndFloatingPoint do + for dtype2 in Dtypes.IntegralAndFloatingPoint do + match Dtype.widen combo.dtype dtype2 with + | None -> () + | Some dtypeRes -> + let t1 = combo.tensor([1.; 2.]) * combo.tensor([3.; 4.], dtype=dtype2) + let t1Correct = combo.tensor([3.; 8.], dtype=dtypeRes) + + Assert.CheckEqual(t1Correct, t1) + Assert.CheckEqual(t1.dtype, dtypeRes) + + let t2 = combo.tensor([1.; 2.]) * combo.tensor(5., dtype=dtype2) + let t2Correct = combo.tensor([5.; 10.], dtype=dtypeRes) + + Assert.CheckEqual(t2Correct, t2) + Assert.CheckEqual(t2.dtype, dtypeRes) + + // Test scalar broadcasting + for combo in Combos.FloatingPoint do + let t3 = combo.tensor([1.; 2.]) * 5.f + let t3Correct = combo.tensor([5.; 10.]) + + Assert.CheckEqual(t3Correct, t3) + + let t4 = 5. 
* combo.tensor([1.; 2.])
+            let t4Correct = combo.tensor([5.; 10.])
+
+            Assert.CheckEqual(t4Correct, t4)
+            Assert.CheckEqual(t3.dtype, combo.dtype)
+            Assert.CheckEqual(t4.dtype, combo.dtype)
+
+        for combo in Combos.Integral do
+            let t3 = combo.tensor([1; 2]) * 5
+            let t3Correct = combo.tensor([5; 10])
+
+            Assert.CheckEqual(t3Correct, t3)
+            Assert.CheckEqual(t3.dtype, combo.dtype)
+
+            let t4 = 5 * combo.tensor([1; 2])
+            let t4Correct = combo.tensor([5; 10])
+
+            Assert.CheckEqual(t4Correct, t4)
+            Assert.CheckEqual(t4.dtype, combo.dtype)
+
+            // Multiplying integer tensors by a floating point number always
+            // results in float32. This is the same behaviour as Torch
+            let t5 = 5.0 * combo.tensor([1; 2])
+            let t5Correct = combo.tensor([5; 10], dtype=Dtype.Float32)
+
+            Assert.CheckEqual(t5Correct, t5)
+            Assert.CheckEqual(t5.dtype, Dtype.Float32)
+
+        // Bool tensors support multiplication giving bool tensor
+        //
+        // torch.ones(10, dtype=torch.bool) * torch.ones(10, dtype=torch.bool)
+        //
+        // tensor([True, True, True, True, True, True, True, True, True, True])
+        for combo in Combos.Bool do
+            let t1 = combo.tensor([true; true])
+            let t2 = combo.tensor([true; false])
+            let i = t1 * t2
+            let iCorrect = combo.tensor([true; false])
+            Assert.CheckEqual(iCorrect, i)
+
+    []
+    member _.TestTensorMulTT_BroadcastSystematic () =
+        for combo in Combos.FloatingPointExcept16s do
+            // 2x2 * 1 (broadcast --> 2x2)
+            // 2x2 * 2 (broadcast --> 2x2)
+            // 2x2 * 2x1 (broadcast --> 2x2)
+            // 2x2 * 1x2 (broadcast --> 2x2)
+            let t5a = combo.tensor([ [1.; 2.]; [3.; 4.] ])
+            for t5b in [ combo.tensor([ 5.0 ])
+                         combo.tensor([ 5.0; 5.0 ])
+                         combo.tensor([ [5.0]; [5.0] ])
+                         combo.tensor([ [5.0; 5.0] ]) ] do
+                let t5 = t5a * t5b
+                let t5Commute = t5b * t5a
+                let t5Correct = combo.tensor([ [5.; 10.]; [15.; 20.] ])
+
+                Assert.CheckEqual(t5Correct, t5)
+                Assert.CheckEqual(t5Correct, t5Commute)
+
+            // Systematically do all allowed broadcasts into 2x3x4
+            // 2x3x4 * 1 (broadcast --> 2x3x4)
+            // 2x3x4 * 4 (broadcast --> 2x3x4)
+            // 2x3x4 * 1x1 (broadcast --> 2x3x4)
+            // 2x3x4 * 3x1 (broadcast --> 2x3x4)
+            // 2x3x4 * 1x4 (broadcast --> 2x3x4)
+            // etc.
+            let t6a = combo.tensor([ [ [1.; 2.; 3.; 4.]; [5.; 6.; 7.; 8.]; [9.; 10.; 11.; 12.] ];
+                                     [ [13.; 14.; 15.; 16.]; [17.; 18.; 19.; 20.]; [21.; 22.; 23.; 24.]
] ]) + + // These are all the interesting shapes that broadcast into t6a + let t6Shapes = + [ for i1 in [0;1;2] do + for i2 in [0;1;3] do + for i3 in [0;1;4] do + if i1 <> 2 || i2 <> 3 || i3 <> 4 then + [| if i1 <> 0 && i2 <> 0 && i3 <> 0 then yield i1 + if i2 <> 0 && i3 <> 0 then yield i2 + if i3 <> 0 then yield i3 |] ] + |> List.distinct + + let t6Results, t6CommuteResults = + [| for shape in t6Shapes do + let t6b = combo.tensor(ArrayND.init shape (fun is -> double (Array.sum is) + 2.0)) + let t6 = t6a * t6b + let t6Commute = t6b * t6a + yield (t6b, t6 ), (t6b, t6Commute ) |] + |> Array.unzip + + let t6Expected = + [|(combo.tensor 2., combo.tensor [[[2., 4., 6., 8.], [10., 12., 14., 16.], [18., 20., 22., 24.]], [[26., 28., 30., 32.], [34., 36., 38., 40.], [42., 44., 46., 48.]]]); + (combo.tensor [2.], combo.tensor [[[2., 4., 6., 8.], [10., 12., 14., 16.], [18., 20., 22., 24.]], [[26., 28., 30., 32.], [34., 36., 38., 40.], [42., 44., 46., 48.]]]); + (combo.tensor [2., 3., 4., 5.], combo.tensor [[[2., 6., 12., 20.], [10., 18., 28., 40.], [18., 30., 44., 60.]], [[26., 42., 60., 80.], [34., 54., 76., 100.], [42., 66., 92., 120.]]]); + (combo.tensor [[2.]], combo.tensor [[[2., 4., 6., 8.], [10., 12., 14., 16.], [18., 20., 22., 24.]], [[26., 28., 30., 32.], [34., 36., 38., 40.], [42., 44., 46., 48.]]]); + (combo.tensor [[2., 3., 4., 5.]], combo.tensor [[[2., 6., 12., 20.], [10., 18., 28., 40.], [18., 30., 44., 60.]], [[26., 42., 60., 80.], [34., 54., 76., 100.], [42., 66., 92., 120.]]]); + (combo.tensor [[2.], [3.], [4.]], combo.tensor [[[2., 4., 6., 8.], [15., 18., 21., 24.], [36., 40., 44., 48.]], [[26., 28., 30., 32.], [51., 54., 57., 60.], [84., 88., 92., 96.]]]); + (combo.tensor [[2., 3., 4., 5.], [3., 4., 5., 6.], [4., 5., 6., 7.]], combo.tensor [[[2., 6., 12., 20.], [15., 24., 35., 48.], [36., 50., 66., 84.]], [[26., 42., 60., 80.], [51., 72., 95., 120.], [84., 110., 138., 168.]]]); + (combo.tensor [[[2.]]], combo.tensor [[[2., 4., 6., 8.], [10., 12., 14., 16.], [18., 20., 22., 24.]], [[26., 28., 30., 32.], [34., 36., 38., 40.], [42., 44., 46., 48.]]]); + (combo.tensor [[[2., 3., 4., 5.]]], combo.tensor [[[2., 6., 12., 20.], [10., 18., 28., 40.], [18., 30., 44., 60.]], [[26., 42., 60., 80.], [34., 54., 76., 100.], [42., 66., 92., 120.]]]); + (combo.tensor [[[2.], [3.], [4.]]], combo.tensor [[[2., 4., 6., 8.], [15., 18., 21., 24.], [36., 40., 44., 48.]], [[26., 28., 30., 32.], [51., 54., 57., 60.], [84., 88., 92., 96.]]]); + (combo.tensor [[[2., 3., 4., 5.], [3., 4., 5., 6.], [4., 5., 6., 7.]]],combo.tensor [[[2., 6., 12., 20.], [15., 24., 35., 48.], [36., 50., 66., 84.]], [[26., 42., 60., 80.], [51., 72., 95., 120.], [84., 110., 138., 168.]]]); + (combo.tensor [[[2.]], [[3.]]], combo.tensor [[[2., 4., 6., 8.], [10., 12., 14., 16.], [18., 20., 22., 24.]], [[39., 42., 45., 48.], [51., 54., 57., 60.], [63., 66., 69., 72.]]]); + (combo.tensor [[[2., 3., 4., 5.]], [[3., 4., 5., 6.]]], combo.tensor [[[2., 6., 12., 20.], [10., 18., 28., 40.], [18., 30., 44., 60.]], [[39., 56., 75., 96.], [51., 72., 95., 120.], [63., 88., 115., 144.]]]); + (combo.tensor [[[2.], [3.], [4.]], [[3.], [4.], [5.]]], combo.tensor [[[2., 4., 6., 8.], [15., 18., 21., 24.], [36., 40., 44., 48.]], [[39., 42., 45., 48.], [68., 72., 76., 80.], [105., 110., 115., 120.]]]); |] + + Assert.CheckEqual(t6Expected, t6Results) + Assert.CheckEqual(t6Expected, t6CommuteResults) + + + [] + member _.TestTensorDivTT () = + for combo in Combos.FloatingPoint do + let t1 = combo.tensor([1.; 2.]) / combo.tensor([3.; 4.]) + let t1Correct = 
combo.tensor([0.333333; 0.5]) + + let t2 = combo.tensor([1.; 2.]) / combo.tensor(5.) + let t2Correct = combo.tensor([0.2; 0.4]) + + let t3 = combo.tensor([1.; 2.]) / 5. + let t3Correct = combo.tensor([0.2; 0.4]) + + let t4 = 5. / combo.tensor([1.; 2.]) + let t4Correct = combo.tensor([5.; 2.5]) + + Assert.That(t1.allclose(t1Correct, 0.01)) + Assert.That(t2.allclose(t2Correct, 0.01)) + Assert.That(t3.allclose(t3Correct, 0.01)) + Assert.That(t4.allclose(t4Correct, 0.01)) + Assert.CheckEqual(t1.dtype, combo.dtype) + Assert.CheckEqual(t2.dtype, combo.dtype) + Assert.CheckEqual(t3.dtype, combo.dtype) + Assert.CheckEqual(t4.dtype, combo.dtype) + + // Integer and bool tensors get cast to the default floating point type for division + for combo in Combos.IntegralAndBool do + let t1a = combo.tensor([2; 3; 4]) + let t1b = combo.tensor([1; 2; 3]) + let i1 = t1a / t1b + let i1Correct = t1a.cast(Dtype.Default) / t1b.cast(Dtype.Default) + Assert.That(i1Correct.allclose(i1, 0.01)) + + let t2a = combo.tensor(6) + let t2b = combo.tensor([1; 2; 3]) + let i2 = t2a / t2b + let i2Correct = t2a.cast(Dtype.Default) / t2b.cast(Dtype.Default) + Assert.That(i2Correct.allclose(i2, 0.01)) + + let t3a = combo.tensor([6; 12; 18]) + let t3b = combo.tensor(3) + let i3 = t3a / t3b + let i3Correct = t3a.cast(Dtype.Default) / t3b.cast(Dtype.Default) + Assert.That(i3Correct.allclose(i3, 0.01)) + + [] + member _.TestTensorPowTT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([1.; 2.]) ** combo.tensor([3.; 4.]) + let t1Correct = combo.tensor([1.; 16.]) + + Assert.CheckEqual(t1Correct, t1) + Assert.CheckEqual(t1.dtype, combo.dtype) + let t2 = combo.tensor([1.; 2.]) ** combo.tensor(5.) + let t2Correct = combo.tensor([1.; 32.]) + + Assert.CheckEqual(t2Correct, t2) + Assert.CheckEqual(t2.dtype, combo.dtype) + + let t3 = combo.tensor(5.) 
** combo.tensor([1.; 2.]) + let t3Correct = combo.tensor([5.; 25.]) + + Assert.That(t3.allclose(t3Correct, 0.01)) + Assert.CheckEqual(t3.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + let t1 = combo.tensor([1.0]) + isInvalidOp(fun () -> t1 ** t1) + + let t2a = combo.tensor([1.0]) + let t2b = combo.tensor(1.0) + isInvalidOp(fun () -> t2a ** t2b) + + let t3a = combo.tensor(1.0) + let t3b = combo.tensor([1.0]) + isInvalidOp(fun () -> t3a ** t3b) + + [] + member _.TestTensorMatMulT2T2 () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([[8.0766; 3.3030; 2.1732; 8.9448; 1.1028]; + [4.1215; 4.9130; 5.2462; 4.2981; 9.3622]; + [7.4682; 5.2166; 5.1184; 1.9626; 0.7562]]) + let t2 = combo.tensor([[5.1067; 0.0681]; + [7.4633; 3.6027]; + [9.0070; 7.3012]; + [2.6639; 2.8728]; + [7.9229; 2.3695]]) + + let t3 = t1.matmul(t2) + let t3Correct = combo.tensor([[118.0367; 56.6266]; + [190.5926; 90.8155]; + [134.3925; 64.1030]]) + + Assert.That(t3.allclose(t3Correct, 0.01)) + Assert.CheckEqual(t3.dtype, combo.dtype) + + for combo in Combos.Integral do + let t1 = combo.tensor([[1; 2]]) + let t2 = combo.tensor([[3]; [4]]) + + let t3 = t1.matmul(t2) + let t3Correct = combo.tensor([[11]]) + + Assert.That(t3.allclose(t3Correct, 0.0)) + Assert.CheckEqual(t3.dtype, combo.dtype) + + // Matmul of Bool tensor not allowed + // + // t = torch.tensor([[True]], dtype=torch.bool) + // t.matmul(t) + // + // RuntimeError: _th_mm not supported on CPUType for Bool + + for combo in Combos.Bool do + let t3a = combo.tensor([[true]]) + isInvalidOp(fun () -> t3a.matmul(t3a)) + + [] + member _.TestTensorDot () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([8.0766, 3.3030, -2.1732, 8.9448, 1.1028]) + let t2 = combo.tensor([5.1067, -0.0681, 7.4633, -3.6027, 9.0070]) + let t3 = dsharp.dot(t1, t2) + let t3Correct = combo.tensor(2.5081) + Assert.That(t3.allclose(t3Correct, 0.01)) + Assert.CheckEqual(t3.dtype, combo.dtype) + + for combo in Combos.Integral do + let t1 = combo.tensor([1; 2]) + let t2 = combo.tensor([3; 4]) + + let t3 = dsharp.dot(t1, t2) + let t3Correct = combo.tensor(11) + + Assert.That(t3.allclose(t3Correct, 0.0)) + Assert.CheckEqual(t3.dtype, combo.dtype) + + for combo in Combos.Bool do + let t3a = combo.tensor([true]) + isInvalidOp(fun () -> dsharp.dot(t3a, t3a)) + + [] + member _.TestTensorDiagonal () = + for combo in Combos.All do + let t1 = combo.arange(6.).view([2; 3]) + let t1a = dsharp.diagonal(t1) + let t1b = dsharp.diagonal(t1, offset=1) + let t1c = dsharp.diagonal(t1, offset=2) + let t1d = dsharp.diagonal(t1, offset= -1) + let t1aCorrect = combo.tensor([0.,4.]) + let t1bCorrect = combo.tensor([1.,5.]) + let t1cCorrect = combo.tensor([2.]) + let t1dCorrect = combo.tensor([3.]) + let t2 = combo.arange(9.).view([3;3]) + let t2a = dsharp.diagonal(t2) + let t2aCorrect = combo.tensor([0.,4.,8.]) + Assert.CheckEqual(t1aCorrect, t1a) + Assert.CheckEqual(t1bCorrect, t1b) + Assert.CheckEqual(t1cCorrect, t1c) + Assert.CheckEqual(t1dCorrect, t1d) + Assert.CheckEqual(t2aCorrect, t2a) + + [] + member _.TestTensorTrace () = + for combo in Combos.FloatingPoint do + let t1 = combo.arange(6.).view([2; 3]) + let t1a = dsharp.trace(t1) + let t1aCorrect = combo.tensor(4.) + let t2 = combo.arange(9.).view([3;3]) + let t2a = dsharp.trace(t2) + let t2aCorrect = combo.tensor(12.) 
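+            // Worked check: combo.arange(6.).view([2; 3]) is [[0,1,2],[3,4,5]], whose
+            // main diagonal is (0, 4), so the trace is 0 + 4 = 4; for the 3x3 case
+            // arange(9.) gives diagonal (0, 4, 8) and trace 12.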
+ Assert.CheckEqual(t1aCorrect, t1a) + Assert.CheckEqual(t2aCorrect, t2a) + + for combo in Combos.Integral do + let t1 = combo.arange(6.).view([2; 3]) + let t1a = dsharp.trace(t1) + let t1aCorrect = combo.tensor(4., dtype=Dtype.Int64) + let t2 = combo.arange(9.).view([3;3]) + let t2a = dsharp.trace(t2) + let t2aCorrect = combo.tensor(12., dtype=Dtype.Int64) + Assert.CheckEqual(t1aCorrect, t1a) + Assert.CheckEqual(t2aCorrect, t2a) + + for combo in Combos.Bool do + let t1a = combo.tensor([[true]]).trace() + let t1aCorrect = combo.tensor(1., dtype=Dtype.Int64) + Assert.CheckEqual(t1aCorrect, t1a) + + [] + member _.TestTensorMatMul11 () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([8.0766; 3.3030; 2.1732; 8.9448; 1.1028]) + let t2 = combo.tensor([5.1067; 7.4633; 3.6027; 9.0070; 7.3012]) + let t3 = t1.matmul(t2) + let t3Correct = t1.dot(t2) + + Assert.That(t3.allclose(t3Correct, 0.001)) + + [] + member _.TestTensorMatMul12 () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([8.0766; 3.3030; 2.1732; 8.9448; 1.1028]) + let t2 = combo.tensor([[5.1067; 0.0681]; + [7.4633; 3.6027]; + [9.0070; 7.3012]; + [2.6639; 2.8728]; + [7.9229; 2.3695]]) + let t3 = t1.matmul(t2) + let t3Correct = t1.expand([1;5]).matmul(t2).squeeze(0) + + Assert.That(t3.allclose(t3Correct, 0.001)) + + [] + member _.TestTensorMatMul13 () = + for combo in Combos.FloatingPointExcept16s do + // 5 --> 1x5 --> 3x1x5 (batching expansion) + let t1 = combo.tensor([8.0766; 3.3030; 2.1732; 8.9448; 1.1028]) + + // 3x5x2 (batch dimension is 3) + let t2 = combo.tensor([[[5.1067; 0.0681]; + [7.4633; 3.6027]; + [9.0070; 7.3012]; + [2.6639; 2.8728]; + [7.9229; 2.3695]]; + [[1.1067; 0.0681]; + [2.4633; 3.6027]; + [3.0070; 7.3012]; + [4.6639; 2.8728]; + [5.9229; 2.3695]]; + [[7.1067; 0.0681]; + [8.4633; 3.6027]; + [7.0070; 7.3012]; + [8.6639; 2.8728]; + [7.9229; 2.3695]]]) + let t3 = t1.matmul(t2) + let t3Correct = t1.expand([3;1;5]).matmul(t2).squeeze(1) + + Assert.AreEqual([|3;2|], t3.shape) + Assert.That(t3.allclose(t3Correct, 0.001)) + + [] + member _.TestTensorMatMul21 () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([[8.0766; 3.3030; 2.1732; 8.9448; 1.1028]; + [5.1067; 7.4633; 3.6027; 9.0070; 7.3012]]) + let t2 = combo.tensor([0.0681; 3.6027; 7.3012; 2.8728; 2.3695]) + let t3 = t1.matmul(t2) + let t3Correct = t1.matmul(t2.unsqueeze(1)).squeeze(1) + + Assert.That(t3.allclose(t3Correct, 0.001)) + + [] + member _.TestTensorMatMul31 () = + for combo in Combos.FloatingPointExcept16s do + //2 x 2 x 5 + let t1 = combo.tensor([[[8.0766; 3.3030; 2.1732; 8.9448; 1.1028]; + [5.1067; 7.4633; 3.6027; 9.0070; 7.3012]]; + [[9.0766; 4.3030; 2.1732; 8.9448; 1.1028]; + [3.1067; 5.4633; 3.6027; 9.0070; 7.3012]]]) + + // 5 --> 5x1 (matmul expand) -> 2x5x1 (batch expand) + let t2 = combo.tensor([0.0681; 3.6027; 7.3012; 2.8728; 2.3695]) + // 2x2x5 * 2x5x1 --> 2x2x1 --> 2x2 (reverse matmul expand) + let t3 = t1.matmul(t2) + let t3Correct = t1.matmul(t2.unsqueeze(1)).squeeze(2) + + Assert.AreEqual([|2;2|], t3.shape) + Assert.That(t3.allclose(t3Correct, 0.001)) + + [] + member _.TestTensorMatMul33 () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([[8.0766; 3.3030; 2.1732; 8.9448; 1.1028]; + [4.1215; 4.9130; 5.2462; 4.2981; 9.3622]; + [7.4682; 5.2166; 5.1184; 1.9626; 0.7562]]) + let t2 = combo.tensor([[5.1067; 0.0681]; + [7.4633; 3.6027]; + [9.0070; 7.3012]; + [2.6639; 2.8728]; + [7.9229; 2.3695]]) + + let t1Expanded = t1.expand([| 6;3;5 |]) + let t2Expanded = 
t2.expand([| 6;5;2 |]) + let t3Unexpanded = t1.matmul(t2) + let t3 = t1Expanded.matmul(t2Expanded) + let t3Correct = t3Unexpanded.expand([| 6;3;2 |]) + + Assert.That(t3.allclose(t3Correct, 0.001)) + + [] + member _.TestTensorMatMul44 () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([[8.0766; 3.3030; 2.1732; 8.9448; 1.1028]; + [4.1215; 4.9130; 5.2462; 4.2981; 9.3622]; + [7.4682; 5.2166; 5.1184; 1.9626; 0.7562]]) + let t2 = combo.tensor([[5.1067; 0.0681]; + [7.4633; 3.6027]; + [9.0070; 7.3012]; + [2.6639; 2.8728]; + [7.9229; 2.3695]]) + + let t1Expanded = t1.expand([| 2;6;3;5 |]) + let t2Expanded = t2.expand([| 2;6;5;2 |]) + let t3Unexpanded = t1.matmul(t2) + let t3 = t1Expanded.matmul(t2Expanded) + let t3Correct = t3Unexpanded.expand([| 2;6;3;2 |]) + + Assert.That(t3.allclose(t3Correct, 0.0001)) + + [] + member _.TestTensorMatMulBroadcast1 () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([[8.0766; 3.3030; 2.1732; 8.9448; 1.1028]; + [4.1215; 4.9130; 5.2462; 4.2981; 9.3622]; + [7.4682; 5.2166; 5.1184; 1.9626; 0.7562]]) + let t2 = combo.tensor([[5.1067; 0.0681]; + [7.4633; 3.6027]; + [9.0070; 7.3012]; + [2.6639; 2.8728]; + [7.9229; 2.3695]]) + + let t1Expanded = t1.expand([| 3;5 |]) + let t2Expanded = t2.expand([| 2;6;5;2 |]) + let t3Unexpanded = t1.matmul(t2) + let t3 = t1Expanded.matmul(t2Expanded) + let t3Correct = t3Unexpanded.expand([| 2;6;3;2 |]) + + Assert.That(t3.allclose(t3Correct, 0.00001)) + + [] + member _.TestTensorMatMulBroadcast2 () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([[8.0766; 3.3030; 2.1732; 8.9448; 1.1028]; + [4.1215; 4.9130; 5.2462; 4.2981; 9.3622]; + [7.4682; 5.2166; 5.1184; 1.9626; 0.7562]]) + let t2 = combo.tensor([[5.1067; 0.0681]; + [7.4633; 3.6027]; + [9.0070; 7.3012]; + [2.6639; 2.8728]; + [7.9229; 2.3695]]) + + let t1Expanded = t1.expand([| 2;6;3;5 |]) + let t2Expanded = t2.expand([| 2;1;5;2 |]) + let t3Unexpanded = t1.matmul(t2) + let t3 = t1Expanded.matmul(t2Expanded) + let t3Correct = t3Unexpanded.expand([| 2;6;3;2 |]) + + Assert.That(t3.allclose(t3Correct, 0.00001)) + + [] + member _.TestTensorNegT () = + // Test all non-bool types + for combo in Combos.IntegralAndFloatingPoint do + let t1 = combo.tensor([1.; 2.; 3.]) + let t1Neg = -t1 + let t1NegCorrect = combo.tensor([-1.; -2.; -3.]) + + Assert.CheckEqual(t1NegCorrect, t1Neg) + Assert.CheckEqual(t1Neg.dtype, combo.dtype) + + // Neg of Bool tensor not allowed + // + // -torch.ones(10, dtype=torch.bool) + // + // RuntimeError: Negation, the `-` operator, on a bool tensor is not supported. 
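+        // isInvalidOp (a helper assumed to be defined in TestUtils) evaluates the
+        // given thunk and asserts that it raises, mirroring the PyTorch error above.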
+ + for combo in Combos.Bool do + isInvalidOp(fun () -> -combo.tensor([1.0])) + + [] + member _.TestTensorSumT () = + // Test all non-bool types + for combo in Combos.IntegralAndFloatingPoint do + let t1 = combo.tensor([1.; 2.; 3.]) + let t1Sum = t1.sum() + let t1SumCorrect = combo.tensor(6., dtype=combo.dtype.SummationType) + + Assert.CheckEqual(t1Sum.dtype, combo.dtype.SummationType) + Assert.CheckEqual(t1SumCorrect, t1Sum) + + // Now test cases where result type is set explicitly + for dtype2 in Dtypes.IntegralAndFloatingPoint do + let t1SumTyped = t1.sum(dtype=dtype2) + let t1SumTypedCorrect = combo.tensor(6., dtype=dtype2) + Assert.CheckEqual(t1SumTyped.dtype, dtype2) + Assert.CheckEqual(t1SumTypedCorrect, t1SumTyped) + + let t2 = combo.tensor([[1.; 2.]; [3.; 4.]]) + let t2Sum = t2.sum() + let t2SumCorrect = combo.tensor(10., dtype=combo.dtype.SummationType) + + Assert.CheckEqual(t2Sum.dtype, combo.dtype.SummationType) + Assert.CheckEqual(t2SumCorrect, t2Sum) + + for combo in Combos.Bool do + // Sum of Bool tensor is Int64 tensor in pytorch + let t3a = combo.tensor([true; true; false]) + let t3 = t3a.sum() + let t3Correct = combo.tensor(2, dtype=Dtype.Int64) + Assert.CheckEqual(t3, t3Correct) + + [] + member _.TestTensorSumToSizeT () = + for combo in Combos.IntegralAndFloatingPoint do + let t1 = combo.tensor([1.; 2.; 3.]) + let t1Sum = t1.sumToSize([| |]) + let t1SumCorrect = combo.tensor(6., dtype=combo.dtype.SummationType) + + Assert.CheckEqual(t1SumCorrect, t1Sum) + + let t2 = combo.tensor([[1.; 2.]; [3.; 4.]]) + let t2Sum = t2.sumToSize([| |]) + let t2SumCorrect = combo.tensor(10., dtype=combo.dtype.SummationType) + + Assert.CheckEqual(t2SumCorrect, t2Sum) + + let t3 = combo.tensor([[1.; 2.]; [3.; 4.]]) + let t3Sum = t3.sumToSize([| 2 |]) + let t3SumCorrect = combo.tensor( [4.; 6.], dtype=combo.dtype.SummationType) + + Assert.CheckEqual(t3SumCorrect, t3Sum) + + let t4 = combo.tensor([[1.; 2.]; [3.; 4.]]) + let t4Sum = t4.sumToSize([| 1; 2 |]) + let t4SumCorrect = combo.tensor( [ [4.; 6.] ], dtype=combo.dtype.SummationType) + + Assert.CheckEqual(t4SumCorrect, t4Sum) + + let t5 = combo.tensor([[1.; 2.]; [3.; 4.]]) + let t5Sum = t5.sumToSize([| 2; 1 |]) + let t5SumCorrect = combo.tensor( [ [3.]; [7.] ], dtype=combo.dtype.SummationType) + + Assert.CheckEqual(t5SumCorrect, t5Sum) + + [] + member _.TestTensorSumToSizeSystematic () = + for combo in Combos.IntegralAndFloatingPoint do + // Systematically test all legitimate reductions of 2x2x2 to smaller sizes + let t6 = combo.tensor([ [[1.; 2.]; [3.; 4.] ]; [[5.; 6.]; [7.; 8.] 
] ]) + let systematicResults = + [| for i1 in 0..2 do + for i2 in (if i1 = 0 then 0 else 1)..2 do + for i3 in (if i2 = 0 then 0 else 1)..2 do + let newShape = + [| if i1 > 0 then yield i1 + if i2 > 0 then yield i2 + if i3 > 0 then yield i3 |] + yield (newShape, t6.sumToSize(newShape)) |] + + let expectedResults = + [|([||], combo.tensor (36., dtype=combo.dtype.SummationType)); + ([|1|], combo.tensor ([36.], dtype=combo.dtype.SummationType)); + ([|2|], combo.tensor ([16.; 20.], dtype=combo.dtype.SummationType)); + ([|1; 1|], combo.tensor ([[36.]], dtype=combo.dtype.SummationType)); + ([|1; 2|], combo.tensor ([[16.; 20.]], dtype=combo.dtype.SummationType)); + ([|2; 1|], combo.tensor([[14.]; [22.]], dtype=combo.dtype.SummationType)); + ([|2; 2|], combo.tensor([[6.; 8.]; [10.; 12.]], dtype=combo.dtype.SummationType)); + ([|1; 1; 1|], combo.tensor([[[36.]]], dtype=combo.dtype.SummationType)); + ([|1; 1; 2|], combo.tensor([[[16.; 20.]]], dtype=combo.dtype.SummationType)); + ([|1; 2; 1|], combo.tensor([[[14.]; [22.]]], dtype=combo.dtype.SummationType)); + ([|1; 2; 2|], combo.tensor([[[6.; 8.]; [10.; 12.]]], dtype=combo.dtype.SummationType)); + ([|2; 1; 1|], combo.tensor([[[10.]]; [[26.]]], dtype=combo.dtype.SummationType)); + ([|2; 1; 2|], combo.tensor([[[4.; 6.]]; [[12.; 14.]]], dtype=combo.dtype.SummationType)); + ([|2; 2; 1|], combo.tensor([[[3.]; [7.]]; [[11.]; [15.]]], dtype=combo.dtype.SummationType)); + ([|2; 2; 2|], combo.tensor([[[1.; 2.]; [3.; 4.]]; [[5.; 6.]; [7.; 8.]]], dtype=combo.dtype.SummationType))|] + + Assert.CheckEqual(systematicResults, expectedResults) + + [] + member _.TestTensorSumT2Dim0 () = + // Test all non-bool types + for combo in Combos.IntegralAndFloatingPoint do + let t1 = combo.tensor([[1.; 2.]; [3.; 4.]]) + let t1Sum = t1.sum(0) + let t1SumCorrect = combo.tensor([4.; 6.], dtype=combo.dtype.SummationType) + + Assert.CheckEqual(t1SumCorrect, t1Sum) + Assert.CheckEqual(t1Sum.dtype, combo.dtype.SummationType) + + [] + member _.TestTensorSumDim () = + // Test all non-bool types + for combo in Combos.IntegralAndFloatingPoint do + let t = combo.tensor([[[1.,2.,3.,4.], [5.,6.,7.,8.], [9.,10.,11.,12.]], [[13.,14.,15.,16.], [17.,18.,19.,20.], [21.,22.,23.,24.]]]) + let tSum0 = t.sum(0) + let tSum0Correct = combo.tensor([[14.0f, 16.0f, 18.0f, 20.0f], [22.0f, 24.0f, 26.0f, 28.0f], [30.0f, 32.0f, 34.0f, 36.0f]], dtype=combo.dtype.SummationType) + let tSum1 = t.sum(1) + let tSum1Correct = combo.tensor([[15.0f, 18.0f, 21.0f, 24.0f], [51.0f, 54.0f, 57.0f, 60.0f]], dtype=combo.dtype.SummationType) + let tSum2 = t.sum(2) + let tSum2Correct = combo.tensor([[10.0f, 26.0f, 42.0f], [58.0f, 74.0f, 90.0f]], dtype=combo.dtype.SummationType) + + Assert.CheckEqual(tSum0.dtype, combo.dtype.SummationType) + Assert.CheckEqual(tSum1.dtype, combo.dtype.SummationType) + Assert.CheckEqual(tSum2.dtype, combo.dtype.SummationType) + Assert.CheckEqual(tSum0Correct, tSum0) + Assert.CheckEqual(tSum1Correct, tSum1) + Assert.CheckEqual(tSum2Correct, tSum2) + + [] + member _.TestTensorSumDimKeepDim () = + // Test all non-bool types + for combo in Combos.IntegralAndFloatingPoint do + let t = combo.tensor([[[1.;2.;3.;4.]; [5.;6.;7.;8.]; [9.;10.;11.;12.]]; [[13.;14.;15.;16.]; [17.;18.;19.;20.]; [21.;22.;23.;24.]]]) + let tSum0 = t.sum(0, keepDim=true) + let tSum0Correct = combo.tensor([[[14.0f; 16.0f; 18.0f; 20.0f]; [22.0f; 24.0f; 26.0f; 28.0f]; [30.0f; 32.0f; 34.0f; 36.0f]]], dtype=combo.dtype.SummationType) + let tSum1 = t.sum(1, keepDim=true) + let tSum1Correct = combo.tensor([[[15.0f; 18.0f; 21.0f; 
24.0f]]; [[51.0f; 54.0f; 57.0f; 60.0f]]], dtype=combo.dtype.SummationType)
+            let tSum2 = t.sum(2, keepDim=true)
+            let tSum2Correct = combo.tensor([[[10.0f]; [26.0f]; [42.0f]]; [[58.0f]; [74.0f]; [90.0f]]], dtype=combo.dtype.SummationType)
+
+            Assert.CheckEqual(tSum0.dtype, combo.dtype.SummationType)
+            Assert.CheckEqual(tSum1.dtype, combo.dtype.SummationType)
+            Assert.CheckEqual(tSum2.dtype, combo.dtype.SummationType)
+            Assert.CheckEqual(tSum0Correct, tSum0)
+            Assert.CheckEqual(tSum1Correct, tSum1)
+            Assert.CheckEqual(tSum2Correct, tSum2)
+
+    []
+    member _.TestTensorSumDimBackwards () =
+        for combo in Combos.FloatingPointExcept16s do
+            let t = combo.randn([2;2;2])
+            let tsum_3 = t.sum(-3)
+            let tsum_2 = t.sum(-2)
+            let tsum_1 = t.sum(-1)
+            let tsum0 = t.sum(0)
+            let tsum1 = t.sum(1)
+            let tsum2 = t.sum(2)
+
+            Assert.CheckEqual(tsum_3, tsum0)
+            Assert.CheckEqual(tsum_2, tsum1)
+            Assert.CheckEqual(tsum_1, tsum2)
+
+    []
+    member _.TestTensorMeanDimBackwards () =
+        for combo in Combos.FloatingPointExcept16s do
+            let t = combo.randn([2;2;2])
+            let tmean_3 = t.mean(-3)
+            let tmean_2 = t.mean(-2)
+            let tmean_1 = t.mean(-1)
+            let tmean0 = t.mean(0)
+            let tmean1 = t.mean(1)
+            let tmean2 = t.mean(2)
+
+            Assert.CheckEqual(tmean_3, tmean0)
+            Assert.CheckEqual(tmean_2, tmean1)
+            Assert.CheckEqual(tmean_1, tmean2)
+
+    []
+    member _.TestTensorVarianceDimBackwards () =
+        for combo in Combos.FloatingPointExcept16s do
+            let t = combo.randn([2;2;2])
+            let tvariance_3 = t.var(-3)
+            let tvariance_2 = t.var(-2)
+            let tvariance_1 = t.var(-1)
+            let tvariance0 = t.var(0)
+            let tvariance1 = t.var(1)
+            let tvariance2 = t.var(2)
+
+            Assert.CheckEqual(tvariance_3, tvariance0)
+            Assert.CheckEqual(tvariance_2, tvariance1)
+            Assert.CheckEqual(tvariance_1, tvariance2)
+
+    []
+    member _.TestTensorMean () =
+        for combo in Combos.FloatingPoint do
+            let t = combo.tensor([[[1.;2.;3.;4.]; [5.;6.;7.;8.]; [9.;10.;11.;12.]]; [[13.;14.;15.;16.]; [17.;18.;19.;20.]; [21.;22.;23.;24.]]])
+            let tMean = t.mean()
+            let tMeanCorrect = combo.tensor(12.5)
+
+            Assert.CheckEqual(tMeanCorrect, tMean)
+            Assert.CheckEqual(tMean.dtype, combo.dtype)
+
+            // mean, dim={0,1,2}
+            (* Python:
+            import torch
+            input = torch.tensor([[[1.,2.,3.,4.], [5.,6.,7.,8.], [9.,10.,11.,12.]], [[13.,14.,15.,16.], [17.,18.,19.,20.], [21.,22.,23.,24.]]])
+            input.mean(0)
+            # --> tensor([[ 7., 8., 9., 10.],[11., 12., 13., 14.],[15., 16., 17., 18.]])
+            input.mean(1)
+            # --> tensor([[ 5., 6., 7., 8.],[17., 18., 19., 20.]])
+            input.mean(2)
+            # --> tensor([[ 2.5000, 6.5000, 10.5000],[14.5000, 18.5000, 22.5000]])
+            *)
+            let tMean0 = t.mean(0)
+            let tMean0Correct = combo.tensor([[7.; 8.; 9.; 10.]; [11.; 12.; 13.; 14.]; [15.; 16.; 17.; 18.]])
+            let tMean1 = t.mean(1)
+            let tMean1Correct = combo.tensor([[5.; 6.; 7.; 8.]; [17.; 18.; 19.; 20.]])
+            let tMean2 = t.mean(2)
+            let tMean2Correct = combo.tensor([[2.5; 6.5; 10.5]; [14.5; 18.5; 22.5]])
+
+            Assert.CheckEqual(tMean0Correct, tMean0)
+            Assert.CheckEqual(tMean1Correct, tMean1)
+            Assert.CheckEqual(tMean2Correct, tMean2)
+
+            // mean, dim={0,1,2}, keepDim=true
+            (* Python:
+            import torch
+            input = torch.tensor([[[1.,2.,3.,4.], [5.,6.,7.,8.], [9.,10.,11.,12.]], [[13.,14.,15.,16.], [17.,18.,19.,20.], [21.,22.,23.,24.]]])
+            input.mean(0,keepdim=True)
+            # --> tensor([[[ 7., 8., 9., 10.],[11., 12., 13., 14.],[15., 16., 17., 18.]]])
+            input.mean(1,keepdim=True)
+            # --> tensor([[[ 5., 6., 7., 8.]],[[17., 18., 19., 20.]]])
+            input.mean(2,keepdim=True)
+            # --> tensor([[[ 2.5000],[ 6.5000],[10.5000]],[[14.5000],[18.5000],[22.5000]]])
+            *)
+            let tMeanKeepDim0 = t.mean(0, keepDim=true)
+            let tMeanKeepDim0Correct = combo.tensor([[[7.; 8.; 9.; 10.]; [11.; 12.; 13.;
14.]; [15.; 16.; 17.; 18.]]])
+            let tMeanKeepDim1 = t.mean(1, keepDim=true)
+            let tMeanKeepDim1Correct = combo.tensor([[[5.; 6.; 7.; 8.]]; [[17.; 18.; 19.; 20.]]])
+            let tMeanKeepDim2 = t.mean(2, keepDim=true)
+            let tMeanKeepDim2Correct = combo.tensor([[[2.5]; [6.5]; [10.5]]; [[14.5]; [18.5]; [22.5]]])
+
+            Assert.CheckEqual(tMeanKeepDim0, tMeanKeepDim0Correct)
+            Assert.CheckEqual(tMeanKeepDim1, tMeanKeepDim1Correct)
+            Assert.CheckEqual(tMeanKeepDim2, tMeanKeepDim2Correct)
+
+    []
+    member _.TestTensorStddev () =
+        for combo in Combos.FloatingPointExcept16s do
+            let t = combo.tensor([[[0.3787;0.7515;0.2252;0.3416];
+                                   [0.6078;0.4742;0.7844;0.0967];
+                                   [0.1416;0.1559;0.6452;0.1417]];
+
+                                  [[0.0848;0.4156;0.5542;0.4166];
+                                   [0.5187;0.0520;0.4763;0.1509];
+                                   [0.4767;0.8096;0.1729;0.6671]]])
+            let tStddev = t.std()
+            let tStddevCorrect = combo.tensor(0.2398)
+
+            Assert.That(tStddev.allclose(tStddevCorrect, 0.01))
+            Assert.CheckEqual(tStddev.dtype, combo.dtype)
+
+            // stddev, dim={0,1,2}
+            let tStddev0 = t.std(0)
+            let tStddev0Correct = combo.tensor([[0.2078; 0.2375; 0.2326; 0.0530];
+                                                [0.0630; 0.2985; 0.2179; 0.0383];
+                                                [0.2370; 0.4623; 0.3339; 0.3715]])
+            let tStddev1 = t.std(1)
+            let tStddev1Correct = combo.tensor([[0.2331; 0.2981; 0.2911; 0.1304];
+                                                [0.2393; 0.3789; 0.2014; 0.2581]])
+            let tStddev2 = t.std(2)
+            let tStddev2Correct = combo.tensor([[0.2277; 0.2918; 0.2495];[0.1996; 0.2328; 0.2753]])
+
+            Assert.That(tStddev0.allclose(tStddev0Correct, 0.01))
+            Assert.That(tStddev1.allclose(tStddev1Correct, 0.01))
+            Assert.That(tStddev2.allclose(tStddev2Correct, 0.01))
+            Assert.CheckEqual(tStddev0.dtype, combo.dtype)
+            Assert.CheckEqual(tStddev1.dtype, combo.dtype)
+            Assert.CheckEqual(tStddev2.dtype, combo.dtype)
+
+            // stddev, dim={0,1,2}, keepDim=true
+            (* Python:
+            import torch
+            input = torch.tensor([[[0.3787,0.7515,0.2252,0.3416],[0.6078,0.4742,0.7844,0.0967],[0.1416,0.1559,0.6452,0.1417]],[[0.0848,0.4156,0.5542,0.4166],[0.5187,0.0520,0.4763,0.1509],[0.4767,0.8096,0.1729,0.6671]]])
+            input.std(0,keepdim=True)
+            # --> tensor([[[0.2078, 0.2375, 0.2326, 0.0530],[0.0630, 0.2985, 0.2179, 0.0383],[0.2370, 0.4622, 0.3340, 0.3715]]])
+            input.std(1,keepdim=True)
+            # --> tensor([[[0.2331, 0.2980, 0.2911, 0.1304]],[[0.2393, 0.3789, 0.2015, 0.2581]]])
+            input.std(2,keepdim=True)
+            # --> tensor([[[0.2278],[0.2918],[0.2495]],[[0.1996],[0.2328],[0.2753]]])
+            *)
+            let tStddev0 = t.std(0, keepDim=true)
+            let tStddev0Correct = combo.tensor([[[0.2078; 0.2375; 0.2326; 0.0530];[0.0630; 0.2985; 0.2179; 0.0383];[0.2370; 0.4623; 0.3339; 0.3715]]])
+            let tStddev1 = t.std(1, keepDim=true)
+            let tStddev1Correct = combo.tensor([[[0.2331; 0.2981; 0.2911; 0.1304]];[[0.2393; 0.3789; 0.2014; 0.2581]]])
+            let tStddev2 = t.std(2, keepDim=true)
+            let tStddev2Correct = combo.tensor([[[0.2277]; [0.2918]; [0.2495]];[[0.1996]; [0.2328]; [0.2753]]])
+
+            Assert.That(tStddev0.allclose(tStddev0Correct, 0.01))
+            Assert.That(tStddev1.allclose(tStddev1Correct, 0.01))
+            Assert.That(tStddev2.allclose(tStddev2Correct, 0.01))
+
+    []
+    member _.TestTensorVariance () =
+        for combo in Combos.FloatingPointExcept16s do
+            (* Python:
+            import torch
+            input = torch.tensor([[[0.3787,0.7515,0.2252,0.3416],[0.6078,0.4742,0.7844,0.0967],[0.1416,0.1559,0.6452,0.1417]],[[0.0848,0.4156,0.5542,0.4166],[0.5187,0.0520,0.4763,0.1509],[0.4767,0.8096,0.1729,0.6671]]])
+            input.var()
+            *)
+            let t = combo.tensor([[[0.3787;0.7515;0.2252;0.3416]; [0.6078;0.4742;0.7844;0.0967]; [0.1416;0.1559;0.6452;0.1417]]; [[0.0848;0.4156;0.5542;0.4166];[0.5187;0.0520;0.4763;0.1509];[0.4767;0.8096;0.1729;0.6671]]])
+            let tVariance = t.var()
+            let tVarianceCorrect = combo.tensor(0.0575)
+
+            Assert.That(tVariance.allclose(tVarianceCorrect, 0.01))
+
+            // Variance, dim={0,1,2}
+            (* Python:
+            input.var(0)
+            # --> tensor([[0.0432, 0.0564, 0.0541, 0.0028],[0.0040, 0.0891, 0.0475, 0.0015],[0.0561, 0.2137, 0.1115, 0.1380]])
+            input.var(1)
+            # --> tensor([[0.0543, 0.0888, 0.0847, 0.0170],[0.0573, 0.1436, 0.0406, 0.0666]])
+            input.var(2)
+            # --> tensor([[0.0519, 0.0852, 0.0622],[0.0398, 0.0542, 0.0758]])
+            *)
+            let tVariance0 = t.var(0)
+            let tVariance0Correct = combo.tensor([[0.0432; 0.0564; 0.0541; 0.0028];[0.0040; 0.0891; 0.0475; 0.0015];[0.0561; 0.2137; 0.1115; 0.1380]])
+            let tVariance1 = t.var(1)
+            let tVariance1Correct = combo.tensor([[0.0543; 0.0888; 0.0847; 0.0170];[0.0573; 0.1436; 0.0406; 0.0666]])
+            let tVariance2 = t.var(2)
+            let tVariance2Correct = combo.tensor([[0.0519; 0.0852; 0.0622];[0.0398; 0.0542; 0.0758]])
+
+            Assert.That(tVariance0.allclose(tVariance0Correct, 0.01, 0.01))
+            Assert.That(tVariance1.allclose(tVariance1Correct, 0.01, 0.01))
+            Assert.That(tVariance2.allclose(tVariance2Correct, 0.01, 0.01))
+            Assert.CheckEqual(tVariance0.dtype, combo.dtype)
+            Assert.CheckEqual(tVariance1.dtype, combo.dtype)
+            Assert.CheckEqual(tVariance2.dtype, combo.dtype)
+
+            let tVarianceBiased = t.var(unbiased=false)
+            let tVarianceBiasedCorrect = combo.tensor(0.0551)
+
+            Assert.That(tVarianceBiased.allclose(tVarianceBiasedCorrect, 0.01))
+
+            let tVarianceBiased0 = t.var(0, unbiased=false)
+            let tVarianceBiased0Correct = combo.tensor([[0.0216, 0.0282, 0.0271, 0.0014],
+                                                        [0.0020, 0.0446, 0.0237, 0.0007],
+                                                        [0.0281, 0.1068, 0.0558, 0.0690]])
+            let tVarianceBiased1 = t.var(1, unbiased=false)
+            let tVarianceBiased1Correct = combo.tensor([[0.0362, 0.0592, 0.0565, 0.0113],
+                                                        [0.0382, 0.0957, 0.0271, 0.0444]])
+            let tVarianceBiased2 = t.var(2, unbiased=false)
+            let tVarianceBiased2Correct = combo.tensor([[0.0389, 0.0639, 0.0467],
+                                                        [0.0299, 0.0407, 0.0568]])
+
+            Assert.That(tVarianceBiased0.allclose(tVarianceBiased0Correct, 0.01, 0.01))
+            Assert.That(tVarianceBiased1.allclose(tVarianceBiased1Correct, 0.01, 0.01))
+            Assert.That(tVarianceBiased2.allclose(tVarianceBiased2Correct, 0.01, 0.01))
+            Assert.CheckEqual(tVarianceBiased0.dtype, combo.dtype)
+            Assert.CheckEqual(tVarianceBiased1.dtype, combo.dtype)
+            Assert.CheckEqual(tVarianceBiased2.dtype, combo.dtype)
+
+            let tPrecisionCheckData = dsharp.tensor([1e10+4.0; 1e10+7.0; 1e10+13.0;1e10+16.0],dtype=Float64)
+            let tPrecisionCheck = tPrecisionCheckData.var()
+            let tPrecisionCheck0 = tPrecisionCheckData.var(0)
+            let tPrecisionCheckCorrect = dsharp.tensor(30.0,dtype=Float64)
+            Assert.That(tPrecisionCheck.allclose(tPrecisionCheckCorrect, 0.01, 0.01))
+            Assert.That(tPrecisionCheck0.allclose(tPrecisionCheckCorrect, 0.01, 0.01))
+
+    []
+    member _.TestTensorVarianceKeepDim () =
+        for combo in Combos.FloatingPoint do
+            // Variance, dim={0,1,2}, keepDim=true
+            (* Python:
+            import torch
+            input = torch.tensor([[[0.3787,0.7515,0.2252,0.3416],[0.6078,0.4742,0.7844,0.0967],[0.1416,0.1559,0.6452,0.1417]],[[0.0848,0.4156,0.5542,0.4166],[0.5187,0.0520,0.4763,0.1509],[0.4767,0.8096,0.1729,0.6671]]])
+            input.var(0,keepdim=True)
+            # --> tensor([[[0.0432, 0.0564, 0.0541, 0.0028],[0.0040, 0.0891, 0.0475, 0.0015],[0.0561, 0.2137, 0.1115, 0.1380]]])
+            input.var(1,keepdim=True)
+            # --> tensor([[[0.0543, 0.0888, 0.0847, 0.0170]],[[0.0573, 0.1436, 0.0406, 0.0666]]])
+            input.var(2,keepdim=True)
+            #
--> tensor([[[0.0519],[0.0852],[0.0622]],[[0.0398],[0.0542],[0.0758]]]) + *) + let t = combo.tensor([[[0.3787;0.7515;0.2252;0.3416]; [0.6078;0.4742;0.7844;0.0967]; [0.1416;0.1559;0.6452;0.1417]]; [[0.0848;0.4156;0.5542;0.4166];[0.5187;0.0520;0.4763;0.1509];[0.4767;0.8096;0.1729;0.6671]]]) + let tVariance0 = t.var(0, keepDim=true) + let tVariance0Correct = combo.tensor([[[0.0432; 0.0564; 0.0541; 0.0028];[0.0040; 0.0891; 0.0475; 0.0015];[0.0561; 0.2137; 0.1115; 0.1380]]]) + let tVariance1 = t.var(1, keepDim=true) + let tVariance1Correct = combo.tensor([[[0.0543; 0.0888; 0.0847; 0.0170]];[[0.0573; 0.1436; 0.0406; 0.0666]]]) + let tVariance2 = t.var(2, keepDim=true) + let tVariance2Correct = combo.tensor([[[0.0519];[0.0852];[0.0622]];[[0.0398];[0.0542];[0.0758]]]) + + Assert.That(tVariance0.allclose(tVariance0Correct, 0.01, 0.01)) + Assert.That(tVariance1.allclose(tVariance1Correct, 0.01, 0.01)) + Assert.That(tVariance2.allclose(tVariance2Correct, 0.01, 0.01)) + Assert.CheckEqual(tVariance0.dtype, combo.dtype) + Assert.CheckEqual(tVariance1.dtype, combo.dtype) + Assert.CheckEqual(tVariance2.dtype, combo.dtype) + + [] + member _.TestTensorCovariance () = + (* Python: + import numpy as np + a = np.array([[0.3787,0.7515,0.2252,0.3416], + [0.6078,0.4742,0.7844,0.0967], + [0.1416,0.1559,0.6452,0.1417]]) + a0 = a[[0],0] + a1 = a[0,:] + a2 = a[0:1,:] + fweights = np.array([1,7,7,4]) + aweights = np.array([0.7241, 0.2481, 0.4878, 0.6862]) + + + np.cov(a0) + np.cov(a1) + np.cov(a2) + + np.cov(a) + np.cov(a,ddof=0) + np.cov(a,fweights=fweights) + np.cov(a,aweights=aweights) + *) + for combo in Combos.FloatingPointExcept16s do + let t = combo.tensor([[0.3787;0.7515;0.2252;0.3416]; + [0.6078;0.4742;0.7844;0.0967]; + [0.1416;0.1559;0.6452;0.1417]]) + let t0 = t.[0,0] + let t1 = t.[0] + let t2 = t.[0].view([1;-1]) + let fweights = combo.tensor([1;7;7;4],dtype=Dtype.Int32) + let aweights = combo.tensor([0.7241; 0.2481; 0.4878; 0.6862]) + + // to suppress printfn from this test "Warning: degress of freedom <= 0" + Console.SetOut(IO.TextWriter.Null) + let t0Unbiased = t0.cov() + Assert.That(t0Unbiased.isnan().toBool()) + // restore stdout + let stdout = new IO.StreamWriter(Console.OpenStandardOutput()) + stdout.AutoFlush <- true + Console.SetOut(stdout) + + let t0Biased = t0.cov(correction= int64 0) + let t0BiasedCorrect = combo.tensor(0) + Assert.That(t0Biased.allclose(t0BiasedCorrect,0.01,0.01)) + + let t1Unbiased = t1.cov() + let t1UnbiasedCorrect = combo.tensor(0.0518731) + Assert.That(t1Unbiased.allclose(t1UnbiasedCorrect,0.01,0.01)) + + + let t1Biased = t1.cov(correction= int64 0) + let t1BiasedCorrect = combo.tensor(0.03890482) + Assert.That(t1Biased.allclose(t1BiasedCorrect,0.01,0.01)) + + Assert.That(t1.cov().allclose(t2.cov(),0.01,0.01)) + Assert.That(t1.cov(correction= int64 0).allclose(t2.cov(correction= int64 0),0.01,0.01)) + + let tUnbiased = t.cov() + let tUnbiasedCorrect = combo.tensor([[ 0.0518731 , -0.01221014, -0.03185672], + [-0.01221014, 0.08516011, 0.04919771], + [-0.03185672, 0.04919771, 0.06224549]]) + Assert.That(tUnbiased.allclose(tUnbiasedCorrect,0.01,0.01)) + + let tBiased = t.cov(correction= int64 0) + let tBiasedCorrect = + combo.tensor([[ 0.03890482, -0.0091576 , -0.02389254], + [-0.0091576 , 0.06387008, 0.03689828], + [-0.02389254, 0.03689828, 0.04668411]]) + Assert.That(tBiased.allclose(tBiasedCorrect,0.01,0.01)) + + let tUnbiasedFWeights = t.cov(fweights=fweights) + let tUnbiasedFWeightsCorrect = + combo.tensor([[ 0.05789406, -0.01862841, -0.04269081], + [-0.01862841, 
0.0682321 , 0.0523144 ], + [-0.04269081, 0.0523144 , 0.06026907]]) + Assert.That(tUnbiasedFWeights.allclose(tUnbiasedFWeightsCorrect,0.01,0.01)) + + let tBiasedFWeights = t.cov(fweights=fweights,correction= int64 0) + let tBiasedFWeightsCorrect = + combo.tensor([[ 0.054847 , -0.01764797, -0.04044393], + [-0.01764797, 0.06464094, 0.04956101], + [-0.04044393, 0.04956101, 0.05709701]]) + Assert.That(tBiasedFWeights.allclose(tBiasedFWeightsCorrect,0.01,0.01)) + + let tUnbiasedAWeights = t.cov(aweights=aweights) + let tUnbiasedAWeightsCorrect = + combo.tensor([[ 0.03039008, -0.00885102, -0.02299303], + [-0.00885102, 0.10213812, 0.05019765], + [-0.02299303, 0.05019765, 0.06144794]]) + Assert.That(tUnbiasedAWeights.allclose(tUnbiasedAWeightsCorrect,0.01,0.01)) + + let tBiasedAWeights = t.cov(aweights=aweights,correction= int64 0) + let tBiasedAWeightsCorrect = + combo.tensor([[ 0.0218481 , -0.00636319, -0.0165302 ], + [-0.00636319, 0.07342935, 0.0360882 ], + [-0.0165302 , 0.0360882 , 0.04417628]]) + Assert.That(tBiasedAWeights.allclose(tBiasedAWeightsCorrect,0.01,0.01)) + + let tUnbiasedFWeightsAWeights = t.cov(fweights=fweights,aweights=aweights) + let tUnbiasedFWeightsAWeightsCorrect = + combo.tensor([[ 0.04020249, -0.01536804, -0.03199123], + [-0.01536804, 0.09027013, 0.06286618], + [-0.03199123, 0.06286618, 0.0633787 ]]) + Assert.That(tUnbiasedFWeightsAWeights.allclose(tUnbiasedFWeightsAWeightsCorrect,0.01,0.01)) + + let tBiasedFWeightsAWeights = t.cov(fweights=fweights,aweights=aweights,correction= int64 0) + let tBiasedFWeightsAWeightsCorrect = + combo.tensor([[ 0.03776553, -0.01443647, -0.03005202], + [-0.01443647, 0.08479822, 0.05905542], + [-0.03005202, 0.05905542, 0.05953687]]) + Assert.That(tBiasedFWeightsAWeights.allclose(tBiasedFWeightsAWeightsCorrect,0.01,0.01)) + + [] + member _.TestTensorCorrCoef () = + (* Python: + import numpy as np + a = np.array([[0.3787,0.7515,0.2252,0.3416], + [0.6078,0.4742,0.7844,0.0967], + [0.1416,0.1559,0.6452,0.1417]]) + np.corrcoef(a) + *) + for combo in Combos.FloatingPointExcept16s do + let t = combo.tensor([[0.3787;0.7515;0.2252;0.3416]; + [0.6078;0.4742;0.7844;0.0967]; + [0.1416;0.1559;0.6452;0.1417]]) + let tCorrCoef = t.corrcoef() + let tCorrCoefCorrect = + combo.tensor([[ 1. , -0.18370941, -0.56062968], + [-0.18370941, 1. , 0.67572941], + [-0.56062968, 0.67572941, 1. 
]]) + Assert.That(tCorrCoef.allclose(tCorrCoefCorrect,0.01,0.01)) + + [] + member _.TestTensorPermuteT () = + for combo in Combos.All do + let t = combo.arange(2*3*4*5).view([2;3;4;5]).cast(combo.dtype) + + let t0123 = t.permute([0;1;2;3]) + let t0123Correct = t + + let t1023 = t.permute([1;0;2;3]) + let t1023Correct = combo.tensor([[[[ 0, 1, 2, 3, 4], + [ 5, 6, 7, 8, 9], + [ 10, 11, 12, 13, 14], + [ 15, 16, 17, 18, 19]], + + [[ 60, 61, 62, 63, 64], + [ 65, 66, 67, 68, 69], + [ 70, 71, 72, 73, 74], + [ 75, 76, 77, 78, 79]]], + + + [[[ 20, 21, 22, 23, 24], + [ 25, 26, 27, 28, 29], + [ 30, 31, 32, 33, 34], + [ 35, 36, 37, 38, 39]], + + [[ 80, 81, 82, 83, 84], + [ 85, 86, 87, 88, 89], + [ 90, 91, 92, 93, 94], + [ 95, 96, 97, 98, 99]]], + + + [[[ 40, 41, 42, 43, 44], + [ 45, 46, 47, 48, 49], + [ 50, 51, 52, 53, 54], + [ 55, 56, 57, 58, 59]], + + [[100, 101, 102, 103, 104], + [105, 106, 107, 108, 109], + [110, 111, 112, 113, 114], + [115, 116, 117, 118, 119]]]]) + + let t1032 = t.permute([1;0;3;2]) + let t1032Correct = combo.tensor([[[[ 0, 5, 10, 15], + [ 1, 6, 11, 16], + [ 2, 7, 12, 17], + [ 3, 8, 13, 18], + [ 4, 9, 14, 19]], + + [[ 60, 65, 70, 75], + [ 61, 66, 71, 76], + [ 62, 67, 72, 77], + [ 63, 68, 73, 78], + [ 64, 69, 74, 79]]], + + + [[[ 20, 25, 30, 35], + [ 21, 26, 31, 36], + [ 22, 27, 32, 37], + [ 23, 28, 33, 38], + [ 24, 29, 34, 39]], + + [[ 80, 85, 90, 95], + [ 81, 86, 91, 96], + [ 82, 87, 92, 97], + [ 83, 88, 93, 98], + [ 84, 89, 94, 99]]], + + + [[[ 40, 45, 50, 55], + [ 41, 46, 51, 56], + [ 42, 47, 52, 57], + [ 43, 48, 53, 58], + [ 44, 49, 54, 59]], + + [[100, 105, 110, 115], + [101, 106, 111, 116], + [102, 107, 112, 117], + [103, 108, 113, 118], + [104, 109, 114, 119]]]]) + let t3210 = t.permute([3;2;1;0]) + let t3210Correct = combo.tensor([[[[ 0, 60], + [ 20, 80], + [ 40, 100]], + + [[ 5, 65], + [ 25, 85], + [ 45, 105]], + + [[ 10, 70], + [ 30, 90], + [ 50, 110]], + + [[ 15, 75], + [ 35, 95], + [ 55, 115]]], + + + [[[ 1, 61], + [ 21, 81], + [ 41, 101]], + + [[ 6, 66], + [ 26, 86], + [ 46, 106]], + + [[ 11, 71], + [ 31, 91], + [ 51, 111]], + + [[ 16, 76], + [ 36, 96], + [ 56, 116]]], + + + [[[ 2, 62], + [ 22, 82], + [ 42, 102]], + + [[ 7, 67], + [ 27, 87], + [ 47, 107]], + + [[ 12, 72], + [ 32, 92], + [ 52, 112]], + + [[ 17, 77], + [ 37, 97], + [ 57, 117]]], + + + [[[ 3, 63], + [ 23, 83], + [ 43, 103]], + + [[ 8, 68], + [ 28, 88], + [ 48, 108]], + + [[ 13, 73], + [ 33, 93], + [ 53, 113]], + + [[ 18, 78], + [ 38, 98], + [ 58, 118]]], + + + [[[ 4, 64], + [ 24, 84], + [ 44, 104]], + + [[ 9, 69], + [ 29, 89], + [ 49, 109]], + + [[ 14, 74], + [ 34, 94], + [ 54, 114]], + + [[ 19, 79], + [ 39, 99], + [ 59, 119]]]]) + + Assert.CheckEqual(t0123Correct, t0123) + Assert.CheckEqual(t1023Correct, t1023) + Assert.CheckEqual(t1032Correct, t1032) + Assert.CheckEqual(t3210Correct, t3210) + Assert.CheckEqual(t0123.dtype, combo.dtype) + + [] + member _.TestTensorTransposeT () = + for combo in Combos.All do + let t = combo.arange(24).view([2;4;3]).cast(combo.dtype) + + let t00 = t.transpose(0, 0) + let t00Correct = t + + let t01 = t.transpose(0, 1) + let t01Correct = combo.tensor([[[ 0, 1, 2], + [12, 13, 14]], + + [[ 3, 4, 5], + [15, 16, 17]], + + [[ 6, 7, 8], + [18, 19, 20]], + + [[ 9, 10, 11], + [21, 22, 23]]]) + let t02 = t.transpose(0, 2) + let t02Correct = combo.tensor([[[ 0, 12], + [ 3, 15], + [ 6, 18], + [ 9, 21]], + + [[ 1, 13], + [ 4, 16], + [ 7, 19], + [10, 22]], + + [[ 2, 14], + [ 5, 17], + [ 8, 20], + [11, 23]]]) + let t12 = t.transpose(1, 2) + let t12Correct = combo.tensor([[[ 0, 3, 6, 
9],
+                                        [ 1, 4, 7, 10],
+                                        [ 2, 5, 8, 11]],
+
+                                       [[12, 15, 18, 21],
+                                        [13, 16, 19, 22],
+                                        [14, 17, 20, 23]]])
+
+            Assert.CheckEqual(t00Correct, t00)
+            Assert.CheckEqual(t01Correct, t01)
+            Assert.CheckEqual(t02Correct, t02)
+            Assert.CheckEqual(t12Correct, t12)
+            Assert.CheckEqual(t00.dtype, combo.dtype)
+            Assert.CheckEqual(t01.dtype, combo.dtype)
+
+    []
+    member _.TestTensorTransposeT2 () =
+        for combo in Combos.All do
+            let t1 = combo.tensor([[1.; 2.; 3.]; [4.; 5.; 6.]])
+            let t1Transpose = t1.transpose()
+            let t1TransposeCorrect = combo.tensor([[1.; 4.]; [2.; 5.]; [3.; 6.]])
+
+            let t2 = combo.tensor([[1.; 2.]; [3.; 4.]])
+            let t2TransposeTranspose = t2.transpose().transpose()
+            let t2TransposeTransposeCorrect = t2
+
+            Assert.CheckEqual(t1TransposeCorrect, t1Transpose)
+            Assert.CheckEqual(t2TransposeTransposeCorrect, t2TransposeTranspose)
+            Assert.CheckEqual(t1Transpose.dtype, combo.dtype)
+            Assert.CheckEqual(t2TransposeTranspose.dtype, combo.dtype)
+
+    []
+    member _.TestTensorSignT () =
+        // Test all signed types
+        for combo in Combos.SignedIntegralAndFloatingPoint do
+            let t1 = combo.tensor([-1.; -2.; 0.; 3.])
+            let t1Sign = t1.sign()
+            let t1SignCorrect = combo.tensor([-1.; -1.; 0.; 1.])
+
+            Assert.CheckEqual(t1SignCorrect, t1Sign)
+            Assert.CheckEqual(t1Sign.dtype, combo.dtype)
+
+        // Test unsigned types
+        for combo in Combos.UnsignedIntegral do
+            let t1 = combo.tensor([1; 1; 0; 3])
+            let t1Sign = t1.sign()
+            let t1SignCorrect = combo.tensor([1; 1; 0; 1])
+
+            Assert.CheckEqual(t1SignCorrect, t1Sign)
+            Assert.CheckEqual(t1Sign.dtype, combo.dtype)
+
+        // Test bool type separately
+        // Note, PyTorch 'torch.tensor([True, False]).sign()' gives 'tensor([ True, False])'
+        for combo in Combos.AllDevicesAndBackendsFloat32 do
+            let t1Bool = combo.tensor([true;false], dtype=Dtype.Bool)
+            let t1BoolSignCorrect = combo.tensor([true; false], dtype=Dtype.Bool)
+
+            Assert.CheckEqual(t1BoolSignCorrect, t1Bool.sign())
+
+    []
+    member _.TestTensorFloorT () =
+        for combo in Combos.FloatingPointExcept16s do
+            let t1 = combo.tensor([0.9473; 0.4891; 0.2015; 0.5818; 0.8439])
+            let t1Floor = t1.floor()
+            let t1FloorCorrect = combo.tensor([0.; 0.; 0.; 0.; 0.])
+
+            Assert.That(t1Floor.allclose(t1FloorCorrect, 0.01))
+            Assert.CheckEqual(t1Floor.dtype, combo.dtype)
+
+        for combo in Combos.IntegralAndBool do
+            isInvalidOp(fun () -> combo.tensor([1.0]).floor())
+
+    []
+    member _.TestTensorCeilT () =
+        for combo in Combos.FloatingPointExcept16s do
+            let t1 = combo.tensor([0.9473; 0.4891; 0.2015; 0.5818; 0.8439])
+            let t1Ceil = t1.ceil()
+            let t1CeilCorrect = combo.tensor([1.; 1.; 1.; 1.; 1.])
+
+            Assert.That(t1Ceil.allclose(t1CeilCorrect, 0.01))
+            Assert.CheckEqual(t1Ceil.dtype, combo.dtype)
+
+        for combo in Combos.IntegralAndBool do
+            isInvalidOp(fun () -> combo.tensor([1.0]).ceil())
+
+    []
+    member _.TestTensorRoundT () =
+        for combo in Combos.FloatingPointExcept16s do
+            let t1 = combo.tensor([0.9473; 0.4891; 0.2015; 0.5818; 0.8439])
+            let t1Round = t1.round()
+            let t1RoundCorrect = combo.tensor([1.; 0.; 0.; 1.; 1.])
+
+            Assert.That(t1Round.allclose(t1RoundCorrect, 0.01))
+            Assert.CheckEqual(t1Round.dtype, combo.dtype)
+
+        for combo in Combos.IntegralAndBool do
+            isInvalidOp(fun () -> combo.tensor([1.0]).round())
+
+    []
+    member _.TestTensorAbsT () =
+        for combo in Combos.SignedIntegralAndFloatingPoint do
+            let t1 = combo.tensor([-1.; -2.; 0.; 3.])
+            let t1Abs = t1.abs()
+            let t1AbsCorrect = combo.tensor([1.; 2.; 0.; 3.])
+
+            Assert.CheckEqual(t1AbsCorrect, t1Abs)
+            Assert.CheckEqual(t1Abs.dtype, combo.dtype)
+
+        for
combo in Combos.UnsignedIntegral do + let t1 = combo.tensor([1.; 2.; 0.; 3.]) + let t1Abs = t1.abs() + let t1AbsCorrect = combo.tensor([1.; 2.; 0.; 3.]) + + Assert.CheckEqual(t1AbsCorrect, t1Abs) + Assert.CheckEqual(t1Abs.dtype, combo.dtype) + + // Test bool separately + // Note: PyTorch fails on 'torch.tensor([True, False]).abs()' + for combo in Combos.AllDevicesAndBackendsFloat32 do + let t1 = combo.tensor([true; false], dtype=Dtype.Bool) + isInvalidOp (fun () -> t1.abs()) + + [] + member _.TestTensorReluT () = + for combo in Combos.SignedIntegralAndFloatingPointExcept16s do + let t1 = combo.tensor([-1.; -2.; 0.; 3.; 10.]) + let t1Relu = t1.relu() + let t1ReluCorrect = combo.tensor([0.; 0.; 0.; 3.; 10.]) + + Assert.CheckEqual(t1ReluCorrect, t1Relu) + Assert.CheckEqual(t1Relu.dtype, combo.dtype) + + // Test bool separately + for combo in Combos.AllDevicesAndBackendsFloat32 do + let t1 = combo.tensor([true; false], dtype=Dtype.Bool) + isInvalidOp (fun () -> t1.relu()) + + [] + member _.TestTensorLeakyRelu () = + for combo in Combos.FloatingPoint do + let t1 = combo.tensor([-1.; -2.; 0.; 3.; 10.]) + let t1LeakyRelu = t1.leakyRelu() + let t1LeakyReluCorrect = combo.tensor([-1.0000e-02; -2.0000e-02; 0.0000e+00; 3.0000e+00; 1.0000e+01]) + + Assert.CheckEqual(t1LeakyReluCorrect, t1LeakyRelu) + Assert.CheckEqual(t1LeakyRelu.dtype, combo.dtype) + Assert.CheckEqual(t1LeakyRelu.dtype, combo.dtype) + + [] + member _.TestTensorSigmoidT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([0.9473; 0.4891; 0.2015; 0.5818; 0.8439]) + let t1Sigmoid = t1.sigmoid() + let t1SigmoidCorrect = combo.tensor([0.7206; 0.6199; 0.5502; 0.6415; 0.6993]) + + Assert.That(t1Sigmoid.allclose(t1SigmoidCorrect, 0.01)) + Assert.CheckEqual(t1Sigmoid.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).sigmoid()) + + [] + member _.TestTensorSoftplusT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([-1.9908e-01, 9.0179e-01, -5.7899e-01, 1.2083e+00, -4.0689e+04, 2.8907e+05, -6.5848e+05, -1.2992e+05]) + let t1Softplus = t1.softplus() + let t1SoftplusCorrect = combo.tensor([5.9855e-01, 1.2424e+00, 4.4498e-01, 1.4697e+00, 0.0000e+00, 2.8907e+05, 0.0000e+00, 0.0000e+00]) + + Assert.That(t1Softplus.allclose(t1SoftplusCorrect, 0.01)) + Assert.CheckEqual(t1Softplus.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).softplus()) + + [] + member _.TestTensorExpT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([0.9139; -0.5907; 1.9422; -0.7763; -0.3274]) + let t1Exp = t1.exp() + let t1ExpCorrect = combo.tensor([2.4940; 0.5539; 6.9742; 0.4601; 0.7208]) + + Assert.That(t1Exp.allclose(t1ExpCorrect, 0.01)) + Assert.CheckEqual(t1Exp.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).exp()) + + [] + member _.TestTensorLogT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([0.1285; 0.5812; 0.6505; 0.3781; 0.4025]) + let t1Log = t1.log() + let t1LogCorrect = combo.tensor([-2.0516; -0.5426; -0.4301; -0.9727; -0.9100]) + + Assert.That(t1Log.allclose(t1LogCorrect, 0.01)) + Assert.CheckEqual(t1Log.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).log()) + + [] + member _.TestTensorSafeLog () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([0.; -5.; System.Double.NegativeInfinity; 0.6505; 0.3781; 
0.4025]) + let epsilon = 1e-12 + let t1Log = t1.safelog(epsilon) + let t1LogCorrect = combo.tensor([-27.631, -27.631, -27.631, -0.430014, -0.972597, -0.91006]) + + Assert.That(t1Log.allclose(t1LogCorrect, 0.01)) + Assert.CheckEqual(t1Log.dtype, combo.dtype) + + [] + member _.TestTensorLog10T () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([0.1285; 0.5812; 0.6505; 0.3781; 0.4025]) + let t1Log10 = t1.log10() + let t1Log10Correct = combo.tensor([-0.8911; -0.2357; -0.1868; -0.4224; -0.3952]) + + Assert.That(t1Log10.allclose(t1Log10Correct, 0.01)) + Assert.CheckEqual(t1Log10.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).log10()) + + [] + member _.TestTensorSqrtT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([54.7919; 70.6440; 16.0868; 74.5486; 82.9318]) + let t1Sqrt = t1.sqrt() + let t1SqrtCorrect = combo.tensor([7.4022; 8.4050; 4.0108; 8.6342; 9.1067]) + + Assert.That(t1Sqrt.allclose(t1SqrtCorrect, 0.01)) + Assert.CheckEqual(t1Sqrt.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).sqrt()) + + [] + member _.TestTensorSinT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([54.7919; 70.6440; 16.0868; 74.5486; 82.9318]) + let t1Sin = t1.sin() + let t1SinCorrect = combo.tensor([-0.9828; 0.9991; -0.3698; -0.7510; 0.9491]) + + Assert.That(t1Sin.allclose(t1SinCorrect, 0.01)) + Assert.CheckEqual(t1Sin.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).sin()) + + [] + member _.TestTensorCosT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([54.7919; 70.6440; 16.0868; 74.5486; 82.9318]) + let t1Cos = t1.cos() + let t1CosCorrect = combo.tensor([-0.1849; 0.0418; -0.9291; 0.6603; 0.3150]) + + Assert.That(t1Cos.allclose(t1CosCorrect, 0.01)) + Assert.CheckEqual(t1Cos.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).cos()) + + [] + member _.TestTensorTanT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([0.9473; 1.4891; 0.2015; 0.5818; 0.8439]) + let t1Tan = t1.tan() + let t1TanCorrect = combo.tensor([1.3904; 12.2132; 0.2043; 0.6577; 1.1244]) + + Assert.That(t1Tan.allclose(t1TanCorrect, 0.01)) + Assert.CheckEqual(t1Tan.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).tan()) + + [] + member _.TestTensorSinhT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([0.9473; 1.4891; 0.2015; 0.5818; 0.8439]) + let t1Sinh = t1.sinh() + let t1SinhCorrect = combo.tensor([1.0955; 2.1038; 0.2029; 0.6152; 0.9477]) + + Assert.That(t1Sinh.allclose(t1SinhCorrect, 0.01)) + Assert.CheckEqual(t1Sinh.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).sinh()) + + [] + member _.TestTensorCoshT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([0.9473; 1.4891; 0.2015; 0.5818; 0.8439]) + let t1Cosh = t1.cosh() + let t1CoshCorrect = combo.tensor([1.4833; 2.3293; 1.0204; 1.1741; 1.3777]) + + Assert.That(t1Cosh.allclose(t1CoshCorrect, 0.01)) + Assert.CheckEqual(t1Cosh.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).cosh()) + + [] + member _.TestTensorTanhT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([0.9473; 1.4891; 
0.2015; 0.5818; 0.8439]) + let t1Tanh = t1.tanh() + let t1TanhCorrect = combo.tensor([0.7386; 0.9032; 0.1988; 0.5240; 0.6879]) + + Assert.That(t1Tanh.allclose(t1TanhCorrect, 0.01)) + Assert.CheckEqual(t1Tanh.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).tanh()) + + [] + member _.TestTensorAsinT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([0.9473; 0.4891; 0.2015; 0.5818; 0.8439]) + let t1Asin = t1.asin() + let t1AsinCorrect = combo.tensor([1.2447; 0.5111; 0.2029; 0.6209; 1.0045]) + + Assert.That(t1Asin.allclose(t1AsinCorrect, 0.01)) + Assert.CheckEqual(t1Asin.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).asin()) + + [] + member _.TestTensorAcosT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([0.9473; 0.4891; 0.2015; 0.5818; 0.8439]) + let t1Acos = t1.acos() + let t1AcosCorrect = combo.tensor([0.3261; 1.0597; 1.3679; 0.9499; 0.5663]) + + Assert.That(t1Acos.allclose(t1AcosCorrect, 0.01)) + Assert.CheckEqual(t1Acos.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).acos()) + + [] + member _.TestTensorAtanT () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([0.9473; 0.4891; 0.2015; 0.5818; 0.8439]) + let t1Atan = t1.atan() + let t1AtanCorrect = combo.tensor([0.7583; 0.4549; 0.1988; 0.5269; 0.7009]) + + Assert.That(t1Atan.allclose(t1AtanCorrect, 0.01)) + Assert.CheckEqual(t1Atan.dtype, combo.dtype) + + for combo in Combos.IntegralAndBool do + isInvalidOp(fun () -> combo.tensor([1.0]).atan()) + + [] + member _.TestTensorNumToBool () = + for combo in Combos.FloatingPoint do + let t = combo.tensor([-2., -1., -0.5, 0., 0.5, 1., 2.]) + let tb = t.bool() + let tbCorrect = combo.tensor([true, true, true, false, true, true, true], dtype=Dtype.Bool) + + Assert.CheckEqual(tbCorrect, tb) + + for combo in Combos.Integral do + let t = combo.tensor([-2., -1., 0., 1., 2.]) + let tb = t.bool() + let tbCorrect = combo.tensor([true, true, false, true, true], dtype=Dtype.Bool) + + Assert.CheckEqual(tbCorrect, tb) + + [] + member _.TestTensorSliceWithNegativeIndex () = + for combo in Combos.All do + let t = combo.tensor([[[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]], + + [[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]]]) + + let t1 = t[-1] + let t1Correct = combo.tensor([[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]]) + Assert.CheckEqual(t1Correct, t1) + + let t2 = t[0, -1] + let t2Correct = combo.tensor([ 8, 9, 10, 11]) + Assert.CheckEqual(t2Correct, t2) + + let t3 = t[0, 0, -1] + let t3Correct = combo.tensor(3) + Assert.CheckEqual(t3Correct, t3) + + let t3 = t[0, 0, -2] + let t3Correct = combo.tensor(2) + Assert.CheckEqual(t3Correct, t3) + + let t3 = t[0, -2, -1] + let t3Correct = combo.tensor(7) + Assert.CheckEqual(t3Correct, t3) + + [] + member _.TestTensorSlice () = + for combo in Combos.All do + let t1 = combo.tensor([1.;2.]) + let t1s1 = t1[0] + let t1s2 = t1[*] + let t1s3 = t1[1..] + let t1s4 = t1[..0] // In Python this is [:1] because in Python upper limits are exclusive whereas in F# they are inclusive + let t1s1Correct = combo.tensor(1.) 
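+ // Mapping between the two slicing conventions used in the comments here (F# upper bounds are inclusive): + //   F# t1[..0] ~ Python t1[:1], F# t1[1..] ~ Python t1[1:], and below F# t2b[1..2,2..3] ~ Python t2b[1:3,2:4]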
+ let t1s2Correct = combo.tensor([1.;2.]) + let t1s3Correct = combo.tensor([2.]) + let t1s4Correct = combo.tensor([1.]) + + let t2 = combo.tensor([[1.;2.];[3.;4.]]) + let t2s1 = t2[0] + let t2s2 = t2[*] + let t2s3 = t2[0,0] + let t2s4 = t2[0,*] + let t2s5 = t2[*,0] + let t2s6 = t2[*,*] + let t2s7 = t2[1..] + let t2s8 = t2[..0] // In Python this is [:1] because in Python upper limits are exclusive whereas in F# they are inclusive + let t2s1Correct = combo.tensor([1.;2.]) + let t2s2Correct = combo.tensor([[1.;2.];[3.;4.]]) + let t2s3Correct = combo.tensor(1.) + let t2s4Correct = combo.tensor([1.;2.]) + let t2s5Correct = combo.tensor([1.;3.]) + let t2s6Correct = combo.tensor([[1.;2.];[3.;4.]]) + let t2s7Correct = combo.tensor([[3.; 4.]]) + let t2s8Correct = combo.tensor([[1.; 2.]]) + + let t2b = combo.tensor([[1.;2.;3.;4.]; [5.;6.;7.;8.]; [9.;10.;11.;12.]]) + let t2bs1 = t2b[1..,2..] + let t2bs1Correct = combo.tensor([[7.;8.];[11.;12.]]) + let t2bs2 = t2b[1..2,2..3] + let t2bs2Correct = combo.tensor([[7.;8.];[11.;12.]]) + + let t3 = combo.tensor([[[1.;2.];[3.;4.]];[[5.;6.];[7.;8.]]]) + let t3s1 = t3[0] + let t3s2 = t3[*] + let t3s3 = t3[0,0] + let t3s4 = t3[0,*] + let t3s5 = t3[*,0] + let t3s6 = t3[*,*] + let t3s7 = t3[0,0,0] + let t3s8 = t3[0,0,*] + let t3s9 = t3[0,*,0] + let t3s10 = t3[0,*,*] + let t3s11 = t3[*,0,0] + let t3s12 = t3[*,0,*] + let t3s13 = t3[*,*,0] + let t3s14 = t3[*,*,*] + let t3s1Correct = combo.tensor([[1.;2.];[3.;4.]]) + let t3s2Correct = combo.tensor([[[1.;2.];[3.;4.]];[[5.;6.];[7.;8.]]]) + let t3s3Correct = combo.tensor([1.;2.]) + let t3s4Correct = combo.tensor([[1.;2.];[3.;4.]]) + let t3s5Correct = combo.tensor([[1.;2.];[5.;6.]]) + let t3s6Correct = combo.tensor([[[1.;2.];[3.;4.]];[[5.;6.];[7.;8.]]]) + let t3s7Correct = combo.tensor(1.) 
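+ // In the 3-D cases every * wildcard keeps its dimension and every fixed index drops it, e.g. t3[*,0,*] keeps dims 0 and 2 and so has shape 2x2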
+ let t3s8Correct = combo.tensor([1.;2.]) + let t3s9Correct = combo.tensor([1.;3.]) + let t3s10Correct = combo.tensor([[1.;2.];[3.;4.]]) + let t3s11Correct = combo.tensor([1.;5.]) + let t3s12Correct = combo.tensor([[1.;2.];[5.;6.]]) + let t3s13Correct = combo.tensor([[1.;3.];[5.;7.]]) + let t3s14Correct = combo.tensor([[[1.;2.];[3.;4.]];[[5.;6.];[7.;8.]]]) + + let t4 = combo.tensor([[[[1.]]; + [[2.]]; + [[3.]]]; + [[[4.]]; + [[5.]]; + [[6.]]]]) + let t4s1 = t4[0] + let t4s2 = t4[0,*,*,*] + let t4s1Correct = combo.tensor([[[1]]; + [[2]]; + [[3]]]) + let t4s2Correct = t4s1Correct + + Assert.CheckEqual(t1s1Correct, t1s1) + Assert.CheckEqual(t1s2Correct, t1s2) + Assert.CheckEqual(t1s3Correct, t1s3) + Assert.CheckEqual(t1s4Correct, t1s4) + + Assert.CheckEqual(t2s1Correct, t2s1) + Assert.CheckEqual(t2s2Correct, t2s2) + Assert.CheckEqual(t2s3Correct, t2s3) + Assert.CheckEqual(t2s4Correct, t2s4) + Assert.CheckEqual(t2s5Correct, t2s5) + Assert.CheckEqual(t2s6Correct, t2s6) + Assert.CheckEqual(t2s7Correct, t2s7) + Assert.CheckEqual(t2s8Correct, t2s8) + + Assert.CheckEqual(t2bs1Correct, t2bs1) + Assert.CheckEqual(t2bs2Correct, t2bs2) + + Assert.CheckEqual(t3s1Correct, t3s1) + Assert.CheckEqual(t3s2Correct, t3s2) + Assert.CheckEqual(t3s3Correct, t3s3) + Assert.CheckEqual(t3s4Correct, t3s4) + Assert.CheckEqual(t3s5Correct, t3s5) + Assert.CheckEqual(t3s6Correct, t3s6) + Assert.CheckEqual(t3s7Correct, t3s7) + Assert.CheckEqual(t3s8Correct, t3s8) + Assert.CheckEqual(t3s9Correct, t3s9) + Assert.CheckEqual(t3s10Correct, t3s10) + Assert.CheckEqual(t3s11Correct, t3s11) + Assert.CheckEqual(t3s12Correct, t3s12) + Assert.CheckEqual(t3s13Correct, t3s13) + Assert.CheckEqual(t3s14Correct, t3s14) + + Assert.CheckEqual(t4s1Correct, t4s1) + Assert.CheckEqual(t4s2Correct, t4s2) + + Assert.CheckEqual(t1s1.dtype, combo.dtype) + Assert.CheckEqual(t1s2.dtype, combo.dtype) + + Assert.CheckEqual(t2s1.dtype, combo.dtype) + Assert.CheckEqual(t2s2.dtype, combo.dtype) + Assert.CheckEqual(t2s3.dtype, combo.dtype) + Assert.CheckEqual(t2s4.dtype, combo.dtype) + Assert.CheckEqual(t2s5.dtype, combo.dtype) + Assert.CheckEqual(t2s6.dtype, combo.dtype) + + Assert.CheckEqual(t2bs1.dtype, combo.dtype) + Assert.CheckEqual(t2bs2.dtype, combo.dtype) + + Assert.CheckEqual(t3s1.dtype, combo.dtype) + Assert.CheckEqual(t3s2.dtype, combo.dtype) + Assert.CheckEqual(t3s3.dtype, combo.dtype) + Assert.CheckEqual(t3s4.dtype, combo.dtype) + Assert.CheckEqual(t3s5.dtype, combo.dtype) + Assert.CheckEqual(t3s6.dtype, combo.dtype) + Assert.CheckEqual(t3s7.dtype, combo.dtype) + Assert.CheckEqual(t3s8.dtype, combo.dtype) + Assert.CheckEqual(t3s9.dtype, combo.dtype) + Assert.CheckEqual(t3s10.dtype, combo.dtype) + Assert.CheckEqual(t3s11.dtype, combo.dtype) + Assert.CheckEqual(t3s12.dtype, combo.dtype) + Assert.CheckEqual(t3s13.dtype, combo.dtype) + Assert.CheckEqual(t3s14.dtype, combo.dtype) + + [] + member _.TestTensorAddTTSlice () = + for combo in Combos.FloatingPoint do + let t1 = combo.tensor([[-0.2754; 0.0172; 0.7105]; + [-0.1890; 1.7664; 0.5377]; + [-0.5313; -2.2530; -0.6235]; + [ 0.6776; 1.5844; -0.5686]]) + let t2 = combo.tensor([[-111.8892; -7.0328]; + [ 18.7557; -86.2308]]) + let t3 = t1.addSlice([0;1], t2) + let t3Correct = combo.tensor([[ -0.2754; -111.8720; -6.3222]; + [ -0.1890; 20.5221; -85.6932]; + [ -0.5313; -2.2530; -0.6235]; + [ 0.6776; 1.5844; -0.5686]]) + + Assert.That(t3.allclose(t3Correct, 0.01)) + Assert.CheckEqual(t3.dtype, combo.dtype) + + [] + member _.TestTensorPad () = + for combo in Combos.All do + let t1 = 
combo.tensor([1.,2.,3.]) + let t1p0 = dsharp.pad(t1, [0]) + let t1p0Correct = combo.tensor([1.,2.,3.]) + let t1p1 = dsharp.pad(t1, [1]) + let t1p1Correct = combo.tensor([0.,1.,2.,3.,0.]) + let t1p2 = dsharp.pad(t1, [2]) + let t1p2Correct = combo.tensor([0.,0.,1.,2.,3.,0.,0.]) + let t2 = combo.tensor([[1.,2.,3.], [4.,5.,6.]]) + let t2p00 = dsharp.pad(t2, [0;0]) + let t2p00Correct = combo.tensor([[1.,2.,3.], [4.,5.,6.]]) + let t2p12 = dsharp.pad(t2, [1;2]) + let t2p12Correct = combo.tensor([[0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 2, 3, 0, 0], + [0, 0, 4, 5, 6, 0, 0], + [0, 0, 0, 0, 0, 0, 0]]) + let t2p22 = dsharp.pad(t2, [2;2]) + let t2p22Correct = combo.tensor([[0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 2, 3, 0, 0], + [0, 0, 4, 5, 6, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0]]) + Assert.CheckEqual(t1p0Correct, t1p0) + Assert.CheckEqual(t1p1Correct, t1p1) + Assert.CheckEqual(t1p2Correct, t1p2) + Assert.CheckEqual(t2p00Correct, t2p00) + Assert.CheckEqual(t2p12Correct, t2p12) + Assert.CheckEqual(t2p22Correct, t2p22) + + + [] + member _.TestTensorExpandT () = + for combo in Combos.All do + let t1 = combo.tensor(1.0) + let t1Expand = t1.expand([2;3]) + let t1ExpandCorrect = combo.tensor([[1.;1.;1.];[1.;1.;1.]]) + Assert.CheckEqual(t1ExpandCorrect, t1Expand) + + let t2 = combo.tensor([1.0]) + let t2Expand = t2.expand([2;3]) + let t2ExpandCorrect = combo.tensor([[1.;1.;1.];[1.;1.;1.]]) + + Assert.CheckEqual(t2ExpandCorrect, t2Expand) + + let t3 = combo.tensor([1.; 2.]) // 2 + let t3Expand = t3.expand([3;2]) // 3x2 + let t3ExpandCorrect = combo.tensor([[1.;2.];[1.;2.];[1.;2.]]) // 3x2 + + Assert.CheckEqual(t3ExpandCorrect, t3Expand) + + let t4 = combo.tensor([[1.]; [2.]]) // 2x1 + let t4Expand = t4.expand([2;2]) // 2x2 + let t4ExpandCorrect = combo.tensor([[1.;1.];[2.;2.]]) + + Assert.CheckEqual(t4ExpandCorrect, t4Expand) + + let t5 = combo.tensor([[1.]; [2.]]) // 2x1 + let t5Expand = t5.expand([2;2;2]) // 2x2x2 + let t5ExpandCorrect = combo.tensor([[[1.;1.];[2.;2.]];[[1.;1.];[2.;2.]]]) + + Assert.CheckEqual(t5ExpandCorrect, t5Expand) + + let t6 = combo.tensor([[1.]; [2.]; [3.]]) // 3x1 + let t6Expand = t6.expand([-1;4]) // 3x4 + let t6ExpandCorrect = combo.tensor([[1.;1.;1.;1.];[2.;2.;2.;2.];[3.;3.;3.;3.]]) + + Assert.CheckEqual(t6ExpandCorrect, t6Expand) + + isAnyException(fun () -> t6.expand([-1;3;4])) + + let t6Expand2 = t6.expand([2;-1;-1]) // 2x3x1 + let t6ExpandCorrect2 = combo.tensor([[[1.]; [2.]; [3.]] ; [[1.]; [2.]; [3.]]]) + Assert.CheckEqual(t6ExpandCorrect2, t6Expand2) + + [] + member _.TestTensorExpandAs () = + for combo in Combos.All do + let t1 = combo.tensor([[1], [2], [3]]) + let t2 = combo.zeros([3;2]) + let t1Expand = t1.expandAs(t2) + let t1ExpandCorrect = combo.tensor([[1, 1], + [2, 2], + [3, 3]]) + Assert.CheckEqual(t1ExpandCorrect, t1Expand) + + [] + member _.TestTensorSqueezeT () = + for combo in Combos.All do + let t1 = combo.tensor([[[1.; 2.]]; [[3.;4.]]]) + let t1Squeeze = t1.squeeze() + let t1SqueezeCorrect = combo.tensor([[1.;2.];[3.;4.]]) + + Assert.That(t1Squeeze.allclose(t1SqueezeCorrect, 0.01)) + Assert.CheckEqual(t1Squeeze.dtype, combo.dtype) + + [] + member _.TestTensorUnsqueezeT () = + for combo in Combos.All do + let t1 = combo.tensor([[1.;2.];[3.;4.]]) + let t1Unsqueeze = t1.unsqueeze(1) + let t1UnsqueezeCorrect = combo.tensor([[[1.; 2.]]; [[3.;4.]]]) + + Assert.That(t1Unsqueeze.allclose(t1UnsqueezeCorrect, 0.01)) + Assert.CheckEqual(t1Unsqueeze.dtype, combo.dtype) + + let t = combo.tensor([1, 2, 3, 4]) + let tUnsqueezeMinus2 = 
t.unsqueeze(-2) + let tUnsqueezeMinus2Correct = combo.tensor([[1, 2, 3, 4]]) + let tUnsqueezeMinus1 = t.unsqueeze(-1) + let tUnsqueezeMinus1Correct = combo.tensor([[1], + [2], + [3], + [4]]) + let tUnsqueeze0 = t.unsqueeze(0) + let tUnsqueeze0Correct = combo.tensor([[1, 2, 3, 4]]) + let tUnsqueeze1 = t.unsqueeze(1) + let tUnsqueeze1Correct = combo.tensor([[1], + [2], + [3], + [4]]) + + Assert.That(tUnsqueezeMinus2Correct.allclose(tUnsqueezeMinus2, 0.01)) + Assert.That(tUnsqueezeMinus1Correct.allclose(tUnsqueezeMinus1, 0.01)) + Assert.That(tUnsqueeze0Correct.allclose(tUnsqueeze0, 0.01)) + Assert.That(tUnsqueeze1Correct.allclose(tUnsqueeze1, 0.01)) + + [] + member _.TestTensorUnsqueezeAs () = + for combo in Combos.All do + let t1 = combo.ones(3) + let t2 = combo.ones([3;4]) + let t3 = combo.ones([3;4;2]) + + let t1u1 = t1.unsqueezeAs(t1) + let t1u1Shape = t1u1.shape + let t1u1ShapeCorrect = t1.shape + let t1u2 = t1.unsqueezeAs(t2) + let t1u2Shape = t1u2.shape + let t1u2ShapeCorrect = [|3;1|] + let t1u3 = t1.unsqueezeAs(t3) + let t1u3Shape = t1u3.shape + let t1u3ShapeCorrect = [|3;1;1|] + let t3u1 = t3.unsqueezeAs(t1) + let t3u1Shape = t3u1.shape + let t3u1ShapeCorrect = [|3;4;2|] + + Assert.AreEqual(t1u1ShapeCorrect, t1u1Shape) + Assert.AreEqual(t1u2ShapeCorrect, t1u2Shape) + Assert.AreEqual(t1u3ShapeCorrect, t1u3Shape) + Assert.AreEqual(t3u1ShapeCorrect, t3u1Shape) + + [] + member _.TestTensorFlipT () = + for combo in Combos.All do + let t1 = combo.tensor([[1.;2.];[3.;4.]]) + let t2 = t1.flip([|0|]) + let t2Correct = combo.tensor([[3.;4.]; [1.;2.]]) + let t3 = t1.flip([|1|]) + let t3Correct = combo.tensor([[2.;1.]; [4.;3.]]) + let t4 = t1.flip([|0; 1|]) + let t4Correct = combo.tensor([[4.;3.]; [2.;1.]]) + let t5 = t1.flip([|0; 1|]).flip([|0; 1|]) + let t5Correct = combo.tensor([[1.;2.]; [3.;4.]]) + + Assert.CheckEqual(t2Correct, t2) + Assert.CheckEqual(t3Correct, t3) + Assert.CheckEqual(t4Correct, t4) + Assert.CheckEqual(t5Correct, t5) + + [] + member _.TestTensorDilateT () = + for combo in Combos.FloatingPointExcept16s do + let tin1 = combo.tensor([1.;2.;3.]) + let t1 = tin1.dilate([|2|]) + let t1Correct = combo.tensor([1.;0.;2.;0.;3.]) + + Assert.CheckEqual(t1Correct, t1) + + let tin2 = combo.tensor([[1.;2.]; [3.;4.]]) + let t2 = tin2.dilate([|1; 2|]) + let t2Correct = combo.tensor([[1.;0.;2.];[3.;0.;4.]]) + + Assert.CheckEqual(t2Correct, t2) + Assert.CheckEqual(combo.dtype, t2.dtype) + + let t3 = tin2.dilate([|2; 2|]) + let t3Correct = combo.tensor([[1.;0.;2.];[0.;0.;0.];[3.;0.;4.]]) + + Assert.CheckEqual(t3Correct, t3) + Assert.CheckEqual(combo.dtype, t3.dtype) + + let tin5 = combo.tensor([1.;2.;3.;4.]) + let t5 = tin5.dilate([|3|]) + let t5Correct = combo.tensor([|1.;0.;0.;2.;0.;0.;3.;0.;0.;4.|]) + + Assert.CheckEqual(t5Correct, t5) + Assert.CheckEqual(combo.dtype, t5.dtype) + + // Dilate 3D 1; 1; 2 + let tin6 = combo.tensor([[[1.;2.]; [3.;4.]];[[5.;6.]; [7.;8.]]]) + let t6 = tin6.dilate([|1; 1; 2|]) + let t6Correct = combo.tensor([[[1.;0.;2.];[3.;0.;4.]]; [[5.;0.;6.];[7.;0.;8.]]]) + + Assert.CheckEqual(t6Correct, t6) + Assert.CheckEqual(combo.dtype, t6.dtype) + + // Dilate 4D 1; 1; 1; 2 + let tin7 = combo.tensor([[[[1.;2.]; [3.;4.]];[[5.;6.]; [7.;8.]]];[[[1.;2.]; [3.;4.]];[[5.;6.]; [7.;8.]]]]) + let t7 = tin7.dilate([|1; 1; 1; 2|]) + let t7Correct = combo.tensor([[[[1.;0.;2.];[3.;0.;4.]]; [[5.;0.;6.];[7.;0.;8.]]]; [[[1.;0.;2.];[3.;0.;4.]]; [[5.;0.;6.];[7.;0.;8.]]]]) + + Assert.CheckEqual(t7Correct, t7) + Assert.CheckEqual(combo.dtype, t7.dtype) + + let tin8 = combo.tensor([[[1.;2.]; 
[3.;4.]];[[5.;6.]; [7.;8.]]]) + let t8 = tin8.dilate([|2; 1; 2|]) + let t8Correct = combo.tensor([[[1.;0.;2.];[3.;0.;4.]]; [[0.;0.;0.];[0.;0.;0.]]; [[5.;0.;6.];[7.;0.;8.]]]) + + Assert.CheckEqual(t8Correct, t8) + Assert.CheckEqual(combo.dtype, t8.dtype) + + // Dilate 4D, 2; 1; 1; 2 + let tin9 = combo.tensor([[[[1.;2.]; [3.;4.]];[[5.;6.]; [7.;8.]]];[[[1.;2.]; [3.;4.]];[[5.;6.]; [7.;8.]]]]) + let t9 = tin9.dilate([|2; 1; 1; 2|]) + let t9Correct = combo.tensor([[[[1.;0.;2.];[3.;0.;4.]]; [[5.;0.;6.];[7.;0.;8.]]]; + [[[0.;0.;0.];[0.;0.;0.]]; [[0.;0.;0.];[0.;0.;0.]]]; + [[[1.;0.;2.];[3.;0.;4.]]; [[5.;0.;6.];[7.;0.;8.]]]]) + + Assert.CheckEqual(t9Correct, t9) + Assert.CheckEqual(combo.dtype, t9.dtype) + + [] + member _.TestTensorUndilateT () = + for combo in Combos.All do + let t1 = combo.tensor([[1.;0.;2.];[3.;0.;4.]]) + let t2 = t1.undilate([|1; 2|]) + let t2Correct = combo.tensor([[1.;2.]; [3.;4.]]) + let t3 = combo.tensor([[1.;0.;2.];[0.;0.;0.];[3.;0.;4.]]) + let t4 = t3.undilate([|2; 2|]) + let t4Correct = combo.tensor([[1.;2.]; [3.;4.]]) + let t5 = combo.tensor([|1.;0.;0.;2.;0.;0.;3.;0.;0.;4.|]) + let t6 = t5.undilate([|3|]) + let t6Correct = combo.tensor([1.;2.;3.;4.]) + + Assert.CheckEqual(t2Correct, t2) + Assert.CheckEqual(t4Correct, t4) + Assert.CheckEqual(t6Correct, t6) + Assert.CheckEqual(combo.dtype, t2.dtype) + Assert.CheckEqual(combo.dtype, t4.dtype) + Assert.CheckEqual(combo.dtype, t6.dtype) + + [] + member _.TestTensorClampT () = + for combo in Combos.SignedIntegralAndFloatingPointExcept16s do + let t = combo.tensor([-4,-3,-2,-1,0,1,2,3,4]) + let tClamped = dsharp.clamp(t, -2, 3) + let tClampedCorrect = combo.tensor([-2, -2, -2, -1, 0, 1, 2, 3, 3]) + Assert.CheckEqual(tClampedCorrect, tClamped) + + [] + member _.TestTensorClampInf () = + for combo in Combos.FloatingPointExcept16s do + let t = combo.tensor([System.Double.NegativeInfinity, System.Double.PositiveInfinity]) + let tClamped = dsharp.clamp(t, -100, 100) + let tClampedCorrect = combo.tensor([-100, 100]) + Assert.CheckEqual(tClampedCorrect, tClamped) + + [] + member _.TestTensorView () = + for combo in Combos.All do + let t = combo.randint(0, 2, [10;10]) + let t1 = t.view(-1) + let t1Shape = t1.shape + let t1ShapeCorrect = [|100|] + let t2Shape = t.view([-1;50]).shape + let t2ShapeCorrect = [|2;50|] + let t3Shape = t.view([2;-1;50]).shape + let t3ShapeCorrect = [|2;1;50|] + let t4Shape = t.view([2;-1;10]).shape + let t4ShapeCorrect = [|2;5;10|] + + Assert.CheckEqual(t1ShapeCorrect, t1Shape) + Assert.CheckEqual(t2ShapeCorrect, t2Shape) + Assert.CheckEqual(t3ShapeCorrect, t3Shape) + Assert.CheckEqual(t4ShapeCorrect, t4Shape) + Assert.CheckEqual(t1.dtype, combo.dtype) + + [] + member _.TestTensorViewAs () = + for combo in Combos.All do + let t1 = combo.tensor([1,2,3,4,5,6]) + let t2 = combo.zeros([3;2]) + let t1View = t1.viewAs(t2) + let t1ViewCorrect = combo.tensor([[1, 2], + [3, 4], + [5, 6]]) + Assert.CheckEqual(t1ViewCorrect, t1View) + + [] + member _.TestTensorFlatten () = + for combo in Combos.All do + let t1 = combo.randint(0, 2, [5;5;5;5]) + let t1f1shape = dsharp.flatten(t1).shape + let t1f1shapeCorrect = [|625|] + let t1f2shape = dsharp.flatten(t1, startDim=1).shape + let t1f2shapeCorrect = [|5; 125|] + let t1f3shape = dsharp.flatten(t1, startDim=1, endDim=2).shape + let t1f3shapeCorrect = [|5; 25; 5|] + + let t2 = combo.randint(0, 2, 5) + let t2fshape = dsharp.flatten(t2).shape + let t2fshapeCorrect = [|5|] + + let t3 = combo.tensor(2.5) + let t3fshape = dsharp.flatten(t3).shape + let t3fshapeCorrect = [||] + + 
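// flattening the 0-d tensor t3 is a no-op here: a scalar stays a scalar, hence the expected shape [||] +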
Assert.CheckEqual(t1f1shapeCorrect, t1f1shape) + Assert.CheckEqual(t1f2shapeCorrect, t1f2shape) + Assert.CheckEqual(t1f3shapeCorrect, t1f3shape) + Assert.CheckEqual(t2fshapeCorrect, t2fshape) + Assert.CheckEqual(t3fshapeCorrect, t3fshape) + + [] + member _.TestTensorUnflatten () = + for combo in Combos.All do + let t1 = combo.randint(0, 2, [20; 20]) + + let t1f1shape = dsharp.unflatten(t1, 0, [2;10]).shape + let t1f1shapeCorrect = [|2;10;20|] + + let t1f2shape = dsharp.unflatten(t1, 1, [2;10]).shape + let t1f2shapeCorrect = [|20;2;10|] + + Assert.CheckEqual(t1f1shapeCorrect, t1f1shape) + Assert.CheckEqual(t1f2shapeCorrect, t1f2shape) + + [] + member _.TestTensorGather () = + for combo in Combos.All do + let t1 = combo.tensor([1,2,3,4,5]) + let t1g = dsharp.gather(t1, 0, combo.tensor([0,2,3], dtype=Dtype.Int32)) + let t1gCorrect = combo.tensor([1, 3, 4]) + + let t2 = combo.tensor([[1,2],[3,4]]) + let t2g0 = dsharp.gather(t2, 0, combo.tensor([[0,1],[1,0]], dtype=Dtype.Int32)) + let t2g0Correct = combo.tensor([[1, 4], + [3, 2]]) + let t2g1 = dsharp.gather(t2, 1, combo.tensor([[0,0,1],[1,0,0]], dtype=Dtype.Int32)) + let t2g1Correct = combo.tensor([[1, 1, 2], + [4, 3, 3]]) + + Assert.CheckEqual(t1gCorrect, t1g) + Assert.CheckEqual(combo.dtype, t1g.dtype) + + Assert.CheckEqual(t2g0Correct, t2g0) + Assert.CheckEqual(combo.dtype, t2g0.dtype) + + Assert.CheckEqual(t2g1Correct, t2g1) + Assert.CheckEqual(combo.dtype, t2g1.dtype) + + [] + member _.TestTensorScatter () = + for combo in Combos.All do + let t1 = combo.tensor([0,1,2,3,4]) + let t1g = dsharp.scatter(t1, 0, combo.tensor([0, 2, 1, 3, 4], dtype=Dtype.Int32), destinationShape=[5]) + let t1gCorrect = combo.tensor([0., 2., 1., 3., 4.]) + + let t2 = combo.tensor([[1,2,3],[4,5,6]]) + let t2g0 = dsharp.scatter(t2, 0, combo.tensor([[0, 1, 1], [1, 0, 0]], dtype=Dtype.Int32), destinationShape=[2;3]) + let t2g0Correct = combo.tensor([[1., 5., 6.], + [4., 2., 3.]]) + let t2g1 = dsharp.scatter(t2, 1, combo.tensor([[0, 2, 1], [2, 0, 1]], dtype=Dtype.Int32), destinationShape=[2;3]) + let t2g1Correct = combo.tensor([[1., 3., 2.], + [5., 6., 4.]]) + + Assert.That(t1gCorrect.allclose(t1g, 0.01)) + Assert.CheckEqual(combo.dtype, t1g.dtype) + + Assert.That(t2g0Correct.allclose(t2g0, 0.01)) + Assert.CheckEqual(combo.dtype, t2g0.dtype) + + Assert.That(t2g1Correct.allclose(t2g1, 0.01)) + Assert.CheckEqual(combo.dtype, t2g1.dtype) + + [] + member _.TestTensorMaxElementwise () = + for combo in Combos.All do + let t1 = combo.tensor([4.;1.;20.;3.]) + let t2 = combo.tensor([1.;3.;21.;2.]) + let t1Max = t1.max(t2) + let t1MaxCorrect = combo.tensor([4.;3.;21.;3.]) + + Assert.CheckEqual(t1MaxCorrect, t1Max) + Assert.CheckEqual(combo.dtype, t1Max.dtype) + + let t2 = combo.tensor([4.;1.;0.;3.]) + let t2Max = t2.max(t2.scalarLike(2)) + let t2MaxCorrect = combo.tensor([4.;2.;2.;3.]) + + Assert.CheckEqual(t2MaxCorrect, t2Max) + Assert.CheckEqual(combo.dtype, t2Max.dtype) + + [] + member _.TestTensorMinElementwise () = + for combo in Combos.All do + let t1 = combo.tensor([4.;1.;20.;3.]) + let t2 = combo.tensor([1.;3.;21.;2.]) + let t1Min = t1.min(t2) + let t1MinCorrect = combo.tensor([1.;1.;20.;2.]) + + Assert.CheckEqual(t1MinCorrect, t1Min) + Assert.CheckEqual(combo.dtype, t1Min.dtype) + + let t2 = combo.tensor([4.;1.;0.;3.]) + let t2Min = t2.min(t2.scalarLike(1)) + let t2MinCorrect = combo.tensor([1.;1.;0.;1.]) + + Assert.CheckEqual(t2MinCorrect, t2Min) + Assert.CheckEqual(combo.dtype, t2Min.dtype) + + [] + member _.TestTensorMax () = + for combo in Combos.All do + let t1 = 
combo.tensor([4.;1.;20.;3.]) + let t1Max = t1.max() + let t1MaxCorrect = combo.tensor(20.) + + let t2 = combo.tensor([[1.;4.];[2.;3.]]) + let t2Max = t2.max() + let t2MaxCorrect = combo.tensor(4.) + + let t3 = combo.tensor([[[ 7.6884; 65.9125; 4.0114]; + [46.7944; 61.5331; 40.1627]; + [48.3240; 4.9910; 50.1571]]; + + [[13.4777; 65.7656; 36.8161]; + [47.8268; 42.2229; 5.6115]; + [43.4779; 77.8675; 95.7660]]; + + [[59.8422; 47.1146; 36.7614]; + [71.6328; 18.5912; 27.7328]; + [49.9120; 60.3023; 53.0838]]]) + + let t3Max = t3.max() + let t3MaxCorrect = combo.tensor(95.7660) + + let t4 = combo.tensor([[[[8.8978; 8.0936]; + [4.8087; 1.0921]; + [8.5664; 3.7814]]; + + [[2.3581; 3.7361]; + [1.0436; 6.0353]; + [7.7843; 8.7153]]; + + [[3.9188; 6.7906]; + [9.1242; 4.8711]; + [1.7870; 9.7456]]; + [[5.0444; 0.5447]; + [6.2945; 5.9047]; + [8.0867; 3.1606]]]]) + + let t4Max = t4.max() + let t4MaxCorrect = combo.tensor(9.7456) + + Assert.CheckEqual(t1MaxCorrect, t1Max) + Assert.CheckEqual(t2MaxCorrect, t2Max) + Assert.CheckEqual(t3MaxCorrect, t3Max) + Assert.CheckEqual(t4MaxCorrect, t4Max) + Assert.CheckEqual(t1Max.dtype, combo.dtype) + Assert.CheckEqual(t2Max.dtype, combo.dtype) + Assert.CheckEqual(t3Max.dtype, combo.dtype) + Assert.CheckEqual(t4Max.dtype, combo.dtype) + + + [] + member _.TestTensorMin () = + for combo in Combos.All do + let t1 = combo.tensor([4.;1.;20.;3.]) + let t1Min = t1.min() + let t1MinCorrect = combo.tensor(1.) + + let t2 = combo.tensor([[1.;4.];[2.;3.]]) + let t2Min = t2.min() + let t2MinCorrect = combo.tensor(1.) + + let t3 = combo.tensor([[[ 7.6884; 65.9125; 4.0114]; + [46.7944; 61.5331; 40.1627]; + [48.3240; 4.9910; 50.1571]]; + + [[13.4777; 65.7656; 36.8161]; + [47.8268; 42.2229; 5.6115]; + [43.4779; 77.8675; 95.7660]]; + + [[59.8422; 47.1146; 36.7614]; + [71.6328; 18.5912; 27.7328]; + [49.9120; 60.3023; 53.0838]]]) + let t3Min = t3.min() + let t3MinCorrect = combo.tensor(4.0114) + + let t4 = combo.tensor([[[[8.8978; 8.0936]; + [4.8087; 1.0921]; + [8.5664; 3.7814]]; + + [[2.3581; 3.7361]; + [1.0436; 6.0353]; + [7.7843; 8.7153]]; + + [[3.9188; 6.7906]; + [9.1242; 4.8711]; + [1.7870; 9.7456]]; + + [[5.7825; 8.0450]; + [2.7801; 1.0877]; + [3.4042; 5.1911]]]; + + [[[0.5370; 7.1115]; + [5.4971; 2.3567]; + [0.9318; 8.6992]]; + + [[3.3796; 8.7833]; + [5.8722; 5.9881]; + [0.7646; 7.3685]]; + + [[7.5344; 9.6162]; + [2.6404; 4.3938]; + [3.1335; 7.6783]]; + + [[5.0444; 0.5447]; + [6.2945; 5.9047]; + [8.0867; 3.1606]]]]) + let t4Min = t4.min() + let t4MinCorrect = combo.tensor(0.5370) + + Assert.CheckEqual(t1MinCorrect, t1Min) + Assert.CheckEqual(t2MinCorrect, t2Min) + Assert.CheckEqual(t3MinCorrect, t3Min) + Assert.CheckEqual(t4MinCorrect, t4Min) + Assert.CheckEqual(t1Min.dtype, combo.dtype) + Assert.CheckEqual(t2Min.dtype, combo.dtype) + Assert.CheckEqual(t3Min.dtype, combo.dtype) + Assert.CheckEqual(t4Min.dtype, combo.dtype) + + [] + member _.TestTensorMaxDim () = + for combo in Combos.All do + let t = combo.tensor([[5.5834, 5.6240], + [0.7616, 1.8576], + [7.3116, 9.7464]]) + + let tmax0 = t.max(dim=0) + let tmax0Correct = combo.tensor([7.3116, 9.7464]) + let tmax0KeepDim = t.max(dim=0, keepDim=true) + let tmax0KeepDimCorrect = combo.tensor([[7.3116, 9.7464]]) + + let tmax1 = t.max(dim=1) + let tmax1Correct = combo.tensor([5.6240, 1.8576, 9.7464]) + let tmax1KeepDim = t.max(dim=1, keepDim=true) + let tmax1KeepDimCorrect = combo.tensor([[5.6240], + [1.8576], + [9.7464]]) + + Assert.That(tmax0Correct.allclose(tmax0, 0.01)) + Assert.That(tmax0KeepDimCorrect.allclose(tmax0KeepDim, 0.01)) + 
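// dim=0 reduces over rows (one maximum per column) and dim=1 over columns (one per row); keepDim retains the reduced axis with size 1 +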
Assert.That(tmax1Correct.allclose(tmax1, 0.01)) + Assert.That(tmax1KeepDimCorrect.allclose(tmax1KeepDim, 0.01)) + + [] + member _.TestTensorMinDim () = + for combo in Combos.All do + let t = combo.tensor([[5.5834, 5.6240], + [0.7616, 1.8576], + [7.3116, 9.7464]]) + + let tmin0 = t.min(dim=0) + let tmin0Correct = combo.tensor([0.7616, 1.8576]) + let tmin0KeepDim = t.min(dim=0, keepDim=true) + let tmin0KeepDimCorrect = combo.tensor([[0.7616, 1.8576]]) + + let tmin1 = t.min(dim=1) + let tmin1Correct = combo.tensor([5.5834, 0.7616, 7.3116]) + let tmin1KeepDim = t.min(dim=1, keepDim=true) + let tmin1KeepDimCorrect = combo.tensor([[5.5834], + [0.7616], + [7.3116]]) + + Assert.That(tmin0Correct.allclose(tmin0, 0.01)) + Assert.That(tmin0KeepDimCorrect.allclose(tmin0KeepDim, 0.01)) + Assert.That(tmin1Correct.allclose(tmin1, 0.01)) + Assert.That(tmin1KeepDimCorrect.allclose(tmin1KeepDim, 0.01)) + + [] + member _.TestTensorArgmax () = + for combo in Combos.IntegralAndFloatingPointExcept16s do + let t1 = combo.tensor([4.;1.;20.;3.]) + let t1Argmax = t1.argmax(0) + let t1ArgmaxCorrect = combo.tensor(2, dtype=Dtype.Int32) + + let t1ArgmaxKeepDim = dsharp.argmax(t1, 0, keepDim=true) + let t1ArgmaxKeepDimCorrect = combo.tensor([2], dtype=Dtype.Int32) + + let t2 = combo.tensor([[1.;4.];[2.;3.]]) + let t2Argmax = t2.argmax(0) + let t2ArgmaxCorrect = combo.tensor([1,0], dtype=Dtype.Int32) + + let t2ArgmaxKeepDim = t2.argmax(0, keepDim=true) + let t2ArgmaxKeepDimCorrect = combo.tensor([[1;0]], dtype=Dtype.Int32) + + let t2ArgmaxKeepDim1 = t2.argmax(1, keepDim=true) + let t2ArgmaxKeepDim1Correct = combo.tensor([[1];[1]], dtype=Dtype.Int32) + + let t3 = combo.tensor([[[ 7.6884; 65.9125; 4.0114]; + [46.7944; 61.5331; 40.1627]; + [48.3240; 4.9910; 50.1571]]; + + [[13.4777; 65.7656; 36.8161]; + [47.8268; 42.2229; 5.6115]; + [43.4779; 77.8675; 95.7660]]; + + [[59.8422; 47.1146; 36.7614]; + [71.6328; 18.5912; 27.7328]; + [49.9120; 60.3023; 53.0838]]]) + + let t3Argmax0 = t3.argmax(0) + let t3Argmax0Correct = combo.tensor([[2, 0, 1], [2, 0, 0], [2, 1, 1]],dtype=Dtype.Int32) + + let t3Argmax1 = t3.argmax(1) + let t3Argmax1Correct = combo.tensor([[2, 0, 2], [1, 2, 2], [1, 2, 2]],dtype=Dtype.Int32) + + let t3Argmax2 = t3.argmax(2) + let t3Argmax2Correct = combo.tensor([[1, 1, 2],[1, 0, 2],[0, 0, 1]],dtype=Dtype.Int32) + + let t4 = combo.tensor([[[[1;2]]]]) + let t4Argmax = t4.argmax() + let t4ArgmaxCorrect = [| 0;0;0;1 |] + + let t4Argmax0 = t4.argmax(0) + let t4Argmax0Correct = combo.tensor([[[0;0]]],dtype=Dtype.Int32) + + let t5 = combo.tensor([[[[1;2]]]]).unsqueeze(0) + let t5Argmax = t5.argmax() + let t5ArgmaxCorrect = [| 0;0;0;0;1 |] + + let t5Argmax0 = t5.argmax(0) + let t5Argmax0Correct = combo.tensor([[[[0;0]]]],dtype=Dtype.Int32) + + let t6 = combo.tensor([[[[1;2]]]]).unsqueeze(0).unsqueeze(0) + let t6Argmax = t6.argmax() + let t6ArgmaxCorrect = [| 0;0;0;0;0;1 |] + + let t6Argmax0 = t6.argmax(0) + let t6Argmax0Correct = combo.tensor([[[[0;0]]]],dtype=Dtype.Int32).unsqueeze(0) + + Assert.CheckEqual(t1ArgmaxCorrect, t1Argmax) + Assert.CheckEqual(t1ArgmaxKeepDimCorrect, t1ArgmaxKeepDim) + + Assert.CheckEqual(t2ArgmaxCorrect, t2Argmax) + Assert.CheckEqual(t2ArgmaxKeepDimCorrect, t2ArgmaxKeepDim) + Assert.CheckEqual(t2ArgmaxKeepDim1Correct, t2ArgmaxKeepDim1) + + Assert.CheckEqual(t3Argmax0Correct, t3Argmax0) + Assert.CheckEqual(t3Argmax1Correct, t3Argmax1) + Assert.CheckEqual(t3Argmax2Correct, t3Argmax2) + + Assert.CheckEqual(t4ArgmaxCorrect, t4Argmax) + Assert.CheckEqual(t4Argmax0Correct, t4Argmax0) + + 
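// note the two call shapes exercised here: argmax() with no dim returns the full multi-dimensional index as an int array, while argmax(dim) returns a tensor of indices +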
Assert.CheckEqual(t5ArgmaxCorrect, t5Argmax) + Assert.CheckEqual(t5Argmax0Correct, t5Argmax0) + + Assert.CheckEqual(t6ArgmaxCorrect, t6Argmax) + Assert.CheckEqual(t6Argmax0Correct, t6Argmax0) + + for combo in Combos.Bool do + let t1 = combo.tensor([true; false]) + let t1Argmax = t1.argmax() + let t1ArgmaxCorrect = [| 0 |] + Assert.CheckEqual(t1ArgmaxCorrect, t1Argmax) + + let t2 = combo.tensor([[true; false];[false; true]]) + let t2Argmax = t2.argmax(0) + let t2ArgmaxCorrect = combo.tensor([0; 1], dtype=Dtype.Int32) + Assert.CheckEqual(t2ArgmaxCorrect, t2Argmax) + + [] + member _.TestTensorArgmin () = + for combo in Combos.IntegralAndFloatingPointExcept16s do + let t1 = combo.tensor([4.;1.;20.;3.]) + let t1Argmin = t1.argmin(0) + let t1ArgminCorrect = combo.tensor(1, dtype=Dtype.Int32) + + let t1ArgminKeepDim = dsharp.argmin(t1, 0, keepDim=true) + let t1ArgminKeepDimCorrect = combo.tensor([1], dtype=Dtype.Int32) + + let t2 = combo.tensor([[1.;4.];[2.;3.]]) + let t2Argmin = t2.argmin(0) + let t2ArgminCorrect = combo.tensor([0,1], dtype=Dtype.Int32) + + let t2ArgminKeepDim = t2.argmin(0, keepDim=true) + let t2ArgminKeepDimCorrect = combo.tensor([[0,1]], dtype=Dtype.Int32) + + let t2ArgminKeepDim1 = t2.argmin(1, keepDim=true) + let t2ArgminKeepDim1Correct = combo.tensor([[0],[0]], dtype=Dtype.Int32) + + let t3 = combo.tensor([[[ 7.6884; 65.9125; 4.0114]; + [46.7944; 61.5331; 40.1627]; + [48.3240; 4.9910; 50.1571]]; + + [[13.4777; 65.7656; 36.8161]; + [47.8268; 42.2229; 5.6115]; + [43.4779; 77.8675; 95.7660]]; + + [[59.8422; 47.1146; 36.7614]; + [71.6328; 18.5912; 27.7328]; + [49.9120; 60.3023; 53.0838]]]) + + let t3Argmin0 = t3.argmin(0) + let t3Argmin0Correct = combo.tensor([[0, 2, 0],[0, 2, 1],[1, 0, 0]],dtype=Dtype.Int32) + + let t3Argmin1 = t3.argmin(1) + let t3Argmin1Correct = combo.tensor([[0, 2, 0],[0, 1, 1],[2, 1, 1]],dtype=Dtype.Int32) + + let t3Argmin2 = t3.argmin(2) + let t3Argmin2Correct = combo.tensor([[2, 2, 1],[0, 2, 0],[2, 1, 0]],dtype=Dtype.Int32) + + let t4 = combo.tensor([[[[1;2]]]]) + let t4Argmin = t4.argmin() + let t4ArgminCorrect = [| 0;0;0;0 |] + + let t4Argmin0 = t4.argmin(0) + let t4Argmin0Correct = combo.tensor([[[0;0]]],dtype=Dtype.Int32) + + let t5 = combo.tensor([[[[1;2]]]]).unsqueeze(0) + let t5Argmin = t5.argmin() + let t5ArgminCorrect = [| 0;0;0;0;0 |] + + let t5Argmin0 = t5.argmin(0) + let t5Argmin0Correct = combo.tensor([[[[0;0]]]],dtype=Dtype.Int32) + + let t6 = combo.tensor([[[[1;2]]]]).unsqueeze(0).unsqueeze(0) + let t6Argmin = t6.argmin() + let t6ArgminCorrect = [| 0;0;0;0;0;0 |] + + let t6Argmin0 = t6.argmin(0) + let t6Argmin0Correct = combo.tensor([[[[0;0]]]],dtype=Dtype.Int32).unsqueeze(0) + + Assert.CheckEqual(t1ArgminCorrect, t1Argmin) + Assert.CheckEqual(t1ArgminKeepDimCorrect, t1ArgminKeepDim) + + Assert.CheckEqual(t2ArgminCorrect, t2Argmin) + Assert.CheckEqual(t2ArgminKeepDimCorrect, t2ArgminKeepDim) + Assert.CheckEqual(t2ArgminKeepDim1Correct, t2ArgminKeepDim1) + + Assert.CheckEqual(t3Argmin0Correct, t3Argmin0) + Assert.CheckEqual(t3Argmin1Correct, t3Argmin1) + Assert.CheckEqual(t3Argmin2Correct, t3Argmin2) + + Assert.CheckEqual(t4ArgminCorrect, t4Argmin) + Assert.CheckEqual(t4Argmin0Correct, t4Argmin0) + + Assert.CheckEqual(t5ArgminCorrect, t5Argmin) + Assert.CheckEqual(t5Argmin0Correct, t5Argmin0) + + Assert.CheckEqual(t6ArgminCorrect, t6Argmin) + Assert.CheckEqual(t6Argmin0Correct, t6Argmin0) + + for combo in Combos.Bool do + let t1 = combo.tensor([true; false]) + let t1Argmin = t1.argmin() + let t1ArgminCorrect = [| 1 |] + 
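// booleans order as false < true, so the first false (index 1) is the minimum of [true; false] +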
Assert.CheckEqual(t1ArgminCorrect, t1Argmin) + + let t2 = combo.tensor([[true; false];[false; true]]) + let t2Argmin = t2.argmin(0) + let t2ArgminCorrect = combo.tensor([1; 0], dtype=Dtype.Int32) + Assert.CheckEqual(t2ArgminCorrect, t2Argmin) + + [] + member _.TestTensorMaxBinary () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([[-4.9385; 12.6206; 10.1783]; + [-2.9624; 17.6992; 2.2506]; + [-2.3536; 8.0772; 13.5639]]) + let t2 = combo.tensor([[ 0.7027; 22.3251; -11.4533]; + [ 3.6887; 4.3355; 3.3767]; + [ 0.1203; -5.4088; 1.5658]]) + let t3 = t1.max(t2) + let t3Correct = combo.tensor([[ 0.7027; 22.3251; 10.1783]; + [ 3.6887; 17.6992; 3.3767]; + [ 0.1203; 8.0772; 13.5639]]) + + Assert.That(t3.allclose(t3Correct, 0.01)) + Assert.CheckEqual(t3.dtype, combo.dtype) + + [] + member _.TestTensorMinBinary () = + for combo in Combos.FloatingPoint do + let t1 = combo.tensor([[-4.9385; 12.6206; 10.1783]; + [-2.9624; 17.6992; 2.2506]; + [-2.3536; 8.0772; 13.5639]]) + let t2 = combo.tensor([[ 0.7027; 22.3251; -11.4533]; + [ 3.6887; 4.3355; 3.3767]; + [ 0.1203; -5.4088; 1.5658]]) + let t3 = t1.min(t2) + let t3Correct = combo.tensor([[ -4.9385; 12.6206; -11.4533]; + [ -2.9624; 4.3355; 2.2506]; + [ -2.3536; -5.4088; 1.5658]]) + + Assert.That(t3.allclose(t3Correct, 0.01)) + Assert.CheckEqual(t3.dtype, combo.dtype) + + [] + member _.TestTensorSoftmax () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([2.7291; 0.0607; 0.8290]) + let t1Softmax0 = t1.softmax(0) + let t1Softmax0Correct = combo.tensor([0.8204; 0.0569; 0.1227]) + + let t2 = combo.tensor([[1.3335; 1.6616; 2.4874; 6.1722]; + [3.3478; 9.3019; 1.0844; 8.9874]; + [8.6300; 1.8842; 9.1387; 9.1321]]) + let t2Softmax0 = t2.softmax(0) + let t2Softmax0Correct = combo.tensor([[6.7403e-04; 4.8014e-04; 1.2904e-03; 2.7033e-02]; + [5.0519e-03; 9.9892e-01; 3.1723e-04; 4.5134e-01]; + [9.9427e-01; 5.9987e-04; 9.9839e-01; 5.2163e-01]]) + let t2Softmax1 = t2.softmax(1) + let t2Softmax1Correct = combo.tensor([[7.5836e-03; 1.0528e-02; 2.4044e-02; 9.5784e-01]; + [1.4974e-03; 5.7703e-01; 1.5573e-04; 4.2131e-01]; + [2.3167e-01; 2.7240e-04; 3.8528e-01; 3.8277e-01]]) + + let t3 = combo.tensor([[[3.0897; 2.0902]; + [2.4055; 1.2437]; + [2.1253; 8.7802]; + [4.3856; 3.4456]]; + + [[8.6233; 6.9789]; + [4.9583; 9.9497]; + [2.6964; 1.6048]; + [2.1182; 2.1071]]; + + [[8.1097; 6.9804]; + [8.1223; 6.3030]; + [0.1873; 8.7840]; + [9.3609; 0.6493]]]) + + let t3Softmax0 = t3.softmax(0) + let t3Softmax0Correct = combo.tensor([[[2.4662e-03; 3.7486e-03]; + [3.1467e-03; 1.6136e-04]; + [3.4316e-01; 4.9885e-01]; + [6.8542e-03; 7.5571e-01]]; + + [[6.2411e-01; 4.9776e-01]; + [4.0415e-02; 9.7443e-01]; + [6.0743e-01; 3.8170e-04]; + [7.0995e-04; 1.9817e-01]]; + + [[3.7342e-01; 4.9849e-01]; + [9.5644e-01; 2.5410e-02]; + [4.9412e-02; 5.0077e-01]; + [9.9244e-01; 4.6122e-02]]]) + let t3Softmax1 = t3.softmax(1) + let t3Softmax1Correct = combo.tensor([[[1.8050e-01; 1.2351e-03]; + [9.1058e-02; 5.2978e-04]; + [6.8813e-02; 9.9344e-01]; + [6.5963e-01; 4.7904e-03]]; + + [[9.7109e-01; 4.8732e-02]; + [2.4864e-02; 9.5067e-01]; + [2.5896e-03; 2.2587e-04]; + [1.4526e-03; 3.7327e-04]]; + + [[1.8156e-01; 1.3190e-01]; + [1.8387e-01; 6.6997e-02]; + [6.5824e-05; 8.0087e-01]; + [6.3451e-01; 2.3479e-04]]]) + let t3Softmax2 = t3.softmax(2) + let t3Softmax2Correct = combo.tensor([[[7.3096e-01; 2.6904e-01]; + [7.6165e-01; 2.3835e-01]; + [1.2861e-03; 9.9871e-01]; + [7.1910e-01; 2.8090e-01]]; + + [[8.3814e-01; 1.6186e-01]; + [6.7502e-03; 9.9325e-01]; + [7.4868e-01; 
2.5132e-01]; + [5.0278e-01; 4.9722e-01]]; + + [[7.5571e-01; 2.4429e-01]; + [8.6049e-01; 1.3951e-01]; + [1.8468e-04; 9.9982e-01]; + [9.9984e-01; 1.6463e-04]]]) + + Assert.That(t1Softmax0.allclose(t1Softmax0Correct, 0.001)) + Assert.That(t2Softmax0.allclose(t2Softmax0Correct, 0.001)) + Assert.That(t2Softmax1.allclose(t2Softmax1Correct, 0.001)) + Assert.That(t3Softmax0.allclose(t3Softmax0Correct, 0.001)) + Assert.That(t3Softmax1.allclose(t3Softmax1Correct, 0.001)) + Assert.That(t3Softmax2.allclose(t3Softmax2Correct, 0.001)) + Assert.CheckEqual(t1Softmax0.dtype, combo.dtype) + Assert.CheckEqual(t2Softmax0.dtype, combo.dtype) + Assert.CheckEqual(t2Softmax1.dtype, combo.dtype) + Assert.CheckEqual(t3Softmax0.dtype, combo.dtype) + Assert.CheckEqual(t3Softmax1.dtype, combo.dtype) + Assert.CheckEqual(t3Softmax2.dtype, combo.dtype) + + + [] + member _.TestTensorLogsoftmax () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([2.7291, 0.0607, 0.8290]) + let t1Logsoftmax0 = t1.logsoftmax(0) + let t1Logsoftmax0Correct = combo.tensor([-0.1980, -2.8664, -2.0981]) + + let t2 = combo.tensor([[1.3335, 1.6616, 2.4874, 6.1722], + [3.3478, 9.3019, 1.0844, 8.9874], + [8.6300, 1.8842, 9.1387, 9.1321]]) + let t2Logsoftmax0 = t2.logsoftmax(0) + let t2Logsoftmax0Correct = combo.tensor([[-7.3022e+00, -7.6414e+00, -6.6529e+00, -3.6107e+00], + [-5.2879e+00, -1.0806e-03, -8.0559e+00, -7.9552e-01], + [-5.7426e-03, -7.4188e+00, -1.6088e-03, -6.5082e-01]]) + let t2Logsoftmax1 = t2.logsoftmax(1) + let t2Logsoftmax1Correct = combo.tensor([[-4.8818, -4.5537, -3.7279, -0.0431], + [-6.5040, -0.5499, -8.7674, -0.8644], + [-1.4624, -8.2082, -0.9537, -0.9603]]) + + let t3 = combo.tensor([[[3.0897, 2.0902], + [2.4055, 1.2437], + [2.1253, 8.7802], + [4.3856, 3.4456]], + + [[8.6233, 6.9789], + [4.9583, 9.9497], + [2.6964, 1.6048], + [2.1182, 2.1071]], + + [[8.1097, 6.9804], + [8.1223, 6.3030], + [0.1873, 8.7840], + [9.3609, 0.6493]]]) + + let t3Logsoftmax0 = t3.logsoftmax(0) + let t3Logsoftmax0Correct = combo.tensor([[[-6.0050e+00, -5.5864e+00], + [-5.7613e+00, -8.7319e+00], + [-1.0696e+00, -6.9543e-01], + [-4.9829e+00, -2.8011e-01]], + + [[-4.7143e-01, -6.9765e-01], + [-3.2085e+00, -2.5904e-02], + [-4.9850e-01, -7.8708e+00], + [-7.2503e+00, -1.6186e+00]], + + [[-9.8503e-01, -6.9615e-01], + [-4.4540e-02, -3.6726e+00], + [-3.0076e+00, -6.9163e-01], + [-7.5929e-03, -3.0764e+00]]]) + let t3Logsoftmax1 = t3.logsoftmax(1) + let t3Logsoftmax1Correct = combo.tensor([[[-1.7120e+00, -6.6966e+00], + [-2.3962e+00, -7.5431e+00], + [-2.6764e+00, -6.5767e-03], + [-4.1609e-01, -5.3412e+00]], + + [[-2.9332e-02, -3.0214e+00], + [-3.6943e+00, -5.0591e-02], + [-5.9562e+00, -8.3955e+00], + [-6.5344e+00, -7.8932e+00]], + + [[-1.7061e+00, -2.0257e+00], + [-1.6935e+00, -2.7031e+00], + [-9.6285e+00, -2.2207e-01], + [-4.5492e-01, -8.3568e+00]]]) + let t3Logsoftmax2 = t3.logsoftmax(2) + let t3Logsoftmax2Correct = combo.tensor([[[-3.1340e-01, -1.3129e+00], + [-2.7226e-01, -1.4341e+00], + [-6.6562e+00, -1.2869e-03], + [-3.2976e-01, -1.2698e+00]], + + [[-1.7658e-01, -1.8210e+00], + [-4.9982e+00, -6.7731e-03], + [-2.8944e-01, -1.3810e+00], + [-6.8761e-01, -6.9871e-01]], + + [[-2.8010e-01, -1.4094e+00], + [-1.5026e-01, -1.9696e+00], + [-8.5969e+00, -1.8464e-04], + [-1.6461e-04, -8.7118e+00]]]) + Assert.That(t1Logsoftmax0.allclose(t1Logsoftmax0Correct, 0.01)) + Assert.That(t2Logsoftmax0.allclose(t2Logsoftmax0Correct, 0.01)) + Assert.That(t2Logsoftmax1.allclose(t2Logsoftmax1Correct, 0.01)) + 
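// sanity anchor for these expectations: logsoftmax(x) = x - logsumexp(x), so exponentiating any slice of a result along its softmax dim sums to 1 +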
Assert.That(t3Logsoftmax0.allclose(t3Logsoftmax0Correct, 0.01)) + Assert.That(t3Logsoftmax1.allclose(t3Logsoftmax1Correct, 0.01)) + Assert.That(t3Logsoftmax2.allclose(t3Logsoftmax2Correct, 0.01)) + + [] + member _.TestTensorLogsumexp () = + for combo in Combos.FloatingPointExcept16s do + let t1 = combo.tensor([2.7291, 0.0607, 0.8290]) + let t1Logsumexp0 = t1.logsumexp(0) + let t1Logsumexp0Correct = combo.tensor(2.9271) + let t1Logsumexp0keepdim = t1.logsumexp(0, keepDim=true) + let t1Logsumexp0keepdimCorrect = combo.tensor([2.9271]) + + let t2 = combo.tensor([[1.3335, 1.6616, 2.4874, 6.1722], + [3.3478, 9.3019, 1.0844, 8.9874], + [8.6300, 1.8842, 9.1387, 9.1321]]) + let t2Logsumexp0 = t2.logsumexp(0) + let t2Logsumexp0Correct = combo.tensor([8.6357, 9.3030, 9.1403, 9.7829]) + let t2Logsumexp0keepdim = t2.logsumexp(0, keepDim=true) + let t2Logsumexp0keepdimCorrect = combo.tensor([[8.6357, 9.3030, 9.1403, 9.7829]]) + let t2Logsumexp1 = t2.logsumexp(1) + let t2Logsumexp1Correct = combo.tensor([ 6.2153, 9.8518, 10.0924]) + let t2Logsumexp1keepdim = t2.logsumexp(1, keepDim=true) + let t2Logsumexp1keepdimCorrect = combo.tensor([[ 6.2153], + [ 9.8518], + [10.0924]]) + + let t3 = combo.tensor([[[3.0897, 2.0902], + [2.4055, 1.2437], + [2.1253, 8.7802], + [4.3856, 3.4456]], + + [[8.6233, 6.9789], + [4.9583, 9.9497], + [2.6964, 1.6048], + [2.1182, 2.1071]], + + [[8.1097, 6.9804], + [8.1223, 6.3030], + [0.1873, 8.7840], + [9.3609, 0.6493]]]) + + let t3Logsumexp0 = t3.logsumexp(0) + let t3Logsumexp0Correct = combo.tensor([[9.0947, 7.6766], + [8.1668, 9.9756], + [3.1949, 9.4756], + [9.3685, 3.7257]]) + let t3Logsumexp0keepdim = t3.logsumexp(0, keepDim=true) + let t3Logsumexp0keepdimCorrect = combo.tensor([[[9.0947, 7.6766], + [8.1668, 9.9756], + [3.1949, 9.4756], + [9.3685, 3.7257]]]) + let t3Logsumexp1 = t3.logsumexp(1) + let t3Logsumexp1Correct = combo.tensor([[ 4.8017, 8.7868], + [ 8.6526, 10.0003], + [ 9.8158, 9.0061]]) + let t3Logsumexp1keepdim = t3.logsumexp(1, keepDim=true) + let t3Logsumexp1keepdimCorrect = combo.tensor([[[ 4.8017, 8.7868]], + + [[ 8.6526, 10.0003]], + + [[ 9.8158, 9.0061]]]) + let t3Logsumexp2 = t3.logsumexp(2) + let t3Logsumexp2Correct = combo.tensor([[3.4031, 2.6778, 8.7815, 4.7154], + [8.7999, 9.9565, 2.9858, 2.8058], + [8.3898, 8.2726, 8.7842, 9.3611]]) + let t3Logsumexp2keepdim = t3.logsumexp(2, keepDim=true) + let t3Logsumexp2keepdimCorrect = combo.tensor([[[3.4031], + [2.6778], + [8.7815], + [4.7154]], + + [[8.7999], + [9.9565], + [2.9858], + [2.8058]], + + [[8.3898], + [8.2726], + [8.7842], + [9.3611]]]) + + let t4 = combo.tensor([[167.385696, -146.549866, 168.850235, -41.856903, -56.691696, -78.774994, 42.035625, 97.490936, -42.763878, -2.130855], + [-62.961613, -497.529846, 371.218231, -30.224543, 368.146393, -325.945068, -292.102631, -24.760872, 130.348282, -193.775909]]) + let t4Logsumexp1 = t4.logsumexp(dim=1) + let t4Logsumexp1Correct = combo.tensor([169.0582, 371.2635]) + Assert.That(t1Logsumexp0.allclose(t1Logsumexp0Correct, 0.001)) + Assert.That(t2Logsumexp0.allclose(t2Logsumexp0Correct, 0.001)) + Assert.That(t2Logsumexp1.allclose(t2Logsumexp1Correct, 0.001)) + Assert.That(t3Logsumexp0.allclose(t3Logsumexp0Correct, 0.001)) + Assert.That(t3Logsumexp1.allclose(t3Logsumexp1Correct, 0.001)) + Assert.That(t3Logsumexp2.allclose(t3Logsumexp2Correct, 0.001)) + Assert.That(t1Logsumexp0keepdim.allclose(t1Logsumexp0keepdimCorrect, 0.001)) + Assert.That(t2Logsumexp0keepdim.allclose(t2Logsumexp0keepdimCorrect, 0.001)) + 
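// the t4 case below stresses numerical stability: naive log(sum(exp x)) overflows for its large-magnitude entries, and a max-shifted evaluation, m + log(sum(exp(x - m))) with m = max(x), is presumably what keeps the result finite +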
Assert.That(t2Logsumexp1keepdim.allclose(t2Logsumexp1keepdimCorrect, 0.001)) + Assert.That(t3Logsumexp0keepdim.allclose(t3Logsumexp0keepdimCorrect, 0.001)) + Assert.That(t3Logsumexp1keepdim.allclose(t3Logsumexp1keepdimCorrect, 0.001)) + Assert.That(t3Logsumexp2keepdim.allclose(t3Logsumexp2keepdimCorrect, 0.001)) + Assert.That(t4Logsumexp1.allclose(t4Logsumexp1Correct, 0.75)) + + [] + member _.TestTensorNllLoss () = + for combo in Combos.FloatingPointExcept16s do + let t1a = combo.tensor([[0.15,0.85],[0.5,0.5],[0.8,0.2]]).log() + let t1b = combo.tensor([0,1,1]) + let t1w = combo.tensor([-1.2,0.6]) + let l1 = dsharp.nllLoss(t1a, t1b) + let l1Correct = combo.tensor(1.3999) + // Note, test disabled - this is not the correct answer, even on the backend + // it was coming out as -Infinity + //let l2 = dsharp.nllLoss(t1a, t1b, weight=t1w) + //let l2Correct = combo.tensor(-0.8950) + let l3 = dsharp.nllLoss(t1a, t1b, reduction="none") + let l3Correct = combo.tensor([1.8971, 0.6931, 1.6094]) + let l4 = dsharp.nllLoss(t1a, t1b, reduction="none", weight=t1w) + let l4Correct = combo.tensor([-2.2765, 0.4159, 0.9657]) + let l5 = dsharp.nllLoss(t1a, t1b, reduction="sum") + let l5Correct = combo.tensor(4.1997) + let l6 = dsharp.nllLoss(t1a, t1b, reduction="sum", weight=t1w) + let l6Correct = combo.tensor(-0.8950) + + let t2a = combo.tensor([[[[-1.9318, -1.9386, -0.9488, -0.8787], + [-1.1891, -2.4614, -1.0514, -1.1577], + [-1.1977, -1.2468, -0.8123, -1.2226], + [-0.9584, -2.1857, -0.9079, -1.5362]], + + [[-0.5465, -0.3825, -1.2375, -0.8330], + [-2.4107, -0.8157, -0.9717, -1.0601], + [-0.9040, -1.3655, -1.6613, -1.0334], + [-0.8829, -1.4097, -1.5420, -1.9021]], + + [[-1.2868, -1.7491, -1.1311, -1.8975], + [-0.5013, -0.7500, -1.3016, -1.0807], + [-1.2271, -0.7824, -1.0044, -1.0505], + [-1.5950, -0.4410, -0.9606, -0.4533]]], + + + [[[-1.9389, -2.4012, -1.0333, -1.4381], + [-1.5336, -1.6488, -2.1201, -1.5972], + [-1.2268, -1.2666, -0.7287, -1.1079], + [-1.3558, -1.0362, -1.2035, -1.0245]], + + [[-0.5721, -0.3562, -1.0314, -0.8208], + [-0.4922, -0.5392, -0.9215, -0.5276], + [-1.3011, -0.6734, -0.9661, -0.5593], + [-0.6594, -0.9271, -1.0346, -0.7122]], + + [[-1.2316, -1.5651, -1.2460, -1.1315], + [-1.7548, -1.4939, -0.7297, -1.5724], + [-0.8335, -1.5690, -1.9886, -2.3212], + [-1.4912, -1.3883, -1.0658, -1.8940]]]]) + let t2b = combo.tensor([[[2, 0, 1, 2], + [2, 0, 1, 0], + [2, 1, 0, 1], + [1, 2, 1, 1]], + + [[2, 0, 2, 0], + [0, 1, 0, 2], + [2, 0, 2, 1], + [1, 1, 1, 2]]]) + let t2w = combo.tensor([ 1.1983, -0.2633, -0.3064]) + let l7 = dsharp.nllLoss(t2a, t2b) + let l7Correct = combo.tensor(1.3095) + let l8 = dsharp.nllLoss(t2a, t2b, weight=t2w) + let l8Correct = combo.tensor(2.4610) + let l9 = dsharp.nllLoss(t2a, t2b, reduction="none") + let l9Correct = combo.tensor([[[1.2868, 1.9386, 1.2375, 1.8975], + [0.5013, 2.4614, 0.9717, 1.1577], + [1.2271, 1.3655, 0.8123, 1.0334], + [0.8829, 0.4410, 1.5420, 1.9021]], + + [[1.2316, 2.4012, 1.2460, 1.4381], + [1.5336, 0.5392, 2.1201, 1.5724], + [0.8335, 1.2666, 1.9886, 0.5593], + [0.6594, 0.9271, 1.0346, 1.8940]]]) + let l10 = dsharp.nllLoss(t2a, t2b, reduction="none", weight=t2w) + let l10Correct = combo.tensor([[[-0.3943, 2.3231, -0.3258, -0.5814], + [-0.1536, 2.9496, -0.2558, 1.3872], + [-0.3760, -0.3595, 0.9734, -0.2721], + [-0.2324, -0.1351, -0.4059, -0.5007]], + + [[-0.3774, 2.8775, -0.3818, 1.7233], + [ 1.8378, -0.1419, 2.5406, -0.4818], + [-0.2554, 1.5179, -0.6093, -0.1472], + [-0.1736, -0.2440, -0.2724, -0.5804]]]) + let l11 = dsharp.nllLoss(t2a, t2b, 
reduction="sum") + let l11Correct = combo.tensor(41.9042) + let l12 = dsharp.nllLoss(t2a, t2b, reduction="sum", weight=t2w) + let l12Correct = combo.tensor(10.4726) + + Assert.That(l1Correct.allclose(l1, 0.001)) + //Assert.That(l2Correct.allclose(l2, 0.001)) + Assert.That(l3Correct.allclose(l3, 0.001)) + Assert.That(l4Correct.allclose(l4, 0.001)) + Assert.That(l5Correct.allclose(l5, 0.001)) + Assert.That(l6Correct.allclose(l6, 0.001)) + Assert.That(l7Correct.allclose(l7, 0.001)) + Assert.That(l8Correct.allclose(l8, 0.001)) + Assert.That(l9Correct.allclose(l9, 0.001)) + Assert.That(l10Correct.allclose(l10, 0.001)) + Assert.That(l11Correct.allclose(l11, 0.001)) + Assert.That(l12Correct.allclose(l12, 0.001)) + + [] + member _.TestTensorCrossEntropyLoss () = + for combo in Combos.FloatingPointExcept16s do + let t1a = combo.tensor([[-0.6596, 0.3078, -0.2525, -0.2593, -0.2354], + [ 0.4708, 0.6073, 1.5621, -1.4636, 0.9769], + [ 0.5078, 0.0579, 1.0054, 0.3532, 1.1819], + [ 1.5425, -0.2887, 1.0716, -1.3946, 0.8806]]) + let t1b = combo.tensor([3, 1, 0, 4]) + let t1w = combo.tensor([-1.4905, 0.5929, 1.0018, -1.0858, -0.5993]) + let l1 = dsharp.crossEntropyLoss(t1a, t1b) + let l1Correct = combo.tensor(1.7059) + let l2 = dsharp.crossEntropyLoss(t1a, t1b, weight=t1w) + let l2Correct = combo.tensor(1.6969) + let l3 = dsharp.crossEntropyLoss(t1a, t1b, reduction="none") + let l3Correct = combo.tensor([1.6983, 1.7991, 1.8085, 1.5178]) + let l4 = dsharp.crossEntropyLoss(t1a, t1b, reduction="none", weight=t1w) + let l4Correct = combo.tensor([-1.8439, 1.0666, -2.6956, -0.9096]) + let l5 = dsharp.crossEntropyLoss(t1a, t1b, reduction="sum") + let l5Correct = combo.tensor(6.8237) + let l6 = dsharp.crossEntropyLoss(t1a, t1b, reduction="sum", weight=t1w) + let l6Correct = combo.tensor(-4.3825) + + Assert.That(l1Correct.allclose(l1, 0.001)) + Assert.That(l2Correct.allclose(l2, 0.001)) + Assert.That(l3Correct.allclose(l3, 0.001)) + Assert.That(l4Correct.allclose(l4, 0.001)) + Assert.That(l5Correct.allclose(l5, 0.001)) + Assert.That(l6Correct.allclose(l6, 0.001)) + + [] + member _.TestTensorMseLoss () = + for combo in Combos.FloatingPoint do + let t1a = combo.tensor([-0.2425, 0.2643, 0.7070, 1.2049, 1.6245]) + let t1b = combo.tensor([-1.0742, 1.5874, 0.6509, 0.8715, 0.0692]) + let l1 = dsharp.mseLoss(t1a, t1b) + let l1Correct = combo.tensor(0.9951) + let l2 = dsharp.mseLoss(t1a, t1b, reduction="none") + let l2Correct = combo.tensor([0.6917, 1.7507, 0.0031, 0.1112, 2.4190]) + let l3 = dsharp.mseLoss(t1a, t1b, reduction="sum") + let l3Correct = combo.tensor(4.9756) + + let t2a = combo.tensor([[ 0.6650, 0.5049, -0.7356, 0.5312, -0.6574], + [ 1.0133, 0.9106, 0.1523, 0.2662, 1.1438], + [ 0.3641, -1.8525, -0.0822, -1.0361, 0.2723]]) + let t2b = combo.tensor([[-1.0001, -1.4867, -0.3340, -0.2590, 0.1395], + [-2.0158, 0.8281, 1.1726, -0.2359, 0.5007], + [ 1.3242, 0.5215, 1.4293, -1.4235, 0.2473]]) + let l4 = dsharp.mseLoss(t2a, t2b) + let l4Correct = combo.tensor(1.8694) + let l5 = dsharp.mseLoss(t2a, t2b, reduction="none") + let l5Correct = combo.tensor([[2.7726e+00, 3.9663e+00, 1.6130e-01, 6.2438e-01, 6.3511e-01], + [9.1753e+00, 6.8075e-03, 1.0409e+00, 2.5207e-01, 4.1352e-01], + [9.2194e-01, 5.6358e+00, 2.2848e+00, 1.5011e-01, 6.2556e-04]]) + let l6 = dsharp.mseLoss(t2a, t2b, reduction="sum") + let l6Correct = combo.tensor(28.0416) + + Assert.That(l1Correct.allclose(l1, 0.01, 0.01)) + Assert.That(l2Correct.allclose(l2, 0.01, 0.01)) + Assert.That(l3Correct.allclose(l3, 0.01, 0.01)) + Assert.That(l4Correct.allclose(l4, 0.01, 
0.01)) + Assert.That(l5Correct.allclose(l5, 0.01, 0.01)) + Assert.That(l6Correct.allclose(l6, 0.01, 0.01)) + + [] + member _.TestTensorBceLoss () = + for combo in Combos.FloatingPointExcept16s do + let t1a = combo.tensor([[0.6732, 0.3984, 0.1378, 0.4564, 0.0396], + [0.7311, 0.6797, 0.8294, 0.8716, 0.5781], + [0.6032, 0.0346, 0.3714, 0.7304, 0.0434]]) + let t1b = combo.tensor([[0.1272, 0.8250, 0.5473, 0.2635, 0.2387], + [0.9150, 0.9273, 0.3127, 0.7458, 0.5805], + [0.2771, 0.3095, 0.8710, 0.0176, 0.7242]]) + let t1w = combo.tensor([0.9270, 0.4912, 0.7324]) + let l1 = dsharp.bceLoss(t1a, t1b) + let l1Correct = combo.tensor(0.9516) + let l2 = dsharp.bceLoss(t1a, t1b, reduction="none") + let l2Correct = combo.tensor([[1.0264, 0.8481, 1.1520, 0.6556, 0.8016], + [0.3982, 0.4408, 1.2739, 0.6242, 0.6801], + [0.8083, 1.0655, 0.9226, 1.2933, 2.2837]]) + let l3 = dsharp.bceLoss(t1a, t1b, reduction="sum") + let l3Correct = combo.tensor(14.2745) + let l4 = dsharp.bceLoss(t1a, t1b, weight=t1w) + let l4Correct = combo.tensor(0.7002) + let l5 = dsharp.bceLoss(t1a, t1b, reduction="none", weight=t1w) + let l5Correct = combo.tensor([[0.9515, 0.7862, 1.0679, 0.6078, 0.7431], + [0.1956, 0.2165, 0.6258, 0.3066, 0.3341], + [0.5920, 0.7804, 0.6757, 0.9472, 1.6726]]) + let l6 = dsharp.bceLoss(t1a, t1b, reduction="sum", weight=t1w) + let l6Correct = combo.tensor(10.5032) + + Assert.That(l1Correct.allclose(l1, 0.01, 0.01)) + Assert.That(l2Correct.allclose(l2, 0.01, 0.01)) + Assert.That(l3Correct.allclose(l3, 0.01, 0.01)) + Assert.That(l4Correct.allclose(l4, 0.01, 0.01)) + Assert.That(l5Correct.allclose(l5, 0.01, 0.01)) + Assert.That(l6Correct.allclose(l6, 0.01, 0.01)) + + let t2a = combo.tensor([[[[0.2059, 0.2048], + [0.4209, 0.4259], + [0.7000, 0.0548], + [0.8414, 0.4659]], + + [[0.5841, 0.3222], + [0.3938, 0.6895], + [0.9694, 0.9424], + [0.1203, 0.3375]], + + [[0.8713, 0.6122], + [0.1029, 0.6634], + [0.2255, 0.0801], + [0.7698, 0.8014]]], + + + [[[0.5361, 0.2349], + [0.9668, 0.7860], + [0.8522, 0.2324], + [0.3445, 0.2305]], + + [[0.8407, 0.0118], + [0.5815, 0.7817], + [0.3957, 0.3336], + [0.3524, 0.2699]], + + [[0.4809, 0.0975], + [0.9397, 0.7868], + [0.0906, 0.2595], + [0.7253, 0.7387]]]]) + let t2b = combo.tensor([[[[0.2288, 0.7639], + [0.6365, 0.8856], + [0.4637, 0.6467], + [0.2029, 0.7676]], + + [[0.8061, 0.7229], + [0.8075, 0.6854], + [0.4748, 0.9848], + [0.6456, 0.3080]], + + [[0.5581, 0.5572], + [0.0235, 0.9419], + [0.1633, 0.3299], + [0.9430, 0.8501]]], + + + [[[0.5505, 0.8364], + [0.8939, 0.3968], + [0.5038, 0.3390], + [0.4534, 0.2653]], + + [[0.5895, 0.2453], + [0.1937, 0.5779], + [0.2846, 0.4662], + [0.6777, 0.2539]], + + [[0.9834, 0.2274], + [0.7440, 0.5305], + [0.4400, 0.0725], + [0.7565, 0.9452]]]]) + let l7 = dsharp.bceLoss(t2a, t2b) + let l7Correct = combo.tensor(0.7858) + Assert.That(l7Correct.allclose(l7, 0.01, 0.01)) + + [] + member _.TestTensorNormalize () = + for combo in Combos.FloatingPoint do + let t0 = combo.tensor(0.5) + let t0n = t0.normalize() + let t0nCorrect = combo.tensor(0.) 
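+ // normalize() behaves as min-max scaling, (x - min) / (max - min), judging by the expected values below; constant inputs (max = min, as for t0 and t1) map to 0 rather than NaN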
+
+            let t1 = combo.tensor([-2,-2])
+            let t1n = t1.normalize()
+            let t1nCorrect = combo.tensor([0.,0.])
+
+            let t2 = combo.tensor([[-2.,-1.,0.,1.,2.,3.],[0.5, 0.7, -5.2, 2.3, 1., 2.]])
+            let t2n = t2.normalize()
+            let t2nCorrect = combo.tensor([[0.3902, 0.5122, 0.6341, 0.7561, 0.8780, 1.0000],
+                                           [0.6951, 0.7195, 0.0000, 0.9146, 0.7561, 0.8780]])
+
+            Assert.That(t0nCorrect.allclose(t0n, 0.01, 0.01))
+            Assert.That(t1nCorrect.allclose(t1n, 0.01, 0.01))
+            Assert.That(t2nCorrect.allclose(t2n, 0.01, 0.01))
+
+    [<Test>]
+    member _.TestTensorStandardize () =
+        for combo in Combos.FloatingPointExcept16s do
+            let t0 = combo.tensor(0.5)
+            let t0s = t0.standardize()
+            let t0sCorrect = combo.tensor(0.)
+
+            let t1 = combo.tensor([-2,-2])
+            let t1s = t1.standardize()
+            let t1sCorrect = combo.tensor([0.,0.])
+
+            let t2 = combo.tensor([[-2.,-1.,0.,1.,2.,3.],[0.5, 0.7, -5.2, 2.3, 1., 2.]])
+            let t2s = t2.standardize()
+            let t2sCorrect = combo.tensor([[-1.0496, -0.6046, -0.1595, 0.2856, 0.7307, 1.1757],
+                                           [ 0.0631, 0.1521, -2.4739, 0.8642, 0.2856, 0.7307]])
+
+            Assert.That(t0sCorrect.allclose(t0s, 0.01, 0.01))
+            Assert.That(t1sCorrect.allclose(t1s, 0.01, 0.01))
+            Assert.That(t2sCorrect.allclose(t2s, 0.01, 0.01))
+
+    [<Test>]
+    member _.TestTensorIEnumerable () =
+        for combo in Combos.All do
+            let t1 = combo.tensor([1,2,3])
+            t1.unstack() |> Seq.iteri (fun i v -> Assert.CheckEqual(t1[i], v))
+            let t2 = combo.tensor([[1,2,3], [4,5,6]])
+            t2.unstack() |> Seq.iteri (fun i v -> Assert.CheckEqual(t2[i,*], v))
+
+    [<Test>]
+    member _.TestTensorFSharpCoreOps () =
+        for combo in Combos.FloatingPointExcept16s do
+            let t = combo.tensor([0.1; 0.2; 0.3])
+            let add = t + t
+            let addCorrect = t.add(t)
+            let sub = t - t
+            let subCorrect = t.sub(t)
+            let mul = t * t
+            let mulCorrect = t.mul(t)
+            let div = t / t
+            let divCorrect = t.div(t)
+            let pow = t ** t
+            let powCorrect = t.pow(t)
+            let neg = -t
+            let negCorrect = t.neg()
+            // sign t not supported because FSharp.Core sign operator returns int
+            let floor = floor t
+            let floorCorrect = t.floor()
+            let ceil = ceil t
+            let ceilCorrect = t.ceil()
+            let round = round t
+            let roundCorrect = t.round()
+            let abs = abs t
+            let absCorrect = t.abs()
+            let exp = exp t
+            let expCorrect = t.exp()
+            let log = log t
+            let logCorrect = t.log()
+            let log10 = log10 t
+            let log10Correct = t.log10()
+            let sqrt = sqrt t
+            let sqrtCorrect = t.sqrt()
+            let sin = sin t
+            let sinCorrect = t.sin()
+            let cos = cos t
+            let cosCorrect = t.cos()
+            let tan = tan t
+            let tanCorrect = t.tan()
+            let sinh = sinh t
+            let sinhCorrect = t.sinh()
+            let cosh = cosh t
+            let coshCorrect = t.cosh()
+            let tanh = tanh t
+            let tanhCorrect = t.tanh()
+            let asin = asin t
+            let asinCorrect = t.asin()
+            let acos = acos t
+            let acosCorrect = t.acos()
+            let atan = atan t
+            let atanCorrect = t.atan()
+
+            Assert.CheckEqual(addCorrect, add)
+            Assert.CheckEqual(subCorrect, sub)
+            Assert.CheckEqual(mulCorrect, mul)
+            Assert.CheckEqual(divCorrect, div)
+            Assert.CheckEqual(powCorrect, pow)
+            Assert.CheckEqual(negCorrect, neg)
+            Assert.CheckEqual(floorCorrect, floor)
+            Assert.CheckEqual(ceilCorrect, ceil)
+            Assert.CheckEqual(roundCorrect, round)
+            Assert.CheckEqual(absCorrect, abs)
+            Assert.CheckEqual(expCorrect, exp)
+            Assert.CheckEqual(logCorrect, log)
+            Assert.CheckEqual(log10Correct, log10)
+            Assert.CheckEqual(sqrtCorrect, sqrt)
+            Assert.CheckEqual(sinCorrect, sin)
+            Assert.CheckEqual(cosCorrect, cos)
+            Assert.CheckEqual(tanCorrect, tan)
+            Assert.CheckEqual(sinhCorrect, sinh)
+            Assert.CheckEqual(coshCorrect, cosh)
+            Assert.CheckEqual(tanhCorrect, tanh)
+            Assert.CheckEqual(asinCorrect, asin)
+            Assert.CheckEqual(acosCorrect, acos)
+            Assert.CheckEqual(atanCorrect, atan)
+
+            Assert.CheckEqual(combo.dtype, add.dtype)
+            Assert.CheckEqual(combo.dtype, sub.dtype)
+            Assert.CheckEqual(combo.dtype, mul.dtype)
+            Assert.CheckEqual(combo.dtype, div.dtype)
+            Assert.CheckEqual(combo.dtype, pow.dtype)
+            Assert.CheckEqual(combo.dtype, neg.dtype)
+            Assert.CheckEqual(combo.dtype, floor.dtype)
+            Assert.CheckEqual(combo.dtype, ceil.dtype)
+            Assert.CheckEqual(combo.dtype, round.dtype)
+            Assert.CheckEqual(combo.dtype, abs.dtype)
+            Assert.CheckEqual(combo.dtype, exp.dtype)
+            Assert.CheckEqual(combo.dtype, log.dtype)
+            Assert.CheckEqual(combo.dtype, log10.dtype)
+            Assert.CheckEqual(combo.dtype, sqrt.dtype)
+            Assert.CheckEqual(combo.dtype, sin.dtype)
+            Assert.CheckEqual(combo.dtype, cos.dtype)
+            Assert.CheckEqual(combo.dtype, tan.dtype)
+            Assert.CheckEqual(combo.dtype, sinh.dtype)
+            Assert.CheckEqual(combo.dtype, cosh.dtype)
+            Assert.CheckEqual(combo.dtype, tanh.dtype)
+            Assert.CheckEqual(combo.dtype, asin.dtype)
+            Assert.CheckEqual(combo.dtype, acos.dtype)
+            Assert.CheckEqual(combo.dtype, atan.dtype)
diff --git a/tests/TensorMath.Tests/TestTensorMath.fs b/tests/TensorMath.Tests/TestTensorMath.fs
new file mode 100644
index 0000000..c5788c9
--- /dev/null
+++ b/tests/TensorMath.Tests/TestTensorMath.fs
@@ -0,0 +1,309 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace Tests
+
+open NUnit.Framework
+open TensorMath
+open TensorMath.Compose
+
+
+[<TestFixture>]
+type TestTensorMath () =
+
+    let rosenbrock (x:Tensor) =
+        let x, y = x[0], x[1]
+        (1. - x)**2 + 100. * (y - x**2)**2
+    let rosenbrockGrad (x:Tensor) =
+        let x, y = x[0], x[1]
+        dsharp.tensor([-2*(1-x)-400*x*(-(x**2) + y); 200*(-(x**2) + y)])
+    let rosenbrockHessian (x:Tensor) =
+        let x, y = x[0], x[1]
+        dsharp.tensor([[2.+1200.*x*x-400.*y, -400.*x],[-400.*x, 200.*dsharp.one()]])
+
+    let fscalarscalar (x:Tensor) = dsharp.sin x
+    let fscalarscalarDiff (x:Tensor) = dsharp.cos x
+
+    let fscalarvect3 (x:Tensor) = dsharp.stack([sin x; exp x; cos x])
+    let fscalarvect3Diff (x:Tensor) = dsharp.stack([cos x; exp x; -sin x])
+    let fscalarvect3Diff2 (x:Tensor) = dsharp.stack([-sin x; exp x; -cos x])
+    let fscalarvect3Diff3 (x:Tensor) = dsharp.stack([-cos x; exp x; sin x])
+
+    let fvect2vect2 (x:Tensor) =
+        let x, y = x[0], x[1]
+        dsharp.stack([x*x*y; 5*x+sin y])
+    let fvect2vect2Jacobian (x:Tensor) =
+        let x, y = x[0], x[1]
+        dsharp.tensor([[2*x*y; x*x];[dsharp.tensor(5.); cos y]])
+
+    let fvect3vect2 (x:Tensor) =
+        let x, y, z = x[0], x[1], x[2]
+        dsharp.stack([x*y+2*y*z;2*x*y*y*z])
+    let fvect3vect2Jacobian (x:Tensor) =
+        let x, y, z = x[0], x[1], x[2]
+        dsharp.tensor([[y;x+2*z;2*y];[2*y*y*z;4*x*y*z;2*x*y*y]])
+
+    let fvect3vect3 (x:Tensor) =
+        let r, theta, phi = x[0], x[1], x[2]
+        dsharp.stack([r*(sin phi)*(cos theta); r*(sin phi)*(sin theta); r*cos phi])
+    let fvect3vect3Jacobian (x:Tensor) =
+        let r, theta, phi = x[0], x[1], x[2]
+        dsharp.tensor([[(sin phi)*(cos theta); -r*(sin phi)*(sin theta); r*(cos phi)*(cos theta)];[(sin phi)*(sin theta); r*(sin phi)*(cos theta); r*(cos phi)*(sin theta)];[cos phi; dsharp.zero(); -r*sin phi]])
+
+    let fvect3vect4 (x:Tensor) =
+        let y1, y2, y3, y4 = x[0], 5*x[2], 4*x[1]*x[1]-2*x[2], x[2]*sin x[0]
+        dsharp.stack([y1;y2;y3;y4])
+    let fvect3vect4Jacobian (x:Tensor) =
+        let z, o = dsharp.zero(), dsharp.one()
+        dsharp.tensor([[o,z,z],[z,z,5*o],[z,8*x[1],-2*o],[x[2]*cos x[0],z,sin x[0]]])
+
+    [<SetUp>]
+    member this.Setup () =
+        ()
+
+    [<Test>]
+    member this.TestZero () =
+        let t = dsharp.zero(dtype=Int32)
+        let tCorrect = dsharp.tensor(0)
+        Assert.CheckEqual(tCorrect, t)
+
+    [<Test>]
+    member this.TestZeros () =
+        let t = dsharp.zeros([2;3], dtype=Int32)
+        let tCorrect = dsharp.tensor([[0,0,0],[0,0,0]])
+        Assert.CheckEqual(tCorrect, t)
+
+    [<Test>]
+    member this.TestOne () =
+        let t = dsharp.one(dtype=Int32)
+        let tCorrect = dsharp.tensor(1)
+        Assert.CheckEqual(tCorrect, t)
+
+    [<Test>]
+    member this.TestOnes () =
+        let t = dsharp.ones([2;3], dtype=Int32)
+        let tCorrect = dsharp.tensor([[1,1,1],[1,1,1]])
+        Assert.CheckEqual(tCorrect, t)
+
+    [<Test>]
+    member this.TestRand () =
+        let t = dsharp.rand([1000])
+        let tMean = t.mean()
+        let tMeanCorrect = dsharp.tensor(0.5)
+        let tStddev = t.std()
+        let tStddevCorrect = dsharp.tensor(1./12.) |> dsharp.sqrt
+        Assert.That(tMeanCorrect.allclose(tMean, 0.1))
+        Assert.That(tStddevCorrect.allclose(tStddev, 0.1))
+
+    [<Test>]
+    member this.TestRandn () =
+        let t = dsharp.randn([1000])
+        let tMean = t.mean()
+        let tMeanCorrect = dsharp.tensor(0.)
+        let tStddev = t.std()
+        let tStddevCorrect = dsharp.tensor(1.)
+        Assert.That(tMeanCorrect.allclose(tMean, 0.1, 0.1))
+        Assert.That(tStddevCorrect.allclose(tStddev, 0.1, 0.1))
+
+    [<Test>]
+    member this.TestArange () =
+        let t1 = dsharp.arange(5.)
+        let t1Correct = dsharp.tensor([0.,1.,2.,3.,4.])
+        let t2 = dsharp.arange(startVal=1., endVal=4.)
+        let t2Correct = dsharp.tensor([1.,2.,3.])
+        let t3 = dsharp.arange(startVal=1., endVal=2.5, step=0.5)
+        let t3Correct = dsharp.tensor([1.,1.5,2.])
+        Assert.CheckEqual(t1Correct, t1)
+        Assert.CheckEqual(t2Correct, t2)
+        Assert.CheckEqual(t3Correct, t3)
+
+
+    [<Test>]
+    member this.TestSeed () =
+        for combo in Combos.FloatingPointExcept16s do
+            printfn "%A" (combo.device, combo.backend, combo.dtype)
+            use _holder = dsharp.useConfig(combo.dtype, combo.device, combo.backend)
+            dsharp.seed(123)
+            let t = combo.randint(0,10,[25])
+            dsharp.seed(123)
+            let t2 = combo.randint(0,10,[25])
+            Assert.CheckEqual(t, t2)
+
+    [<Test>]
+    member this.TestSlice () =
+        let t = dsharp.tensor([1, 2, 3])
+        let tSlice = t |> dsharp.slice([0])
+        let tSliceCorrect = t[0]
+        Assert.CheckEqual(tSliceCorrect, tSlice)
+
+    member _.TestCanConfigure () =
+
+        // Backup the current config before the test to restore in the end
+        let configBefore = dsharp.config()
+
+        // Default reference backend with CPU
+        let device = Device.Default
+        dsharp.config(device=Device.CPU)
+        Assert.CheckEqual(Device.CPU, Device.Default)
+        dsharp.config(device=device)
+
+        // Torch with default backend (CPU)
+        let backend = Backend.Default
+        dsharp.config(backend=Backend.Torch)
+        Assert.CheckEqual(Backend.Torch, Backend.Default)
+        dsharp.config(backend=backend)
+
+        // Default reference backend with Float64
+        let dtype = Dtype.Default
+        dsharp.config(dtype=Dtype.Float64)
+        Assert.CheckEqual(Dtype.Float64, Dtype.Default)
+        dsharp.config(dtype=dtype)
+
+        // Restore the config before the test
+        dsharp.config(configBefore)
+
+    [<Test>]
+    member _.TestBackends () =
+        let backends = dsharp.backends() |> List.sort
+        let backendsCorrect = [Backend.Reference; Backend.Torch] |> List.sort
+        Assert.CheckEqual(backendsCorrect, backends)
+
+    [<Test>]
+    member _.TestDevices () =
+        // Get devices for default reference backend
+        let defaultReferenceBackendDevices = dsharp.devices()
+        Assert.CheckEqual([Device.CPU], defaultReferenceBackendDevices)
+
+        // Get devices for explicitly specified reference backend
+        let explicitReferenceBackendDevices = dsharp.devices(backend=Backend.Reference)
+        Assert.CheckEqual([Device.CPU], explicitReferenceBackendDevices)
+
+        // Get CPU devices for explicitly specified reference backend
+        let explicitReferenceBackendCPUDevices = dsharp.devices(backend=Backend.Reference, deviceType=DeviceType.CPU)
+        Assert.CheckEqual([Device.CPU], explicitReferenceBackendCPUDevices)
+
+        // Get devices for explicitly specified Torch backend
+        let explicitTorchBackendDevices = dsharp.devices(backend=Backend.Torch)
+        Assert.That(explicitTorchBackendDevices |> List.contains Device.CPU)
+        let cudaAvailable = TorchSharp.torch.cuda.is_available()
+        Assert.CheckEqual(cudaAvailable, (explicitTorchBackendDevices |> List.contains Device.GPU))
+
+    [<Test>]
+    member _.TestIsBackendAvailable () =
+        let referenceBackendAvailable = dsharp.isBackendAvailable(Backend.Reference)
+        Assert.That(referenceBackendAvailable)
+
+    [<Test>]
+    member _.TestIsDeviceAvailable () =
+        let cpuAvailable = dsharp.isDeviceAvailable(Device.CPU)
+        Assert.That(cpuAvailable)
+
+    [<Test>]
+    member _.TestIsCudaAvailable () =
+        let cudaAvailable = dsharp.isCudaAvailable(Backend.Reference)
+        Assert.False(cudaAvailable)
+
+    [<Test>]
+    member _.TestIsDeviceTypeAvailable () =
+        Assert.That(dsharp.isDeviceTypeAvailable(DeviceType.CPU))
+
+        Assert.That(dsharp.isDeviceTypeAvailable(DeviceType.CPU, Backend.Reference))
+        Assert.False(dsharp.isDeviceTypeAvailable(DeviceType.CUDA, Backend.Reference))
+
+        Assert.That(dsharp.isDeviceTypeAvailable(DeviceType.CPU, Backend.Torch))
+
+        let cudaAvailable = TorchSharp.torch.cuda.is_available()
+        let deviceSupported = dsharp.isDeviceTypeAvailable(DeviceType.CUDA, Backend.Torch)
+        Assert.CheckEqual(cudaAvailable, deviceSupported)
+
+    [<Test>]
+    member _.TestTensorAPIStyles () =
+        let x = dsharp.randn([5;5])
+
+        // Base API
+        dsharp.seed(0)
+        let y1 = x.dropout(0.2).leakyRelu(0.1).sum(1)
+
+        // PyTorch-like API
+        dsharp.seed(0)
+        let y2 = dsharp.sum(dsharp.leakyRelu(dsharp.dropout(x, 0.2), 0.1), 1)
+
+        // Compositional API for pipelining Tensor -> Tensor functions (optional, accessed through TensorMath.Compose)
+        dsharp.seed(0)
+        let y3 = x |> dsharp.dropout 0.2 |> dsharp.leakyRelu 0.1 |> dsharp.sum 1
+
+        Assert.CheckEqual(y1, y2)
+        Assert.CheckEqual(y1, y3)
+
+    [<Test>]
+    member _.TestLoadSaveGeneric() =
+        // string
+        let v1 = "Hello, world!"
+        let f1 = System.IO.Path.GetTempFileName()
+        dsharp.save(v1, f1)
+        let v1b = dsharp.load(f1)
+        Assert.CheckEqual(v1, v1b)
+
+        // int
+        let v2 = 128
+        let f2 = System.IO.Path.GetTempFileName()
+        dsharp.save(v2, f2)
+        let v2b = dsharp.load(f2)
+        Assert.CheckEqual(v2, v2b)
+
+        // float
+        let v3 = 3.14
+        let f3 = System.IO.Path.GetTempFileName()
+        dsharp.save(v3, f3)
+        let v3b = dsharp.load(f3)
+        Assert.CheckEqual(v3, v3b)
+
+        // bool
+        let v4 = true
+        let f4 = System.IO.Path.GetTempFileName()
+        dsharp.save(v4, f4)
+        let v4b = dsharp.load(f4)
+        Assert.CheckEqual(v4, v4b)
+
+        // list
+        let v5 = [1, 2, 3]
+        let f5 = System.IO.Path.GetTempFileName()
+        dsharp.save(v5, f5)
+        let v5b = dsharp.load(f5)
+        Assert.CheckEqual(v5, v5b)
+
+        // tuple
+        let v6 = (1, 2, 3)
+        let f6 = System.IO.Path.GetTempFileName()
+        dsharp.save(v6, f6)
+        let v6b = dsharp.load(f6)
+        Assert.CheckEqual(v6, v6b)
+
+        // dict
+        let v7 = [("a", 1), ("b", 2), ("c", 3)]
+        let f7 = System.IO.Path.GetTempFileName()
+        dsharp.save(v7, f7)
+        let v7b = dsharp.load(f7)
+        Assert.CheckEqual(v7, v7b)
+
+        // tuple of dicts
+        let v8 = ([("a", 1), ("b", 2), ("c", 3)], [("a", 1), ("b", 2), ("c", 3)])
+        let f8 = System.IO.Path.GetTempFileName()
+        dsharp.save(v8, f8)
+        let v8b = dsharp.load(f8)
+        Assert.CheckEqual(v8, v8b)
+
+        // tensor
+        let v9 = dsharp.tensor([1, 2, 3])
+        let f9 = System.IO.Path.GetTempFileName()
+        dsharp.save(v9, f9)
+        let v9b = dsharp.load(f9)
+        Assert.CheckEqual(v9, v9b)
+
diff --git a/tests/TensorMath.Tests/TestUtils.fs b/tests/TensorMath.Tests/TestUtils.fs
new file mode 100644
index 0000000..e327256
--- /dev/null
+++ b/tests/TensorMath.Tests/TestUtils.fs
@@ -0,0 +1,41 @@
+// Copyright (c) 2016- University of Oxford (Atilim Gunes Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace Tests
+
+open System
+open TensorMath
+open NUnit.Framework
+open NUnit.Framework.Legacy
+
+[<AutoOpen>]
+module TestUtils =
+    let isException f = Assert.Throws<Exception>(TestDelegate(fun () -> f() |> ignore)) |> ignore
+    let isInvalidOp f = Assert.Throws<InvalidOperationException>(TestDelegate(fun () -> f() |> ignore)) |> ignore
+    let isAnyException f = Assert.Catch(TestDelegate(fun () -> f() |> ignore)) |> ignore
+
+    type Assert with
+
+        /// Classic assertion style
+        static member AreEqual(actual: 'T, expected: 'T) : unit = ClassicAssert.AreEqual(actual, expected)
+
+        /// Classic assertion style
+        static member AreNotEqual (actual: 'T, expected: 'T) : unit = ClassicAssert.AreNotEqual(actual, expected)
+
+        /// Classic assertion style
+        static member False (condition: bool) : unit = ClassicAssert.False(condition)
+
+        /// Like Assert.AreEqual but requires that the actual and expected are the same type
+        static member CheckEqual (expected: 'T, actual: 'T) = Assert.That(actual, Is.EqualTo(expected))
+
+    type dsharp with
+        /// <summary>Locally use the given default configuration, returning an IDisposable to revert to the previous configuration.</summary>
+        /// <param name="dtype">The new default element type.</param>
+        /// <param name="device">The new default device.</param>
+        /// <param name="backend">The new default backend.</param>
+        static member useConfig(?dtype: Dtype, ?device: Device, ?backend: Backend) =
+            let prevConfig = dsharp.config()
+            dsharp.config(?dtype=dtype, ?device=device, ?backend=backend)
+            { new System.IDisposable with member _.Dispose() = dsharp.config(prevConfig) }
+
diff --git a/tests/TensorMath.Tests/Tests.fs b/tests/TensorMath.Tests/Tests.fs
deleted file mode 100644
index 3ad040e..0000000
--- a/tests/TensorMath.Tests/Tests.fs
+++ /dev/null
@@ -1,15 +0,0 @@
-module Tests
-
-open System
-open Xunit
-open FsCheck.Xunit
-
-[<Fact>]
-let ``true is true`` () =
-    Assert.True(true)
-
-// a property-based test is a parametrized test, which is run with a range of random inputs.
-// This can be helpful to find edge cases that you might not have thought of.
-[<Property>]
-let ``Boolean is true or false`` (b: bool) =
-    b = true || b = false
\ No newline at end of file
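---
A minimal usage sketch (not part of the diff above) of the useConfig helper introduced in TestUtils.fs, assuming the Tests and TensorMath namespaces are open as in the test files; the function name withFloat64Defaults is hypothetical and illustrative only.

    // Illustrative sketch: run a check under a temporary default configuration.
    // Disposing the returned handle reverts dtype/device/backend to their
    // previous defaults, so tests cannot leak configuration into each other.
    let withFloat64Defaults () =
        use _cfg = dsharp.useConfig(dtype=Dtype.Float64, device=Device.CPU, backend=Backend.Reference)
        let t = dsharp.randn([3; 3])               // created under the temporary defaults
        Assert.CheckEqual(Dtype.Float64, t.dtype)  // new default element type is in effect
        // _cfg is disposed here, restoring the prior configuration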