From 480448d950906a4d981d5a77e54eb87b267e9723 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 31 Dec 2024 16:44:48 -0500
Subject: [PATCH 01/12] Update onnxruntime main
 2d05c4bcd940aa25561ed7de26481f219618dd7a (#3734)

---
 test/onnx/.onnxrt-commit | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/onnx/.onnxrt-commit b/test/onnx/.onnxrt-commit
index 75454c00440..bc58a653eb9 100644
--- a/test/onnx/.onnxrt-commit
+++ b/test/onnx/.onnxrt-commit
@@ -1 +1 @@
-62e7e24f172a062242acae11575f7ea11529dd09
+2d05c4bcd940aa25561ed7de26481f219618dd7a

From 13159b31c336a3a7ba1a1b29874d641d16c7424d Mon Sep 17 00:00:00 2001
From: Chris Austen <causten@users.noreply.github.com>
Date: Fri, 3 Jan 2025 10:18:14 -0500
Subject: [PATCH 02/12] bump perf checks to 6.3.1 (#3736)

---
 .github/workflows/config.md        | 6 +++---
 .github/workflows/performance.yaml | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/config.md b/.github/workflows/config.md
index 326c99ca483..7c2eb5dcb8a 100644
--- a/.github/workflows/config.md
+++ b/.github/workflows/config.md
@@ -1,7 +1,7 @@
 #=====ROCM INFO=====
-ROCM_VERSION : '6.0.2'
+ROCM_VERSION : '6.3.1'
 #default ROCm version to be used
-ROCM_BASE_IMAGE : 'rocm/dev-ubuntu-20.04'
+ROCM_BASE_IMAGE : 'rocm/dev-ubuntu-22.04'
 #base image from dockerhub to be used
 ROCM_BUILT_IMAGE : 'rocm-migraphx'
 #name of the docker image built upon ROCm base
@@ -26,4 +26,4 @@ PERFORMANCE_TEST_TIMEOUT : '30m'
 
 #===== W A R N I N G =====
 #VARIABLE NAMES NOT TO BE CHANGED, VALUES ONLY!
-#VALUES MUST BE ENGLOSED IN SINGLE QUOTES!
\ No newline at end of file
+#VALUES MUST BE ENCLOSED IN SINGLE QUOTES!
diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml
index 97bc8851abf..c2dcedd66cb 100644
--- a/.github/workflows/performance.yaml
+++ b/.github/workflows/performance.yaml
@@ -12,7 +12,7 @@ on:
       rocm_release:
         description: ROCm Version
         required: true
-        default: '6.0.2'
+        default: '6.3.1'
       performance_reports_repo:
         description: Repository where performance reports are stored
         required: true
@@ -96,4 +96,4 @@ jobs:
     secrets:
       gh_token: ${{ secrets.MIGRAPHX_BOT_TOKEN }}
       mail_user: ${{ secrets.MAIL_USERNAME }}
-      mail_pass: ${{ secrets.MAIL_PASSWORD }}
\ No newline at end of file
+      mail_pass: ${{ secrets.MAIL_PASSWORD }}

From 5f9e048271781341fd3346bcef393081f66fb748 Mon Sep 17 00:00:00 2001
From: Taylor Ding <taylding@amd.com>
Date: Fri, 3 Jan 2025 19:59:19 -0500
Subject: [PATCH 03/12] Enable cpp core guidelines checks. (#3723)

---
 CMakeLists.txt                   | 2 --
 src/include/migraphx/program.hpp | 2 +-
 src/program.cpp                  | 3 +--
 test/ref/add.cpp                 | 2 +-
 4 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e00ad7010e1..8291f5a3a6b 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -208,7 +208,6 @@ rocm_enable_clang_tidy(
         -clang-diagnostic-disabled-macro-expansion
         -clang-diagnostic-extern-c-compat
         -clang-diagnostic-unused-command-line-argument
-        -cppcoreguidelines-avoid-capture-default-when-capturing-this
         -cppcoreguidelines-avoid-const-or-ref-data-members
         -cppcoreguidelines-avoid-do-while
         -cppcoreguidelines-explicit-virtual-functions
@@ -222,7 +221,6 @@ rocm_enable_clang_tidy(
         -cppcoreguidelines-pro-type-reinterpret-cast
         -cppcoreguidelines-pro-type-union-access
         -cppcoreguidelines-pro-type-vararg
-        -cppcoreguidelines-rvalue-reference-param-not-moved
         -cppcoreguidelines-special-member-functions
         -cppcoreguidelines-use-default-member-init
         -cppcoreguidelines-virtual-class-destructor
diff --git a/src/include/migraphx/program.hpp b/src/include/migraphx/program.hpp
index 87de258a44b..2a82f381c21 100644
--- a/src/include/migraphx/program.hpp
+++ b/src/include/migraphx/program.hpp
@@ -112,7 +112,7 @@ struct MIGRAPHX_EXPORT program
                      std::size_t batch = 1,
                      bool detailed     = false) const;
 
-    void mark(const parameter_map& params, marker&& m);
+    void mark(const parameter_map& params, marker m);
 
     value to_value() const;
     void from_value(const value& v);
diff --git a/src/program.cpp b/src/program.cpp
index 2d43f3f8d55..5cfb62baf39 100644
--- a/src/program.cpp
+++ b/src/program.cpp
@@ -887,13 +887,12 @@ std::string perf_group(instruction_ref ins, bool detailed)
     return result;
 }
 
-void program::mark(const parameter_map& params, marker&& m)
+void program::mark(const parameter_map& params, marker m)
 {
     auto& ctx = this->impl->contexts;
     // Run once by itself
     eval(params);
     this->finish();
-    // Start marking
     m.mark_start(*this);
     generic_eval(*this, ctx, params, [&](auto ins, auto f) {
         argument result;
diff --git a/test/ref/add.cpp b/test/ref/add.cpp
index 525e74dfa11..148e9b5b08a 100644
--- a/test/ref/add.cpp
+++ b/test/ref/add.cpp
@@ -170,7 +170,7 @@ TEST_CASE(fp32_fp16_test)
         return p;
     };
 
-    auto test_case = [&](std::vector<std::string>&& op_names) {
+    auto test_case = [&](const std::vector<std::string>& op_names) {
         std::vector<float> gold_res = {2.0, 4.0, 6.0, 8.0, 10.0, 12.0};
         auto p                      = create_program();
         migraphx::quantize_fp16(p, op_names);

From 9bc94aba8269d8bdf2c383677f19edce00dd6b87 Mon Sep 17 00:00:00 2001
From: Charlie Lin <charlie.lin@amd.com>
Date: Fri, 3 Jan 2025 21:04:45 -0500
Subject: [PATCH 04/12] Fix docs, add netron output docs (#3740)

The docs were broken for the driver options. Added note about --netron output.
---
 docs/driver/read.rst              |  4 ++++
 docs/migraphx-driver.rst          |  2 ++
 docs/reference/driver-options.rst | 22 +++++++++++-----------
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/docs/driver/read.rst b/docs/driver/read.rst
index 386a05a62ca..db32b11dda7 100755
--- a/docs/driver/read.rst
+++ b/docs/driver/read.rst
@@ -78,6 +78,10 @@ Print out the program as cpp program.
 
 Print out program as json.
 
+.. option::  --netron
+
+Print out program as a Netron viewable json file.
+
 .. option::  --text
 
 Print out program in text format.
diff --git a/docs/migraphx-driver.rst b/docs/migraphx-driver.rst
index 54e25ff84e7..06d021d8247 100644
--- a/docs/migraphx-driver.rst
+++ b/docs/migraphx-driver.rst
@@ -85,6 +85,8 @@ To learn which options can be used with which commands, see the :ref:`MIGraphX d
       - Prints the program in .txt format
    *  - --binary
       - Prints the program in binary format
+   *  - --netron
+      - Prints the program in Netron viewable JSON format
    *  - --output | -o
       - Writes output in a file
    *  - --fill0
diff --git a/docs/reference/driver-options.rst b/docs/reference/driver-options.rst
index e58a3752135..82e75267d28 100644
--- a/docs/reference/driver-options.rst
+++ b/docs/reference/driver-options.rst
@@ -16,7 +16,7 @@ read
 
 Loads and prints input graph.
 
-.. include:: ./driver/read.rst
+.. include:: ../driver/read.rst
 
 compile
 -------
@@ -25,8 +25,8 @@ compile
 
 Compiles and prints input graph.
 
-.. include:: ./driver/read.rst
-.. include:: ./driver/compile.rst
+.. include:: ../driver/read.rst
+.. include:: ../driver/compile.rst
 
 run
 ---
@@ -35,8 +35,8 @@ run
 
 Loads and prints input graph.
 
-.. include:: ./driver/read.rst
-.. include:: ./driver/compile.rst
+.. include:: ../driver/read.rst
+.. include:: ../driver/compile.rst
 
 perf
 ----
@@ -45,8 +45,8 @@ perf
 
 Compiles and runs input graph then prints performance report.
 
-.. include:: ./driver/read.rst
-.. include:: ./driver/compile.rst
+.. include:: ../driver/read.rst
+.. include:: ../driver/compile.rst
 
 .. option::  --iterations, -n [unsigned int]
 
@@ -59,8 +59,8 @@ verify
 
 Runs reference and CPU or GPU implementations and checks outputs for consistency.
 
-.. include:: ./driver/read.rst
-.. include:: ./driver/compile.rst
+.. include:: ../driver/read.rst
+.. include:: ../driver/compile.rst
 
 .. option::  --rms-tol [double]
 
@@ -104,5 +104,5 @@ Here is how you can use ``roctx`` combined with :doc:`rocprof <rocprofiler:rocpr
 Running :doc:`rocprof <rocprofiler:rocprofv1>` generates trace information for HIP, HCC and ROCTX in separate ``.txt`` files.
 To understand the interactions between API calls, utilize the :ref:`roctx.py <tools>` helper script.
 
-.. include:: ./driver/read.rst
-.. include:: ./driver/compile.rst
+.. include:: ../driver/read.rst
+.. include:: ../driver/compile.rst

From 64647a002f67757709324843f51e1561da9dba97 Mon Sep 17 00:00:00 2001
From: Richa Gadgil <richa.gadgil@amd.com>
Date: Wed, 8 Jan 2025 15:49:29 -0800
Subject: [PATCH 05/12] Fix License Checker (#3745)

---
 .github/workflows/ci.yaml |  6 ++--
 tools/check_stamped.py    | 66 ++++++++++++++++++++++++++-------------
 2 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index ca7a722b9c6..5859bf23156 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -351,12 +351,14 @@ jobs:
         docker-images: true
 
     - uses: actions/checkout@v4.1.1
+      with:
+        fetch-depth: 0 # Fetch the entire repository history and all branches
     - name: Set up Python
       uses: actions/setup-python@v4
       with:
         python-version: 3.8
-    - name: run License Check
-      run: python3 tools/check_stamped.py ${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
+    - name: Run License Check
+      run: python3 tools/check_stamped.py origin/${{ github.event_name == 'pull_request' && github.base_ref || 'develop' }}
 
   linux:
 
diff --git a/tools/check_stamped.py b/tools/check_stamped.py
index 793f60d1ac1..51502f53024 100644
--- a/tools/check_stamped.py
+++ b/tools/check_stamped.py
@@ -2,7 +2,7 @@
 #####################################################################################
 #  The MIT License (MIT)
 #
-#  Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+#  Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
 #
 #  Permission is hereby granted, free of charge, to any person obtaining a copy
 #  of this software and associated documentation files (the "Software"), to deal
@@ -30,9 +30,7 @@
 # in the license stamp, with the assumption being that any modifications/creations will need to be stamped to the year that the
 # modification/creation was made.
 #####################################################################################
-import subprocess, sys, datetime, argparse
-
-debug = False
+import subprocess, sys, datetime, argparse, os
 
 current_year = datetime.date.today().year
 
@@ -63,7 +61,7 @@ def hasKeySequence(inputfile: str, key_message: str) -> bool:
 
 
 # Simple just open and write stuff to each file with the license stamp
-def needStampCheck(filename: str) -> bool:
+def needStampCheck(filename: str, debug: bool) -> bool:
     # open save old contents and append things here
     if debug: print("Open", filename, end=' ')
     #Empty name isn't a filename
@@ -111,29 +109,52 @@ def check_filename(filename: str, fileTuple: tuple or list) -> bool:
     return False
 
 
-def main(branch) -> None:
-    unsupported_file_types.extend(specificIgnores)
+def eval(cmd, **kwargs):
+    return subprocess.run(cmd,
+                          capture_output=True,
+                          shell=isinstance(cmd, str),
+                          check=True,
+                          **kwargs).stdout.decode('utf-8').strip()
+
+
+def is_excluded(f):
+    base = os.path.basename(f)
+    return base in unsupported_file_types
+
+
+def get_top():
+    return eval("git rev-parse --show-toplevel")
+
 
-    ## Get a list of all files (not including deleted) that have changed/added in comparison to the latest Dev branch from MI Graphx
+def get_head():
+    return eval("git rev-parse --abbrev-ref HEAD")
 
-    # Subprocess 1 is fetching the latest dev branch from the MIgraphX Url and naming it as 'FETCH_HEAD'
-    subprocess.run(
-        "git fetch https://github.com/ROCmSoftwarePlatform/AMDMIGraphX {0} --quiet"
-        .format(branch),
-        shell=True,
-        stdout=subprocess.PIPE)
 
-    # proc 2 is getting the list of file differences between FETCH_HEAD and the branch to be merged. (filters out deleted files from FETCH_HEAD)
-    proc = subprocess.run("git diff --name-only --diff-filter=d FETCH_HEAD",
-                          shell=True,
-                          stdout=subprocess.PIPE)
-    fileList = proc.stdout.decode().split('\n')
+def get_merge_base(branch):
+    head = get_head()
+    return eval(f"git merge-base {branch} {head}")
+
+
+def get_files_changed(against):
+    files = eval(
+        f"git diff-index --cached --name-only --diff-filter=d {against}",
+        cwd=get_top()).splitlines()
+    return (f for f in files
+            if f.endswith(supported_file_types) and not is_excluded(f))
+
+
+def main(branch, debug) -> None:
+    unsupported_file_types.extend(specificIgnores)
+
+    fileList = list(get_files_changed(branch))
 
-    if debug: print(f"Target file list {len(fileList)}:\n" + str(fileList))
+    if debug:
+        print(f"Branch: {branch}, Target file list {len(fileList)}:\n" +
+              str(fileList))
 
     for file in fileList:
         if check_filename(file, supported_file_types):
-            if needStampCheck(file) and not check_filename(
+            if needStampCheck(file, debug) and not check_filename(
                     file, unsupported_file_types):
                 unstampedFiles.append(file)
             else:
@@ -172,6 +193,7 @@ def main(branch) -> None:
 
     parser = argparse.ArgumentParser()
     parser.add_argument("branch")
+    parser.add_argument('-d', '--debug', action='store_true')
     args = parser.parse_args()
 
-    main(args.branch)
+    main(args.branch, args.debug)

From ac18275d58bc70de73a5a962d652406b28d8d357 Mon Sep 17 00:00:00 2001
From: Artur Wojcik <artur.wojcik@amd.com>
Date: Thu, 9 Jan 2025 14:39:28 +0100
Subject: [PATCH 06/12] missing export on base64_encode() (#3748)

---
 src/include/migraphx/base64.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/include/migraphx/base64.hpp b/src/include/migraphx/base64.hpp
index 36035430826..2af7a60b201 100644
--- a/src/include/migraphx/base64.hpp
+++ b/src/include/migraphx/base64.hpp
@@ -31,7 +31,7 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 
 /// encode string to base64
-std::string base64_encode(const std::string& str);
+std::string MIGRAPHX_EXPORT base64_encode(const std::string& str);
 
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

From 6d02806ef6cf5e45f2c503284615129862f3a178 Mon Sep 17 00:00:00 2001
From: spolifroni-amd <Sandra.Polifroni@amd.com>
Date: Thu, 9 Jan 2025 08:40:41 -0500
Subject: [PATCH 07/12] Update LICENSE to 2025 (#3744)

---
 LICENSE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LICENSE b/LICENSE
index e50259a9d70..a11ab36baf0 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

From eb1717d5bfaaf27c9ed06e3dd3bad309137ec3a9 Mon Sep 17 00:00:00 2001
From: Charlie Lin <charlie.lin@amd.com>
Date: Mon, 13 Jan 2025 15:06:06 -0500
Subject: [PATCH 08/12] FP8 OCP to FP8 FNUZ on hardware with only FP8 FNUZ
 support (#3684)

NANOO is short for NAN On Overflow; the data type comes from this paper: https://arxiv.org/pdf/2206.02915
Implements the method described in "Convert OCP FP8 model to FNUZ model inside MIGraphX" (#2717).
This pass must run before simplify_qdq so that the adjusted scales and zero points are propagated to after the quantized operator.
The test in test/fp8_ocp_to_fnuz_test.cpp checks that the pass works with simplify_qdq and performs the expected operations.
The test in test/ref/fp8_ocp_to_fnuz.cpp checks that the pass produces the same result before and after.
I will make a separate PR that removes the gpu context changes to get the gfx number.
Fixed the cpp_generator, which was using __builtin_nan incorrectly.
---
 src/CMakeLists.txt                        |   1 +
 src/cpp_generator.cpp                     |   2 +-
 src/fp8_ocp_to_fnuz.cpp                   | 178 +++++++++++++++++
 src/include/migraphx/fp8_ocp_to_fnuz.hpp  |  49 +++++
 src/include/migraphx/match/dq_helpers.hpp |  62 ++++++
 src/include/migraphx/op/bit_cast.hpp      |   1 +
 src/simplify_qdq.cpp                      |  26 +--
 src/targets/gpu/target.cpp                |   3 +
 test/fp8_ocp_to_fnuz_test.cpp             | 224 +++++++++++++++++++++
 test/include/quantize_helpers.hpp         |  73 +++++++
 test/ref/fp8_ocp_to_fnuz.cpp              | 226 ++++++++++++++++++++++
 test/simplify_qdq_test.cpp                |  66 +------
 test/verify/test_fp8_ocp_to_fnuz_gemm.cpp |  60 ++++++
 13 files changed, 887 insertions(+), 84 deletions(-)
 create mode 100644 src/fp8_ocp_to_fnuz.cpp
 create mode 100644 src/include/migraphx/fp8_ocp_to_fnuz.hpp
 create mode 100644 src/include/migraphx/match/dq_helpers.hpp
 create mode 100644 test/fp8_ocp_to_fnuz_test.cpp
 create mode 100644 test/include/quantize_helpers.hpp
 create mode 100644 test/ref/fp8_ocp_to_fnuz.cpp
 create mode 100644 test/verify/test_fp8_ocp_to_fnuz_gemm.cpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 550bb30bd42..0076fc928c4 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -58,6 +58,7 @@ add_library(migraphx
     file_buffer.cpp
     fileutils.cpp
     fp_to_double.cpp
+    fp8_ocp_to_fnuz.cpp
     fuse_concat.cpp
     fuse_pointwise.cpp
     fuse_pointwise_reduce.cpp
diff --git a/src/cpp_generator.cpp b/src/cpp_generator.cpp
index 433ccaadb5b..2aa808778b9 100644
--- a/src/cpp_generator.cpp
+++ b/src/cpp_generator.cpp
@@ -221,7 +221,7 @@ cpp_generator::function cpp_generator::generate_module(const module& m,
                             string_literal = "-__builtin_huge_val()";
                     }
                     else if(std::isnan(static_cast<double>(x)))
-                        string_literal = "__builtin_nan()";
+                        string_literal = "__builtin_nan(\"0\")";
                     else
                         string_literal = ins->get_literal().to_string();
                 });
diff --git a/src/fp8_ocp_to_fnuz.cpp b/src/fp8_ocp_to_fnuz.cpp
new file mode 100644
index 00000000000..305ca6058f1
--- /dev/null
+++ b/src/fp8_ocp_to_fnuz.cpp
@@ -0,0 +1,178 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/fp8_ocp_to_fnuz.hpp>
+#include <migraphx/matcher.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/pass_manager.hpp>
+#include <migraphx/match/dq_helpers.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace {
+
+using fp8::fp8e4m3fnuz;
+
+std::unordered_set<std::string> get_quantizable_op_names()
+{
+    static std::unordered_set<std::string> s = {"convolution", "dot"};
+    return s;
+}
+
+struct match_fp8ocp_convert_to_fp8fnuz
+{
+    auto matcher() const
+    {
+        auto dq1 = match::arg(0)(
+            skip_post_dq_ops(match::dequantizelinear_op("scale1", "zp1").bind("dq1")));
+        auto dq2 = match::arg(1)(
+            skip_post_dq_ops(match::dequantizelinear_op("scale2", "zp2").bind("dq2")));
+        return match::name(get_quantizable_op_names())(dq1, dq2);
+    }
+
+    static auto bit_cast_and_handle_specials(module& m,
+                                             const instruction_ref dq,
+                                             const instruction_ref x,
+                                             const instruction_ref bits_0x80_lit,
+                                             const instruction_ref bits_0x7f_lit,
+                                             const instruction_ref bits_0xff_lit,
+                                             const instruction_ref bits_0x00_lit)
+    {
+        auto x_lens     = x->get_shape().lens();
+        auto cast_input = m.insert_instruction(
+            dq, make_op("bit_cast", {{"target_type", shape::fp8e4m3fnuz_type}}), x);
+        auto mb_bits_0x80_lit = m.insert_instruction(
+            dq, make_op("multibroadcast", {{"out_lens", x_lens}}), bits_0x80_lit);
+        auto mb_bits_0x7f_lit = m.insert_instruction(
+            dq, make_op("multibroadcast", {{"out_lens", x_lens}}), bits_0x7f_lit);
+        auto mb_bits_0xff_lit = m.insert_instruction(
+            dq, make_op("multibroadcast", {{"out_lens", x_lens}}), bits_0xff_lit);
+        auto mb_zero_lit = m.insert_instruction(
+            dq, make_op("multibroadcast", {{"out_lens", x_lens}}), bits_0x00_lit);
+        // negative zero in fp8e4m3fn to zero in fp8e4m3fnuz
+        // a == 0x80 ? 0x0 : a
+        auto is_neg_zero = m.insert_instruction(dq, make_op("equal"), cast_input, mb_bits_0x80_lit);
+        auto ret = m.insert_instruction(dq, make_op("where"), is_neg_zero, mb_zero_lit, cast_input);
+
+        // positive and negative NaN in fp8e4m3fn to NaN in fp8e4m3fnuz
+        // (a == 0x7f or a == 0xff) ? 0x80 : a
+        auto eq_0x7f = m.insert_instruction(dq, make_op("equal"), ret, mb_bits_0x7f_lit);
+
+        auto eq_0xff = m.insert_instruction(dq, make_op("equal"), ret, mb_bits_0xff_lit);
+
+        auto cond = m.insert_instruction(dq, make_op("logical_or"), eq_0x7f, eq_0xff);
+        ret       = m.insert_instruction(dq, make_op("where"), cond, mb_bits_0x80_lit, ret);
+        return ret;
+    }
+
+    // Re-apply to the adjusted scale or zero point the same chain of
+    // instructions that sat between the original and the DQ. Similar to
+    // propagate_quantized_ins in simplify_qdq.
+    static auto propagate_broadcasts(module& m,
+                                     const instruction_ref adj,
+                                     const instruction_ref ori,
+                                     const instruction_ref start,
+                                     const instruction_ref insert_pt)
+    {
+        auto prev_ins = start;
+        std::vector<instruction_ref> ins_between;
+        // matcher skips contiguous, multi/broadcasts and transposes; collect all those
+        // instructions
+        while(prev_ins != ori)
+        {
+            ins_between.push_back(prev_ins);
+            prev_ins = prev_ins->inputs().front();
+        }
+        auto ret = adj;
+        for(auto ins : reverse_iterator_for(ins_between))
+        {
+            ret = m.insert_instruction(insert_pt, (*ins)->get_operator(), {ret});
+        }
+        return ret;
+    }
+
+    static auto cast_to_fnuz(module& m,
+                             const instruction_ref dq,
+                             const instruction_ref input,
+                             const instruction_ref dq_scale,
+                             const instruction_ref dq_zp)
+    {
+        auto x                             = input;
+        std::vector<fp8e4m3fnuz> bits_0x80 = {fp8e4m3fnuz(0x80, fp8e4m3fnuz::from_bits())};
+        auto bits_0x80_lit = m.add_literal(shape{shape::fp8e4m3fnuz_type, {1}, {0}}, bits_0x80);
+
+        std::vector<fp8e4m3fnuz> bits_0x7f = {fp8e4m3fnuz(0x7f, fp8e4m3fnuz::from_bits())};
+        auto bits_0x7f_lit = m.add_literal(shape{shape::fp8e4m3fnuz_type, {1}, {0}}, bits_0x7f);
+
+        std::vector<fp8e4m3fnuz> bits_0xff = {fp8e4m3fnuz(0xff, fp8e4m3fnuz::from_bits())};
+        auto bits_0xff_lit = m.add_literal(shape{shape::fp8e4m3fnuz_type, {1}, {0}}, bits_0xff);
+
+        std::vector<fp8e4m3fnuz> bits_0x00 = {fp8e4m3fnuz(0x00, fp8e4m3fnuz::from_bits())};
+        auto bits_0x00_lit = m.add_literal(shape{shape::fp8e4m3fnuz_type, {1}, {0}}, bits_0x00);
+
+        x = bit_cast_and_handle_specials(
+            m, dq, x, bits_0x80_lit, bits_0x7f_lit, bits_0xff_lit, bits_0x00_lit);
+        auto adj_dq_zp = bit_cast_and_handle_specials(
+            m, dq, dq_zp, bits_0x80_lit, bits_0x7f_lit, bits_0xff_lit, bits_0x00_lit);
+
+        // adj_scale = 2 * scale
+        auto two_lit = m.add_literal(literal{shape{dq_scale->get_shape().type()}, {2}});
+        two_lit      = m.insert_instruction(
+            dq, make_op("multibroadcast", {{"out_lens", dq_scale->get_shape().lens()}}), two_lit);
+        auto adj_dq_scale = m.insert_instruction(dq, make_op("mul"), dq_scale, two_lit);
+
+        adj_dq_scale = propagate_broadcasts(m, adj_dq_scale, dq_scale, dq->inputs().at(1), dq);
+        adj_dq_zp    = propagate_broadcasts(m, adj_dq_zp, dq_zp, dq->inputs().at(2), dq);
+        m.replace_instruction(dq, make_op("dequantizelinear"), x, adj_dq_scale, adj_dq_zp);
+    }
+
+    auto apply(module& m, const match::matcher_result& r) const
+    {
+        auto dq1    = r.instructions["dq1"];
+        auto dq2    = r.instructions["dq2"];
+        auto scale1 = r.instructions["scale1"];
+        auto scale2 = r.instructions["scale2"];
+        auto zp1    = r.instructions["zp1"];
+        auto zp2    = r.instructions["zp2"];
+
+        std::set<migraphx::shape::type_t> supported_types = {migraphx::shape::fp8e4m3fn_type};
+        if(not contains(supported_types, dq1->inputs().front()->get_shape().type()) or
+           not contains(supported_types, dq2->inputs().front()->get_shape().type()))
+            return;
+
+        cast_to_fnuz(m, dq1, dq1->inputs().front(), scale1, zp1);
+        cast_to_fnuz(m, dq2, dq2->inputs().front(), scale2, zp2);
+    }
+};
+
+} // namespace
+
+void fp8_ocp_to_fnuz::apply(module_pass_manager& mpm) const
+{
+    module_ref mm = &mpm.get_module();
+    match::find_matches(*mm, match_fp8ocp_convert_to_fp8fnuz{});
+}
+
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
diff --git a/src/include/migraphx/fp8_ocp_to_fnuz.hpp b/src/include/migraphx/fp8_ocp_to_fnuz.hpp
new file mode 100644
index 00000000000..19e4a1cda02
--- /dev/null
+++ b/src/include/migraphx/fp8_ocp_to_fnuz.hpp
@@ -0,0 +1,49 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_RTGLIB_FP8_OCP_TO_FNUZ_HPP
+#define MIGRAPHX_GUARD_RTGLIB_FP8_OCP_TO_FNUZ_HPP
+
+#include <migraphx/config.hpp>
+#include <migraphx/pass_manager.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+
+/**
+ * Convert fp8e4m3fn to fp8e4m3fnuz for hardware that only supports fp8e4m3fnuz data types
+ * intrinsically. Conversion uses the same bit representation and adjusts scaling factors at the
+ * dequantization. Reinterpreting fp8e4m3fn bits as fp8e4m3fnuz halves the represented value, so
+ * the scale is doubled to compensate. This pass should run before simplify_qdq so that the scales
+ * and zero points calculated by simplify_qdq have the correct adjusted scaling factors.
+ */
+struct MIGRAPHX_EXPORT fp8_ocp_to_fnuz
+{
+    std::string name() const { return "fp8_ocp_to_fnuz"; }
+    void apply(module_pass_manager& mpm) const;
+};
+
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
diff --git a/src/include/migraphx/match/dq_helpers.hpp b/src/include/migraphx/match/dq_helpers.hpp
new file mode 100644
index 00000000000..cdb40ae977e
--- /dev/null
+++ b/src/include/migraphx/match/dq_helpers.hpp
@@ -0,0 +1,62 @@
+
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_MATCH_DQ_HELPERS_HPP
+#define MIGRAPHX_GUARD_MATCH_DQ_HELPERS_HPP
+
+#include <migraphx/config.hpp>
+#include <migraphx/matcher.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace match {
+
+/**
+ * Find dequantizelinear (DQ) instruction with constant scale and zero point input
+ * while skipping broadcast instructions between DQ and scale/zero point. Used
+ * in simplify_qdq and fp8_ocp_to_fnuz.
+ */
+inline auto dequantizelinear_op(const std::string& scale, const std::string& zp)
+{
+    return match::name("dequantizelinear")(
+        match::arg(1)(match::skip_broadcasts(match::is_constant().bind(scale))),
+        match::arg(2)(match::skip_broadcasts(match::is_constant().bind(zp))));
+}
+
+/**
+ * Skip certain operators after DQ instruction.
+ * Used in simplify_qdq and fp8_ocp_to_fnuz.
+ */
+template <class... Ms>
+auto skip_post_dq_ops(Ms... ms)
+{
+    return match::skip(match::name(
+        "broadcast", "multibroadcast", "contiguous", "transpose", "reshape", "convert"))(ms...);
+}
+
+} // namespace match
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
diff --git a/src/include/migraphx/op/bit_cast.hpp b/src/include/migraphx/op/bit_cast.hpp
index eb233ad8b36..0112342a14b 100644
--- a/src/include/migraphx/op/bit_cast.hpp
+++ b/src/include/migraphx/op/bit_cast.hpp
@@ -80,6 +80,7 @@ struct bit_cast : unary<bit_cast>
             args[0].visit([&](auto input) {
                 using itype = typename decltype(input)::value_type;
                 if constexpr(sizeof(otype) == sizeof(itype))
+
                 {
                     par_transform(input.begin(), input.end(), output.begin(), [&](auto x) {
                         return migraphx::bit_cast<otype>(x);
diff --git a/src/simplify_qdq.cpp b/src/simplify_qdq.cpp
index 86c2100a995..bd21564b618 100644
--- a/src/simplify_qdq.cpp
+++ b/src/simplify_qdq.cpp
@@ -36,18 +36,12 @@
 #include <migraphx/op/quant_dot.hpp>
 #include <migraphx/register_op.hpp>
 #include <migraphx/fp8_types.hpp>
+#include <migraphx/match/dq_helpers.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace {
 
-template <class... Ms>
-auto skip_post_dq_ops(Ms... ms)
-{
-    return match::skip(match::name(
-        "broadcast", "multibroadcast", "contiguous", "transpose", "reshape", "convert"))(ms...);
-}
-
 std::unordered_set<std::string> get_quantizable_op_names()
 {
     static std::unordered_set<std::string> s = {"convolution", "dot"};
@@ -117,20 +111,12 @@ struct match_find_quantizable_ops
         return qinp;
     }
 
-    static auto dequantizelinear_op(const std::string& scale, const std::string& zp)
-    {
-        return match::name("dequantizelinear")(
-            match::arg(0)(match::skip(match::name("quantizelinear"))(match::any())),
-            match::arg(1)(match::skip_broadcasts(match::is_constant().bind(scale))),
-            match::arg(2)(match::skip_broadcasts(match::is_constant().bind(zp))));
-    }
-
     auto matcher() const
     {
-        auto dq1 =
-            match::arg(0)(skip_post_dq_ops(dequantizelinear_op("scale1", "zp1").bind("dq1")));
-        auto dq2 =
-            match::arg(1)(skip_post_dq_ops(dequantizelinear_op("scale2", "zp2").bind("dq2")));
+        auto dq1 = match::arg(0)(
+            skip_post_dq_ops(match::dequantizelinear_op("scale1", "zp1").bind("dq1")));
+        auto dq2 = match::arg(1)(
+            skip_post_dq_ops(match::dequantizelinear_op("scale2", "zp2").bind("dq2")));
         return match::name(get_quantizable_op_names())(dq1, dq2);
     }
 
@@ -231,7 +217,9 @@ struct match_find_quantizable_ops
                    is_valid_qparam(zp1, out_lens, out_lens.size() - 2) and
                    is_valid_qparam(scale2, out_lens, out_lens.size() - 1) and
                    is_valid_qparam(zp2, out_lens, out_lens.size() - 1)))
+            {
                 return;
+            }
 
             // This implementation supports both arguments being per-axis affine quantized
             // In practice, inputs are per-tensor affine and weights are per-axis symmetric
diff --git a/src/targets/gpu/target.cpp b/src/targets/gpu/target.cpp
index ad98fb680fe..bf2fc3e8655 100644
--- a/src/targets/gpu/target.cpp
+++ b/src/targets/gpu/target.cpp
@@ -31,6 +31,7 @@
 #include <migraphx/eliminate_data_type.hpp>
 #include <migraphx/eliminate_identity.hpp>
 #include <migraphx/eliminate_pad.hpp>
+#include <migraphx/fp8_ocp_to_fnuz.hpp>
 #include <migraphx/fuse_concat.hpp>
 #include <migraphx/fuse_pointwise_reduce.hpp>
 #include <migraphx/inline_module.hpp>
@@ -179,6 +180,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         dead_code_elimination{},
         eliminate_identity{},
         dead_code_elimination{},
+        enable_pass(not gpu::gfx_has_fp8ocp_intrinsics() and gpu::gfx_has_fp8fnuz_intrinsics(), fp8_ocp_to_fnuz{}),
+        enable_pass(not gpu::gfx_has_fp8ocp_intrinsics() and gpu::gfx_has_fp8fnuz_intrinsics(), dead_code_elimination{}),
         simplify_qdq{},
         enable_pass(not mlir_enabled(), rewrite_quantization{}),
         dead_code_elimination{},
diff --git a/test/fp8_ocp_to_fnuz_test.cpp b/test/fp8_ocp_to_fnuz_test.cpp
new file mode 100644
index 00000000000..8369c879716
--- /dev/null
+++ b/test/fp8_ocp_to_fnuz_test.cpp
@@ -0,0 +1,224 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <cmath>
+#include <limits>
+
+#include <migraphx/float8.hpp>
+#include <migraphx/half.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/fp8_ocp_to_fnuz.hpp>
+#include <migraphx/simplify_qdq.hpp>
+#include <migraphx/propagate_constant.hpp>
+#include <migraphx/eliminate_common_subexpression.hpp>
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/make_op.hpp>
+
+#include <test.hpp>
+#include <quantize_helpers.hpp>
+
+using migraphx::make_op;
+using migraphx::shape;
+using migraphx::fp8::fp8e4m3fnuz;
+
+void run_fp8_ocp_to_fnuz(migraphx::module& m)
+{
+    migraphx::run_passes(m, {migraphx::fp8_ocp_to_fnuz{}, migraphx::dead_code_elimination{}});
+}
+
+void run_simplify_qdq(migraphx::module& m)
+{
+    run_passes(m, {migraphx::simplify_qdq{}, migraphx::dead_code_elimination{}});
+}
+
+void run_cse_pc(migraphx::module& m, const std::unordered_set<std::string>& skip_ops = {})
+{
+    // CSE, then constant propagation (honouring skip_ops), each followed by DCE
+    migraphx::eliminate_common_subexpression cse{};
+    migraphx::propagate_constant pc{skip_ops};
+    migraphx::dead_code_elimination dce{};
+    run_passes(m, {cse, dce, pc, dce});
+}
+
+// Emit a bit_cast of `x` to fp8e4m3fnuz followed by equal/where fix-ups for the
+// bit patterns 0x80, 0x7f and 0xff. Returns the last `where` instruction.
+auto bit_cast_and_handle_specials(migraphx::module& m,
+                                  const migraphx::instruction_ref x,
+                                  const migraphx::instruction_ref bits_0x80_lit,
+                                  const migraphx::instruction_ref bits_0x7f_lit,
+                                  const migraphx::instruction_ref bits_0xff_lit,
+                                  const migraphx::instruction_ref bits_0x00_lit)
+{
+    const auto out_lens = x->get_shape().lens();
+    auto as_fnuz =
+        m.add_instruction(make_op("bit_cast", {{"target_type", shape::fp8e4m3fnuz_type}}), x);
+    // multibroadcast every literal to the lens of x
+    auto to_x_lens = [&](migraphx::instruction_ref lit) {
+        return m.add_instruction(make_op("multibroadcast", {{"out_lens", out_lens}}), lit);
+    };
+    auto mb_0x80 = to_x_lens(bits_0x80_lit);
+    auto mb_0x7f = to_x_lens(bits_0x7f_lit);
+    auto mb_0xff = to_x_lens(bits_0xff_lit);
+    auto mb_0x00 = to_x_lens(bits_0x00_lit);
+
+    // negative zero in fp8e4m3fn to zero in fp8e4m3fnuz
+    // a == 0x80 ? 0x0 : a
+    auto is_neg_zero = m.add_instruction(make_op("equal"), as_fnuz, mb_0x80);
+    auto no_neg_zero = m.add_instruction(make_op("where"), is_neg_zero, mb_0x00, as_fnuz);
+
+    // positive and negative NaN in fp8e4m3fn to NaN in fp8e4m3fnuz
+    // (a == 0x7f or a == 0xff) ? 0x80 : a
+    auto is_pos_nan = m.add_instruction(make_op("equal"), no_neg_zero, mb_0x7f);
+    auto is_neg_nan = m.add_instruction(make_op("equal"), no_neg_zero, mb_0xff);
+    auto is_nan     = m.add_instruction(make_op("logical_or"), is_pos_nan, is_neg_nan);
+    return m.add_instruction(make_op("where"), is_nan, mb_0x80, no_neg_zero);
+}
+
+// Build the converted {input, scale, zero point} for a DQ: input and zero point go through
+// bit_cast_and_handle_specials, and the scale is multiplied by a literal 2.
+auto cast_fp8_helper(migraphx::module& m,
+                     const migraphx::instruction_ref dq_input,
+                     const migraphx::instruction_ref dq_scale,
+                     const migraphx::instruction_ref dq_zp)
+{
+    auto add_bits_lit = [&](unsigned char bits) {
+        std::vector<fp8e4m3fnuz> data = {fp8e4m3fnuz(bits, fp8e4m3fnuz::from_bits())};
+        return m.add_literal(shape{shape::fp8e4m3fnuz_type, {1}, {0}}, data);
+    };
+    auto bits_0x80_lit = add_bits_lit(0x80);
+    auto bits_0x7f_lit = add_bits_lit(0x7f);
+    auto bits_0xff_lit = add_bits_lit(0xff);
+    auto bits_0x00_lit = add_bits_lit(0x00);
+
+    auto cast_input = bit_cast_and_handle_specials(
+        m, dq_input, bits_0x80_lit, bits_0x7f_lit, bits_0xff_lit, bits_0x00_lit);
+    auto adj_zp = bit_cast_and_handle_specials(
+        m, dq_zp, bits_0x80_lit, bits_0x7f_lit, bits_0xff_lit, bits_0x00_lit);
+
+    auto two_lit = m.add_literal(migraphx::literal{shape{dq_scale->get_shape().type()}, {2}});
+    two_lit      = m.add_instruction(
+        make_op("multibroadcast", {{"out_lens", dq_scale->get_shape().lens()}}), two_lit);
+    auto adj_dq_scale = m.add_instruction(make_op("mul"), dq_scale, two_lit);
+    return std::vector<migraphx::instruction_ref>{cast_input, adj_dq_scale, adj_zp};
+}
+
+TEST_CASE(fp8_gemm_conversion)
+{
+    // m1 is the QDQ gemm under test, m2 the module expected after fp8_ocp_to_fnuz and
+    // m3 the module expected after additionally running simplify_qdq.
+    using migraphx::fp8::fp8e4m3fn;
+    std::vector<std::size_t> data_lens = {2, 3, 8, 8};
+    migraphx::module m1;
+    {
+        auto a     = m1.add_parameter("a", {migraphx::shape::float_type, data_lens});
+        auto b     = m1.add_parameter("b", {migraphx::shape::float_type, data_lens});
+        auto scale = m1.add_literal(0.5f);
+        std::vector<fp8e4m3fn> data;
+        data.push_back(fp8e4m3fn{0.f});
+        auto zero =
+            m1.add_literal(migraphx::shape{migraphx::shape::fp8e4m3fn_type, {1}, {0}}, data);
+
+        auto qa = add_quantize_op(m1, "quantizelinear", a, scale, zero);
+        auto qb = add_quantize_op(m1, "quantizelinear", b, scale, zero);
+        auto da =
+            add_quantize_op(m1, "dequantizelinear", qa, qa->inputs().at(1), qa->inputs().at(2));
+        auto db =
+            add_quantize_op(m1, "dequantizelinear", qb, qb->inputs().at(1), qb->inputs().at(2));
+        auto dot = m1.add_instruction(migraphx::make_op("dot"), da, db);
+        m1.add_return({dot});
+    }
+    run_fp8_ocp_to_fnuz(m1);
+
+    // expected after fp8_ocp_to_fnuz
+    migraphx::module m2;
+    {
+        auto a     = m2.add_parameter("a", {migraphx::shape::float_type, data_lens});
+        auto b     = m2.add_parameter("b", {migraphx::shape::float_type, data_lens});
+        auto scale = m2.add_literal(0.5f);
+        std::vector<fp8e4m3fn> data;
+        data.push_back(fp8e4m3fn{0.f});
+        auto zero =
+            m2.add_literal(migraphx::shape{migraphx::shape::fp8e4m3fn_type, {1}, {0}}, data);
+
+        auto qa = add_quantize_op(m2, "quantizelinear", a, scale, zero);
+        auto qb = add_quantize_op(m2, "quantizelinear", b, scale, zero);
+
+        auto outs_a = cast_fp8_helper(m2, qa, scale, zero);
+        auto adj_a  = outs_a.at(0);
+        auto mb_scales_a =
+            m2.add_instruction(make_op("multibroadcast", {{"out_lens", data_lens}}), outs_a.at(1));
+        auto mb_zp_a =
+            m2.add_instruction(make_op("multibroadcast", {{"out_lens", data_lens}}), outs_a.at(2));
+        auto da = m2.add_instruction(make_op("dequantizelinear"), adj_a, mb_scales_a, mb_zp_a);
+
+        auto outs_b = cast_fp8_helper(m2, qb, scale, zero);
+        auto adj_b  = outs_b.at(0);
+        auto mb_scales_b =
+            m2.add_instruction(make_op("multibroadcast", {{"out_lens", data_lens}}), outs_b.at(1));
+        auto mb_zp_b =
+            m2.add_instruction(make_op("multibroadcast", {{"out_lens", data_lens}}), outs_b.at(2));
+        auto db = m2.add_instruction(make_op("dequantizelinear"), adj_b, mb_scales_b, mb_zp_b);
+
+        auto dot = m2.add_instruction(migraphx::make_op("dot"), da, db);
+        m2.add_return({dot});
+    }
+
+    EXPECT(m1 == m2);
+
+    // expected after simplify_qdq
+    migraphx::module m3;
+    {
+        auto a     = m3.add_parameter("a", {migraphx::shape::float_type, data_lens});
+        auto b     = m3.add_parameter("b", {migraphx::shape::float_type, data_lens});
+        auto scale = m3.add_literal(0.5f);
+        std::vector<fp8e4m3fn> data;
+        data.push_back(fp8e4m3fn{0.f});
+        auto zero =
+            m3.add_literal(migraphx::shape{migraphx::shape::fp8e4m3fn_type, {1}, {0}}, data);
+
+        auto qa = add_quantize_op(m3, "quantizelinear", a, scale, zero);
+        auto qb = add_quantize_op(m3, "quantizelinear", b, scale, zero);
+
+        auto outs_a      = cast_fp8_helper(m3, qa, qa->inputs().at(1), qa->inputs().at(2));
+        auto outs_b      = cast_fp8_helper(m3, qb, qb->inputs().at(1), qb->inputs().at(2));
+        auto adj_qa      = outs_a.at(0);
+        auto adj_scale_a = outs_a.at(1);
+        auto adj_qb      = outs_b.at(0);
+        auto adj_scale_b = outs_b.at(1);
+
+        auto dot = m3.add_instruction(migraphx::make_op("quant_dot"), adj_qa, adj_qb);
+
+        auto out_scale = add_scale_mul(m3, adj_scale_a, adj_scale_b, 1, 1, dot->get_shape().lens());
+        auto dq_out    = add_quantize_op(m3, "dequantizelinear", dot, out_scale);
+        m3.add_return({dq_out});
+    }
+
+    run_simplify_qdq(m1);
+    // running propagate constant to simplify adjustments to literals
+    // could pass the test without, but a tedious amount of instructions to rearrange
+    run_cse_pc(m1);
+    run_cse_pc(m3);
+    EXPECT(m1 == m3);
+}
+
+int main(int argc, const char* argv[]) { test::run(argc, argv); }
diff --git a/test/include/quantize_helpers.hpp b/test/include/quantize_helpers.hpp
new file mode 100644
index 00000000000..43bde67199e
--- /dev/null
+++ b/test/include/quantize_helpers.hpp
@@ -0,0 +1,110 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_TEST_INCLUDE_QUANTIZE_HELPERS_HPP
+#define MIGRAPHX_GUARD_TEST_INCLUDE_QUANTIZE_HELPERS_HPP
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+#include <migraphx/program.hpp>
+#include <migraphx/register_target.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/make_op.hpp>
+
+// Broadcast `scale` to `out_lens`. Returns `scale` unchanged when its lens already equal
+// `out_lens`; a 1-D single-element scale is multibroadcast, anything else is broadcast
+// along `axis`.
+inline migraphx::instruction_ref broadcast_scale(migraphx::module& m,
+                                                 migraphx::instruction_ref scale,
+                                                 const std::vector<std::size_t>& out_lens,
+                                                 std::size_t axis)
+{
+    if(scale->get_shape().lens() == out_lens)
+        return scale;
+
+    migraphx::instruction_ref scale_mb;
+    auto scale_lens = scale->get_shape().lens();
+    // check the rank first so front() is never called on an empty lens vector
+    if(scale_lens.size() == 1 and scale_lens.front() == 1)
+        scale_mb =
+            m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", out_lens}}), scale);
+    else
+        scale_mb = m.add_instruction(
+            migraphx::make_op("broadcast", {{"axis", axis}, {"out_lens", out_lens}}), scale);
+    return scale_mb;
+}
+
+// Multibroadcast `shift` to `out_lens` unless its lens already equal `out_lens`.
+inline migraphx::instruction_ref broadcast_shift(migraphx::module& m,
+                                                 migraphx::instruction_ref shift,
+                                                 const std::vector<std::size_t>& out_lens)
+{
+    if(shift->get_shape().lens() == out_lens)
+        return shift;
+    return m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", out_lens}}), shift);
+}
+
+// Broadcast `scale1` (along `axis1`) and `scale2` (along `axis2`) to `out_lens` with
+// broadcast_scale and add a `mul` of the two results.
+inline migraphx::instruction_ref add_scale_mul(migraphx::module& m,
+                                               migraphx::instruction_ref scale1,
+                                               migraphx::instruction_ref scale2,
+                                               std::size_t axis1,
+                                               std::size_t axis2,
+                                               const std::vector<std::size_t>& out_lens)
+{
+    auto scale1_mb = broadcast_scale(m, scale1, out_lens, axis1);
+    auto scale2_mb = broadcast_scale(m, scale2, out_lens, axis2);
+    return m.add_instruction(migraphx::make_op("mul"), scale1_mb, scale2_mb);
+}
+
+// Add the op `name` on `x`, with `scale` (via broadcast_scale along `q_axis`) and `shift`
+// (via broadcast_shift) broadcast to the lens of `x`.
+inline migraphx::instruction_ref add_quantize_op(migraphx::module& m,
+                                                 const std::string& name,
+                                                 migraphx::instruction_ref x,
+                                                 migraphx::instruction_ref scale,
+                                                 migraphx::instruction_ref shift,
+                                                 std::size_t q_axis = 1)
+{
+    auto lens     = x->get_shape().lens();
+    auto scale_mb = broadcast_scale(m, scale, lens, q_axis);
+    auto shift_mb = broadcast_shift(m, shift, lens);
+    return m.add_instruction(migraphx::make_op(name), x, scale_mb, shift_mb);
+}
+
+// Add the op `name` on `x` with only `scale` broadcast to the lens of `x` (no shift input).
+inline migraphx::instruction_ref add_quantize_op(migraphx::module& m,
+                                                 const std::string& name,
+                                                 migraphx::instruction_ref x,
+                                                 migraphx::instruction_ref scale,
+                                                 std::size_t q_axis = 1)
+{
+    auto lens     = x->get_shape().lens();
+    auto scale_mb = broadcast_scale(m, scale, lens, q_axis);
+    return m.add_instruction(migraphx::make_op(name), x, scale_mb);
+}
+
+#endif // MIGRAPHX_GUARD_TEST_INCLUDE_QUANTIZE_HELPERS_HPP
diff --git a/test/ref/fp8_ocp_to_fnuz.cpp b/test/ref/fp8_ocp_to_fnuz.cpp
new file mode 100644
index 00000000000..d0a00df9565
--- /dev/null
+++ b/test/ref/fp8_ocp_to_fnuz.cpp
@@ -0,0 +1,226 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/instruction.hpp>
+#include <migraphx/literal.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/program.hpp>
+#include <migraphx/register_target.hpp>
+#include <migraphx/verify.hpp>
+#include <migraphx/pass_manager.hpp>
+#include <migraphx/fp8_ocp_to_fnuz.hpp>
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/generate.hpp>
+
+#include <test.hpp>
+#include <quantize_helpers.hpp>
+
+/**
+ * Test that programs produce equivalent results before and after
+ * the fp8_ocp_to_fnuz pass is applied.
+ */
+
+void run_fp8_ocp_to_fnuz(migraphx::module& m)
+{
+    migraphx::run_passes(m, {migraphx::fp8_ocp_to_fnuz{}, migraphx::dead_code_elimination{}});
+}
+
+TEST_CASE(fp8_ocp_to_fnuz_gemm)
+{
+    // evaluates the QDQ gemm on the ref target with and without fp8_ocp_to_fnuz applied
+    using migraphx::fp8::fp8e4m3fn;
+    std::vector<std::size_t> data_lens = {2, 2};
+    migraphx::shape data_shape{migraphx::shape::float_type, data_lens};
+
+    migraphx::program p1;
+    auto* m1 = p1.get_main_module();
+    {
+        auto a     = m1->add_parameter("a", data_shape);
+        auto b     = m1->add_parameter("b", data_shape);
+        auto scale = m1->add_literal(0.5f);
+        std::vector<fp8e4m3fn> data;
+        data.push_back(fp8e4m3fn{0.f});
+        auto zero =
+            m1->add_literal(migraphx::shape{migraphx::shape::fp8e4m3fn_type, {1}, {0}}, data);
+
+        auto qa = add_quantize_op(*m1, "quantizelinear", a, scale, zero);
+        auto qb = add_quantize_op(*m1, "quantizelinear", b, scale, zero);
+        auto da =
+            add_quantize_op(*m1, "dequantizelinear", qa, qa->inputs().at(1), qa->inputs().at(2));
+        auto db =
+            add_quantize_op(*m1, "dequantizelinear", qb, qb->inputs().at(1), qb->inputs().at(2));
+        auto dot = m1->add_instruction(migraphx::make_op("dot"), da, db);
+        m1->add_return({dot});
+    }
+
+    migraphx::program p2 = p1;
+    migraphx::module* m2 = p2.get_main_module();
+    run_fp8_ocp_to_fnuz(*m2);
+
+    p1.compile(migraphx::make_target("ref"));
+    p2.compile(migraphx::make_target("ref"));
+
+    migraphx::parameter_map params;
+    std::vector<float> a_data = {20, -100, 100, 0.25};
+    std::vector<float> b_data = {28, 0.125, 2.5, 0.25};
+    params["a"]               = migraphx::argument(data_shape, a_data.data());
+    params["b"]               = migraphx::argument(data_shape, b_data.data());
+
+    auto result_1 = p1.eval({params}).back();
+    auto result_2 = p2.eval({params}).back();
+    std::vector<float> results_vector_1(4);
+    std::vector<float> results_vector_2(4);
+    result_1.visit([&](auto output) { results_vector_1.assign(output.begin(), output.end()); });
+    result_2.visit([&](auto output) { results_vector_2.assign(output.begin(), output.end()); });
+    EXPECT(migraphx::verify::verify_rms_range(results_vector_1, results_vector_2));
+}
+
+TEST_CASE(fp8_ocp_to_fnuz_gemm_multi_scale)
+{
+    // same comparison as the gemm test, with a three-element scale literal on input a
+    using migraphx::fp8::fp8e4m3fn;
+    std::vector<std::size_t> data_lens = {3, 3};
+    migraphx::shape data_shape{migraphx::shape::float_type, data_lens};
+    migraphx::shape scales_shape{migraphx::shape::float_type, {3}};
+
+    migraphx::program p1;
+    auto* m1 = p1.get_main_module();
+    {
+        auto a      = m1->add_parameter("a", data_shape);
+        auto b      = m1->add_parameter("b", data_shape);
+        auto scale1 = m1->add_literal(migraphx::generate_literal(scales_shape, 0));
+        auto scale2 = m1->add_literal(0.4f);
+        std::vector<fp8e4m3fn> data;
+        data.push_back(fp8e4m3fn{0.f});
+        auto zero =
+            m1->add_literal(migraphx::shape{migraphx::shape::fp8e4m3fn_type, {1}, {0}}, data);
+
+        auto qa = add_quantize_op(*m1, "quantizelinear", a, scale1, zero);
+        auto qb = add_quantize_op(*m1, "quantizelinear", b, scale2, zero);
+        auto da =
+            add_quantize_op(*m1, "dequantizelinear", qa, qa->inputs().at(1), qa->inputs().at(2));
+        auto db =
+            add_quantize_op(*m1, "dequantizelinear", qb, qb->inputs().at(1), qb->inputs().at(2));
+        auto dot = m1->add_instruction(migraphx::make_op("dot"), da, db);
+        m1->add_return({dot});
+    }
+
+    migraphx::program p2 = p1;
+    migraphx::module* m2 = p2.get_main_module();
+    run_fp8_ocp_to_fnuz(*m2);
+
+    p1.compile(migraphx::make_target("ref"));
+    p2.compile(migraphx::make_target("ref"));
+
+    migraphx::parameter_map params;
+    std::vector<float> a_data = {20, -100, 100, 0.25, 0.3, 3.3, 5.0, -8.0, 63.0};
+    std::vector<float> b_data = {28, 0.125, 2.5, 0.25, 0.0582, -187, 0.716, 8.12, 1.87};
+    params["a"]               = migraphx::argument(data_shape, a_data.data());
+    params["b"]               = migraphx::argument(data_shape, b_data.data());
+
+    auto result_1 = p1.eval({params}).back();
+    auto result_2 = p2.eval({params}).back();
+    std::vector<float> results_vector_1(9);
+    std::vector<float> results_vector_2(9);
+    result_1.visit([&](auto output) { results_vector_1.assign(output.begin(), output.end()); });
+    result_2.visit([&](auto output) { results_vector_2.assign(output.begin(), output.end()); });
+    EXPECT(migraphx::verify::verify_rms_range(results_vector_1, results_vector_2));
+}
+
+TEST_CASE(fp8_ocp_to_fnuz_conv)
+{
+    // Convolution of two literal inputs through QDQ pairs; the program is evaluated with
+    // and without fp8_ocp_to_fnuz applied and both outputs are compared with
+    // verify_rms_range.
+    using migraphx::fp8::fp8e4m3fn;
+
+    migraphx::program p1;
+    auto* m1 = p1.get_main_module();
+    {
+        std::vector<float> a_data = {
+            2.71567607,  -0.9960829,  0.91671127,  0.28140706,  0.63235772,  0.08077253,
+            0.80927712,  -0.59108931, -1.05421555, -2.76622486, -0.85044265, -0.52049929,
+            0.67726439,  -0.65290606, 0.02345525,  -0.33579525, 0.38901961,  1.05473483,
+            -1.31188095, 1.8963089,   -0.07265259, 0.947339,    0.41949373,  -0.70814759,
+            0.25892952,  1.07311416,  1.2571274,   -0.62318051, -0.19951548, -0.94232577,
+            -0.29393643, 0.42292568,  -0.80230367, 1.40909171,  0.63617158,  0.13900366,
+            1.09253144,  -0.15265895, 1.54781747,  0.72780299,  1.09189606,  -0.38068101,
+            0.97057933,  -0.58958799, 1.56188643,  0.21474874,  0.58725154,  -1.27097559,
+            -0.03024297, 1.09437096,  -0.4897908,  0.34838957,  -1.31042492, -1.69069934,
+            0.86956722,  -0.40457946, 0.46691212,  1.29273605,  0.26464137,  0.22073045,
+            -1.02178168, 0.22163901,  -1.84387338, 0.75522131,  -0.45775682, -0.42241111,
+            -1.50944722, 1.07256448,  -1.95876884, -0.28106022, 0.3341668,   2.13129425,
+            -1.14728117, -1.06555498, -0.298444,   -0.88322699, -0.65866792, -2.06007552,
+            0.01374334,  0.45612028,  0.52715492,  1.01914406,  -1.72659791, 0.80650896,
+            0.16860051,  2.24112225,  -0.78620857, 0.36566174,  -0.07020134, -0.47976932,
+            -0.68230027, -0.94711417, -0.54506505, 1.66504931,  -0.71860826, 0.61132306};
+
+        std::vector<float> b_data = {
+            2.82721668e-02,  6.44195229e-02,  1.53499246e-02,  1.72468081e-01,  -6.33238107e-02,
+            9.49496776e-02,  1.40258059e-01,  -7.92879611e-02, -1.29301161e-01, 3.11307609e-03,
+            -1.90624535e-01, 1.13238767e-01,  -2.80647576e-02, 3.12882811e-02,  -3.52091640e-02,
+            3.33581865e-02,  6.43158704e-02,  7.40238279e-02,  -1.00106120e-01, -9.56912562e-02,
+            1.44342467e-01,  9.40258950e-02,  6.36333972e-02,  1.66158378e-03,  -8.91554281e-02,
+            2.58734226e-02,  1.70919895e-02,  1.78214177e-01,  8.84564668e-02,  8.98126513e-02,
+            -1.63809001e-01, 1.37802169e-01,  1.66439757e-01,  -1.45631135e-02, 1.88469887e-04,
+            4.76950556e-02,  -1.91969007e-01, -1.76233292e-01, -7.70473927e-02, 1.14828631e-01,
+            1.76608220e-01,  -1.50728196e-01, 1.99946314e-02,  -5.88052124e-02, 1.31612435e-01,
+            1.61106288e-02,  -1.35080189e-01, 1.49512306e-01,  3.86456847e-02,  1.29330024e-01,
+            -3.22975963e-02, -5.60784787e-02, -5.41997552e-02, 4.78562862e-02};
+
+        migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 4, 4}};
+        auto a = m1->add_literal(migraphx::literal{a_shape, a_data});
+
+        migraphx::shape b_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
+        auto b     = m1->add_literal(migraphx::literal{b_shape, b_data});
+        auto scale = m1->add_literal(0.5f);
+        std::vector<fp8e4m3fn> data;
+        data.push_back(fp8e4m3fn{0.f});
+        auto zero =
+            m1->add_literal(migraphx::shape{migraphx::shape::fp8e4m3fn_type, {1}, {0}}, data);
+
+        auto qa = add_quantize_op(*m1, "quantizelinear", a, scale, zero);
+        auto qb = add_quantize_op(*m1, "quantizelinear", b, scale, zero);
+        auto da =
+            add_quantize_op(*m1, "dequantizelinear", qa, qa->inputs().at(1), qa->inputs().at(2));
+        auto db =
+            add_quantize_op(*m1, "dequantizelinear", qb, qb->inputs().at(1), qb->inputs().at(2));
+        auto conv_ins = m1->add_instruction(migraphx::make_op("convolution"), da, db);
+        m1->add_return({conv_ins});
+    }
+
+    migraphx::program p2 = p1;
+    migraphx::module* m2 = p2.get_main_module();
+    run_fp8_ocp_to_fnuz(*m2);
+
+    p1.compile(migraphx::make_target("ref"));
+    p2.compile(migraphx::make_target("ref"));
+
+    auto result_1 = p1.eval({}).back();
+    auto result_2 = p2.eval({}).back();
+    std::vector<float> results_vector_1(16);
+    std::vector<float> results_vector_2(16);
+    result_1.visit([&](auto output) { results_vector_1.assign(output.begin(), output.end()); });
+    result_2.visit([&](auto output) { results_vector_2.assign(output.begin(), output.end()); });
+    EXPECT(migraphx::verify::verify_rms_range(results_vector_1, results_vector_2));
+}
diff --git a/test/simplify_qdq_test.cpp b/test/simplify_qdq_test.cpp
index c3c50cb4172..cef500fbfd3 100644
--- a/test/simplify_qdq_test.cpp
+++ b/test/simplify_qdq_test.cpp
@@ -26,6 +26,7 @@
 #include <migraphx/register_target.hpp>
 #include <migraphx/instruction.hpp>
 #include <test.hpp>
+#include <quantize_helpers.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/op/pooling.hpp>
 #include <migraphx/eliminate_common_subexpression.hpp>
@@ -45,75 +46,12 @@ void run_pass(migraphx::module& m)
 {
     run_passes(m, {migraphx::simplify_qdq{}, migraphx::dead_code_elimination{}});
 }
+
 void run_cse(migraphx::module& m)
 {
     run_passes(m, {migraphx::eliminate_common_subexpression{}, migraphx::dead_code_elimination{}});
 }
 
-migraphx::instruction_ref broadcast_scale(migraphx::module& m,
-                                          migraphx::instruction_ref scale,
-                                          const std::vector<std::size_t>& out_lens,
-                                          std::size_t axis)
-{
-    if(scale->get_shape().lens() == out_lens)
-        return scale;
-
-    migraphx::instruction_ref scale_mb;
-    auto scale_lens = scale->get_shape().lens();
-    if(scale_lens.front() == 1 and scale_lens.size() == 1)
-        scale_mb =
-            m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", out_lens}}), scale);
-    else
-        scale_mb = m.add_instruction(
-            migraphx::make_op("broadcast", {{"axis", axis}, {"out_lens", out_lens}}), scale);
-    return scale_mb;
-}
-
-migraphx::instruction_ref broadcast_shift(migraphx::module& m,
-                                          migraphx::instruction_ref shift,
-                                          const std::vector<std::size_t>& out_lens)
-{
-    if(shift->get_shape().lens() == out_lens)
-        return shift;
-    return m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", out_lens}}), shift);
-}
-
-migraphx::instruction_ref add_quantize_op(migraphx::module& m,
-                                          const std::string& name,
-                                          migraphx::instruction_ref x,
-                                          migraphx::instruction_ref scale,
-                                          migraphx::instruction_ref shift,
-                                          std::size_t q_axis = 1)
-{
-    auto lens     = x->get_shape().lens();
-    auto scale_mb = broadcast_scale(m, scale, lens, q_axis);
-    auto shift_mb = broadcast_shift(m, shift, lens);
-    return m.add_instruction(migraphx::make_op(name), x, scale_mb, shift_mb);
-}
-
-migraphx::instruction_ref add_quantize_op(migraphx::module& m,
-                                          const std::string& name,
-                                          migraphx::instruction_ref x,
-                                          migraphx::instruction_ref scale,
-                                          std::size_t q_axis = 1)
-{
-    auto lens     = x->get_shape().lens();
-    auto scale_mb = broadcast_scale(m, scale, lens, q_axis);
-    return m.add_instruction(migraphx::make_op(name), x, scale_mb);
-}
-
-migraphx::instruction_ref add_scale_mul(migraphx::module& m,
-                                        migraphx::instruction_ref scale1,
-                                        migraphx::instruction_ref scale2,
-                                        std::size_t axis1,
-                                        std::size_t axis2,
-                                        const std::vector<std::size_t>& out_lens)
-{
-    auto scale1_mb = broadcast_scale(m, scale1, out_lens, axis1);
-    auto scale2_mb = broadcast_scale(m, scale2, out_lens, axis2);
-    return m.add_instruction(migraphx::make_op("mul"), scale1_mb, scale2_mb);
-}
-
 migraphx::instruction_ref init_zero_point(migraphx::module& m, migraphx::instruction_ref q_ins)
 {
     auto zp = m.add_literal(migraphx::literal{migraphx::shape{q_ins->get_shape().type()}, {0}});
diff --git a/test/verify/test_fp8_ocp_to_fnuz_gemm.cpp b/test/verify/test_fp8_ocp_to_fnuz_gemm.cpp
new file mode 100644
index 00000000000..88fc9828034
--- /dev/null
+++ b/test/verify/test_fp8_ocp_to_fnuz_gemm.cpp
@@ -0,0 +1,60 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "verify_program.hpp"
+#include <migraphx/program.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/make_op.hpp>
+#include <quantize_helpers.hpp>
+
+struct test_fp8_ocp_to_fnuz_gemm : verify_program<test_fp8_ocp_to_fnuz_gemm>
+{
+    using fp8e4m3fn   = migraphx::fp8::fp8e4m3fn;
+    using fp8e4m3fnuz = migraphx::fp8::fp8e4m3fnuz;
+    migraphx::program create_program() const
+    {
+        migraphx::program p;
+        auto* mm                           = p.get_main_module();
+        std::vector<std::size_t> data_lens = {2, 2};
+        migraphx::shape data_shape{migraphx::shape::float_type, data_lens};
+        auto a     = mm->add_parameter("a", data_shape);
+        auto b     = mm->add_parameter("b", data_shape);
+        auto scale = mm->add_literal(0.5f);
+        std::vector<fp8e4m3fn> data;
+        data.push_back(fp8e4m3fn{0.f});
+        auto zero =
+            mm->add_literal(migraphx::shape{migraphx::shape::fp8e4m3fn_type, {1}, {0}}, data);
+        // q/dq round trip on both inputs: each dequantizelinear reuses the scale and zero-point inputs of its quantizelinear.
+        auto qa = add_quantize_op(*mm, "quantizelinear", a, scale, zero);
+        auto qb = add_quantize_op(*mm, "quantizelinear", b, scale, zero);
+        auto da =
+            add_quantize_op(*mm, "dequantizelinear", qa, qa->inputs().at(1), qa->inputs().at(2));
+        auto db =
+            add_quantize_op(*mm, "dequantizelinear", qb, qb->inputs().at(1), qb->inputs().at(2));
+        auto dot = mm->add_instruction(migraphx::make_op("dot"), da, db);
+        mm->add_return({dot});
+        return p;
+    }
+    std::string section() const { return "gemm"; }
+};

From c87d983112dd54393bb244fd094e90f283180b62 Mon Sep 17 00:00:00 2001
From: Richa Gadgil <richa.gadgil@amd.com>
Date: Mon, 13 Jan 2025 12:07:42 -0800
Subject: [PATCH 09/12] mxr to onnx (#3682)

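Extends the migraphx_py bindings to expose instruction_ref details (inputs, name, hash/equality), operator attribute values and module iteration, and adds tools/converters/mxr_to_onnx.py, which uses them to rebuild an ONNX file from a saved .mxr program.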
---
 src/py/migraphx_py.cpp          |  63 +++++++++++-
 tools/converters/mxr_to_onnx.py | 173 ++++++++++++++++++++++++++++++++
 2 files changed, 232 insertions(+), 4 deletions(-)
 create mode 100644 tools/converters/mxr_to_onnx.py

diff --git a/src/py/migraphx_py.cpp b/src/py/migraphx_py.cpp
index 75f7fab09d9..14beffb0324 100644
--- a/src/py/migraphx_py.cpp
+++ b/src/py/migraphx_py.cpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -44,6 +44,7 @@
 #include <migraphx/float8.hpp>
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/version.h>
+#include <migraphx/iterator_for.hpp>
 #ifdef HAVE_GPU
 #include <migraphx/gpu/hip.hpp>
 #endif
@@ -259,6 +260,41 @@ py::buffer_info to_buffer_info(T& x)
     return b;
 }
 
+py::object to_py_object(const migraphx::value& val)
+{
+    py::object result;
+    // Objects become dicts and other value vectors become lists (both recursively); anything else goes through py::cast.
+    val.visit_value([&](const auto& x) {
+        if constexpr(std::is_same<std::decay_t<decltype(x)>, std::vector<migraphx::value>>{})
+        {
+            if(val.is_object())
+            {
+                py::dict py_dict;
+                for(const auto& item : x)
+                {
+                    py_dict[py::str(item.get_key())] = to_py_object(item.without_key());
+                }
+                result = py_dict;
+            }
+            else
+            {
+                py::list py_list;
+                for(const auto& item : x)
+                {
+                    py_list.append(to_py_object(item));
+                }
+                result = py_list;
+            }
+        }
+        else
+        {
+            result = py::cast(x);
+        }
+    });
+
+    return result;
+}
+
 migraphx::shape to_shape(const py::buffer_info& info)
 {
     migraphx::shape::type_t t;
@@ -380,7 +416,16 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
 
     py::class_<migraphx::instruction_ref>(m, "instruction_ref")
         .def("shape", [](migraphx::instruction_ref i) { return i->get_shape(); })
-        .def("op", [](migraphx::instruction_ref i) { return i->get_operator(); });
+        .def("op", [](migraphx::instruction_ref i) { return i->get_operator(); })
+        .def("inputs", [](migraphx::instruction_ref i) { return i->inputs(); })
+        .def("name", [](migraphx::instruction_ref i) { return i->name(); })
+        .def("__hash__",
+             [](const migraphx::instruction_ref& i) {
+                 return std::hash<migraphx::instruction_ref>()(i);
+             })
+        .def("__eq__", [](const migraphx::instruction_ref& i, const migraphx::instruction_ref& j) {
+            return std::equal_to<migraphx::instruction_ref>()(i, j);
+        });
 
     py::class_<migraphx::module, std::unique_ptr<migraphx::module, py::nodelete>>(m, "module")
         .def("print", [](const migraphx::module& mm) { std::cout << mm << std::endl; })
@@ -422,7 +467,14 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
                 return mm.add_return(args);
             },
             py::arg("args"))
-        .def("__repr__", [](const migraphx::module& mm) { return migraphx::to_string(mm); });
+        .def("__repr__", [](const migraphx::module& mm) { return migraphx::to_string(mm); })
+        .def(
+            "__iter__",
+            [](const migraphx::module& mm) {
+                auto r = migraphx::iterator_for(mm);
+                return py::make_iterator(r.begin(), r.end());
+            },
+            py::keep_alive<0, 1>());
 
     py::class_<migraphx::program>(m, "program")
         .def(py::init([]() { return migraphx::program(); }))
@@ -502,7 +554,10 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
           }
           return migraphx::make_op(name, v);
       }))
-        .def("name", &migraphx::operation::name);
+        .def("name", &migraphx::operation::name)
+        .def("values", [](const migraphx::operation& operation) -> py::object {
+            return to_py_object(operation.to_value());
+        });
 
     py::enum_<migraphx::op::pooling_mode>(op, "pooling_mode")
         .value("average", migraphx::op::pooling_mode::average)
diff --git a/tools/converters/mxr_to_onnx.py b/tools/converters/mxr_to_onnx.py
new file mode 100644
index 00000000000..0919b82cfce
--- /dev/null
+++ b/tools/converters/mxr_to_onnx.py
@@ -0,0 +1,173 @@
+import migraphx
+import onnx
+from onnx import helper, TensorProto, checker
+import numpy as np
+import os
+import argparse
+
+
+# Map an instruction's MIGraphX type string to an ONNX dtype (only float and bf16; any other type raises KeyError)
+def get_dtype(instruction):
+    type_mapping = {
+        'float_type': TensorProto.FLOAT,
+        'bf16_type': TensorProto.BFLOAT16
+    }
+    return type_mapping[instruction.shape().type_string()]
+
+
+# Return instruction.shape().lens(); a list argument is rejected with ValueError
+def get_shape(instruction):
+    if isinstance(instruction, list):
+        raise ValueError("Expected instruction, got a list.")
+    return instruction.shape().lens()
+
+
+# Translate a MIGraphX operator name to its ONNX op type; raises NotImplementedError for unmapped names
+def map_operation(operation):
+    mxr_to_onnx_op = {
+        "dot": "MatMul",
+        "mul": "Mul",
+        "add": "Add",
+        "multibroadcast": "Expand",
+        "reshape": "Reshape",
+        "erf": "Erf",
+        "tanh": "Tanh",
+        "exp": "Exp",
+        "div": "Div",
+        "relu": "Relu"
+    }
+    if operation not in mxr_to_onnx_op:
+        raise NotImplementedError(f"Operation '{operation}' is not supported.")
+    return mxr_to_onnx_op[operation]
+
+
+# Build the ONNX node for one instruction; tensors are named str(hash(instruction_ref)) for both inputs and outputs
+def create_node(instruction, parameters, node_name, n, initializers):
+    if node_name == "multibroadcast" or node_name == "reshape":
+        shape_key = "out_lens" if node_name == "multibroadcast" else "dims"
+        shape_array = np.array(parameters[shape_key], dtype=np.int64)
+        initializer_name = f"{node_name}_shape_{n}"
+        # The target shape is appended to `initializers` and passed to the node as an extra INT64 input
+        initializers.append(
+            helper.make_tensor(name=initializer_name,
+                               data_type=TensorProto.INT64,
+                               dims=shape_array.shape,
+                               vals=shape_array.flatten().tolist()))
+        return helper.make_node(
+            map_operation(node_name),
+            inputs=[str(hash(i))
+                    for i in instruction.inputs()] + [initializer_name],
+            outputs=[str(hash(instruction))])
+
+    elif node_name == "transpose":
+        return helper.make_node(
+            "Transpose",
+            inputs=[str(hash(i)) for i in instruction.inputs()],
+            outputs=[str(hash(instruction))],
+            perm=parameters["permutation"])
+    # NOTE(review): "padding" is forwarded to ONNX `pads` unchanged - confirm it already holds begin/end values per axis
+    elif node_name == "convolution":
+        return helper.make_node(
+            "Conv",
+            inputs=[str(hash(i)) for i in instruction.inputs()],
+            outputs=[str(hash(instruction))
+                     ],  #[str(hash(i)) for i in instruction.outputs()],
+            dilations=parameters["dilation"],
+            group=parameters["group"],
+            pads=parameters["padding"],
+            strides=parameters["stride"])
+
+    return helper.make_node(
+        map_operation(node_name),
+        inputs=[str(hash(i)) for i in instruction.inputs()],
+        outputs=[str(hash(instruction))])
+
+
+# Walk a MIGraphX module in order and assemble an ONNX ModelProto (graph inputs, nodes, initializers, outputs)
+def generate_onnx(module):
+    inputs = {}
+    operations = []
+    initializers = []
+    n = 0  # Node counter
+    output = None
+
+    for instruction in module:
+        op_name = instruction.op().name()
+
+        # Handle input nodes
+        if op_name in ["@literal", "@param"]:
+            # NOTE: literals are only declared as graph inputs here; their values are not exported
+            inputs[str(hash(instruction))] = helper.make_tensor_value_info(
+                str(hash(instruction)), get_dtype(instruction),
+                get_shape(instruction))
+
+        # Handle computational nodes
+        elif "@" not in op_name:
+            n += 1
+            parameters = instruction.op().values()
+
+            operations.append(
+                create_node(instruction, parameters, op_name, n, initializers))
+
+        # Handle return node
+        elif op_name == "@return":
+
+            output = [
+                helper.make_tensor_value_info(str(hash(i)), get_dtype(i),
+                                              get_shape(i))
+                for i in instruction.inputs()
+            ]
+
+    # Create the ONNX graph
+    graph = helper.make_graph(nodes=operations,
+                              name="Graph",
+                              inputs=list(inputs.values()),
+                              initializer=initializers,
+                              outputs=output if output else [])
+
+    return helper.make_model(graph, producer_name="onnx-dot-add-example")
+
+
+# Convert every file named *.mxr in mxr_directory_path into <stem>.onnx under onnx_directory_path
+def main(mxr_directory_path, onnx_directory_path):
+    for file_name in os.listdir(mxr_directory_path):
+        file_path = os.path.join(mxr_directory_path, file_name)
+        if file_name.endswith(".mxr"):
+            try:
+                program = migraphx.load(file_path)
+                module = program.get_main_module()
+                model = generate_onnx(module)
+
+                # Validation problems are only reported; the model is still saved below
+                try:
+                    checker.check_model(model)
+                    print(f"ONNX model for {file_path} is valid.")
+                except onnx.checker.ValidationError as e:
+                    print(f"Validation failed for {file_path}: {e}")
+                except Exception as e:
+                    print(
+                        f"Unexpected error during validation for {file_path}: {e}"
+                    )
+
+                os.makedirs(onnx_directory_path, exist_ok=True)
+                onnx_file_path = os.path.join(
+                    onnx_directory_path, os.path.splitext(file_name)[0] + ".onnx")
+                onnx.save(model, onnx_file_path)
+
+            except Exception as e:
+                print(f"Error processing {file_path}: {e}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Process MXR files and generate ONNX models.")
+    parser.add_argument("mxr_directory_path",
+                        type=str,
+                        help="Path to the directory containing MXR files.")
+    parser.add_argument(
+        "onnx_directory_path",
+        type=str,
+        help="Path to the directory where ONNX models will be saved.")
+
+    args = parser.parse_args()
+    main(args.mxr_directory_path, args.onnx_directory_path)

From 02a585d829124d7ca9f4670695ebfa3d0c21934c Mon Sep 17 00:00:00 2001
From: Richa Gadgil <richa.gadgil@amd.com>
Date: Mon, 13 Jan 2025 19:06:48 -0800
Subject: [PATCH 10/12] Pass tests/gpu/jit.cpp with BF16 (#3639)

---
 .../kernels/include/migraphx/kernels/math.hpp |  3 ++-
 .../include/migraphx/kernels/type_traits.hpp  |  4 +++-
 test/gpu/jit.cpp                              | 22 +++++++++----------
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/math.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/math.hpp
index cadcc05f577..368ee9bc5fa 100644
--- a/src/targets/gpu/kernels/include/migraphx/kernels/math.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/math.hpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -172,6 +172,7 @@ constexpr auto where(bool cond, const T& a, const U& b)
 MIGRAPHX_DEVICE_MATH_FOR(float, abs, ::abs)
 MIGRAPHX_DEVICE_MATH_FOR(double, abs, ::abs)
 MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, abs, ::__habs)
+MIGRAPHX_DEVICE_MATH_FOR(migraphx::bf16, abs, ::fabsf)
 MIGRAPHX_DEVICE_MATH_BINARY_FOR(float, max, ::fmaxf)
 MIGRAPHX_DEVICE_MATH_BINARY_FOR(float, min, ::fminf)
 MIGRAPHX_DEVICE_MATH_BINARY_FOR(double, max, ::max)
diff --git a/src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp b/src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp
index 1b0d1343ea2..24b7d4a5b22 100644
--- a/src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -262,6 +262,8 @@ constexpr T numeric_max()
         return __FLT_MAX__;
     else if constexpr(is_same<T, migraphx::half>{})
         return __FLT16_MAX__;
+    else if constexpr(is_same<T, migraphx::bf16>{})
+        return 338953138925153547590470800371487866880.000000;
     else
         return 0;
 }
diff --git a/test/gpu/jit.cpp b/test/gpu/jit.cpp
index 65abfc03020..d9412cb24f5 100644
--- a/test/gpu/jit.cpp
+++ b/test/gpu/jit.cpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -383,13 +383,10 @@ TEST_CASE(compile_math)
     auto vec_sizes = {2, 4, 6};
     for(auto&& t : migraphx::shape::types())
     {
-        if(contains({migraphx::shape::bool_type,
-                     migraphx::shape::tuple_type,
-                     migraphx::shape::bf16_type},
-                    t))
+        if(contains({migraphx::shape::bool_type, migraphx::shape::tuple_type}, t))
             continue;
         auto name = migraphx::shape::cpp_type(t);
-        if(t == migraphx::shape::half_type)
+        if(contains({migraphx::shape::half_type, migraphx::shape::bf16_type}, t))
             name.insert(0, "migraphx::");
         data_types.push_back(name);
         // fp8 doesn't have vectorization support yet, therefore skip it for now.
@@ -444,15 +441,16 @@ TEST_CASE(assert_type_min_max)
     migraphx::gpu::context ctx;
     for(auto&& t : migraphx::shape::types())
     {
-        if(contains({migraphx::shape::bool_type,
-                     migraphx::shape::tuple_type,
-                     migraphx::shape::bf16_type},
-                    t))
+        if(contains(
+               {
+                   migraphx::shape::bool_type,
+                   migraphx::shape::tuple_type,
+               },
+               t))
             continue;
         auto name = migraphx::shape::cpp_type(t);
-        if(t == migraphx::shape::half_type)
+        if(contains({migraphx::shape::half_type, migraphx::shape::bf16_type}, t))
             name.insert(0, "migraphx::");
-
         migraphx::shape::visit(t, [&](auto as) {
             std::string min = "";
             std::string max = "";

From 9a342999a7ebd85ee6633d8809b3dcac295c7a09 Mon Sep 17 00:00:00 2001
From: Richa Gadgil <richa.gadgil@amd.com>
Date: Tue, 14 Jan 2025 12:09:34 -0800
Subject: [PATCH 11/12] Bf16 Verify Onnx Tests (#3627)

Tests with different ONNX representations than half:
- mean_bf16_test
- instance_norm_bf16 tests

Tests for half that do not seem to have BF16 counterpart:
- mod_test_half
---
 src/onnx/onnx_parser.cpp                      |   7 +-
 src/onnx/parse_instancenorm.cpp               |   5 +-
 test/onnx/add_bf16_test.onnx                  |  16 +
 test/onnx/eyelike_bf16_test.onnx              |  11 +
 test/onnx/gelu_default_bf16_test.onnx         |  11 +
 test/onnx/gemm_bf16_test.onnx                 | Bin 0 -> 193 bytes
 test/onnx/gen_onnx.py                         | 418 +++++++++++++++++-
 test/onnx/gridsample_bf16_test.onnx           | Bin 0 -> 236 bytes
 test/onnx/group_norm_3d_bf16_test.onnx        |  26 ++
 test/onnx/group_norm_4d_bf16_test.onnx        |  28 ++
 test/onnx/group_norm_5d_bf16_test.onnx        |  30 ++
 test/onnx/group_norm_small_eps_bf16_test.onnx |  26 ++
 test/onnx/hardsigmoid_bf16_test.onnx          |  15 +
 test/onnx/imagescaler_bf16_test.onnx          | Bin 0 -> 177 bytes
 test/onnx/instance_norm_bf16_test.onnx        |  25 ++
 .../instance_norm_dyn_batch_bf16_test.onnx    | Bin 0 -> 204 bytes
 test/onnx/isinf_bf16_test.onnx                |  11 +
 test/onnx/isnan_bf16_test.onnx                |  11 +
 test/onnx/layer_norm_3d_bf16_test.onnx        |  24 +
 test/onnx/layer_norm_4d_bf16_test.onnx        |  26 ++
 test/onnx/layer_norm_small_eps_bf16_test.onnx |  17 +
 test/onnx/mean_bf16_test.onnx                 |  25 ++
 test/onnx/mod_test_fmod_bf16.onnx             |  20 +
 test/onnx/mvn_default_axes_bf16_test.onnx     |  15 +
 test/onnx/mvn_rank_2_bf16_test.onnx           |  12 +
 test/onnx/mvn_rank_3_bf16_test.onnx           | Bin 0 -> 163 bytes
 ..._kd_mean_reduction_bf16_weighted_test.onnx |  25 ++
 ...kd_mean_reduction_bf16_weighted_test2.onnx |  23 +
 test/onnx/parse/add_bf16_test.cpp             |  39 ++
 test/onnx/parse/eyelike_bf16_test.cpp         |  46 ++
 test/onnx/parse/gemm_bf16_test.cpp            |  56 +++
 test/onnx/parse/group_norm_3d_bf16_test.cpp   |  34 ++
 test/onnx/parse/group_norm_4d_bf16_test.cpp   |  34 ++
 test/onnx/parse/group_norm_5d_bf16_test.cpp   |  39 ++
 .../parse/group_norm_small_eps_bf16_test.cpp  |  34 ++
 test/onnx/parse/hardsigmoid_bf16_test.cpp     |  58 +++
 test/onnx/parse/imagescaler_bf16_test.cpp     |  47 ++
 test/onnx/parse/instance_norm_bf16_test.cpp   |  63 +++
 .../instance_norm_dyn_batch_bf16_test.cpp     |  64 +++
 test/onnx/parse/isinf_bf16_test.cpp           |  38 ++
 test/onnx/parse/isnan_bf16_test.cpp           |  38 ++
 test/onnx/parse/layer_norm_3d_bf16_test.cpp   |  35 ++
 test/onnx/parse/layer_norm_4d_bf16_test.cpp   |  35 ++
 .../parse/layer_norm_small_eps_bf16_test.cpp  |  35 ++
 test/onnx/parse/mean_bf16_test.cpp            |  46 ++
 test/onnx/parse/mod_test_fmod_bf16.cpp        |  38 ++
 ...oodloss_kd_all_reduction_weighted_test.cpp |  87 +++-
 test/onnx/parse/size_bf16_test.cpp            |  36 ++
 ...rossentropyloss_2d_mean_reduction_test.cpp |  46 +-
 ...xcrossentropyloss_2d_no_reduction_test.cpp |  49 +-
 ...crossentropyloss_2d_sum_reduction_test.cpp |  46 +-
 ...opyloss_kd_all_reduction_weighted_test.cpp |  90 +++-
 test/onnx/round_bf16_test.onnx                |  11 +
 test/onnx/size_bf16_test.onnx                 |  11 +
 ...tropyloss_2d_mean_reduction_bf16_test.onnx |  17 +
 ...entropyloss_2d_no_reduction_bf16_test.onnx |  17 +
 ...ntropyloss_2d_sum_reduction_bf16_test.onnx |  17 +
 ..._kd_mean_reduction_bf16_weighted_test.onnx |  25 ++
 test/onnx/verify/add_bf16_test.cpp            |  49 ++
 test/onnx/verify/gelu_default_bf16_test.cpp   |  56 +++
 test/onnx/verify/gemm_bf16_test.cpp           |  71 +++
 test/onnx/verify/gridsample_bf16_test.cpp     |  61 +++
 test/onnx/verify/group_norm_3d_bf16_test.cpp  |  46 ++
 test/onnx/verify/isinf_bf16_test.cpp          |  50 +++
 test/onnx/verify/layer_norm_3d_bf16_test.cpp  |  46 ++
 .../verify/mvn_default_axes_bf16_test.cpp     |  51 +++
 test/onnx/verify/mvn_rank_2_bf16_test.cpp     |  36 ++
 test/onnx/verify/mvn_rank_3_bf16_test.cpp     |  43 ++
 test/onnx/verify/round_bf16_test.cpp          |  64 +++
 69 files changed, 2616 insertions(+), 15 deletions(-)
 create mode 100644 test/onnx/add_bf16_test.onnx
 create mode 100644 test/onnx/eyelike_bf16_test.onnx
 create mode 100644 test/onnx/gelu_default_bf16_test.onnx
 create mode 100644 test/onnx/gemm_bf16_test.onnx
 create mode 100644 test/onnx/gridsample_bf16_test.onnx
 create mode 100644 test/onnx/group_norm_3d_bf16_test.onnx
 create mode 100644 test/onnx/group_norm_4d_bf16_test.onnx
 create mode 100644 test/onnx/group_norm_5d_bf16_test.onnx
 create mode 100644 test/onnx/group_norm_small_eps_bf16_test.onnx
 create mode 100644 test/onnx/hardsigmoid_bf16_test.onnx
 create mode 100644 test/onnx/imagescaler_bf16_test.onnx
 create mode 100644 test/onnx/instance_norm_bf16_test.onnx
 create mode 100644 test/onnx/instance_norm_dyn_batch_bf16_test.onnx
 create mode 100644 test/onnx/isinf_bf16_test.onnx
 create mode 100644 test/onnx/isnan_bf16_test.onnx
 create mode 100644 test/onnx/layer_norm_3d_bf16_test.onnx
 create mode 100644 test/onnx/layer_norm_4d_bf16_test.onnx
 create mode 100644 test/onnx/layer_norm_small_eps_bf16_test.onnx
 create mode 100644 test/onnx/mean_bf16_test.onnx
 create mode 100644 test/onnx/mod_test_fmod_bf16.onnx
 create mode 100644 test/onnx/mvn_default_axes_bf16_test.onnx
 create mode 100644 test/onnx/mvn_rank_2_bf16_test.onnx
 create mode 100644 test/onnx/mvn_rank_3_bf16_test.onnx
 create mode 100644 test/onnx/negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test.onnx
 create mode 100644 test/onnx/negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test2.onnx
 create mode 100644 test/onnx/parse/add_bf16_test.cpp
 create mode 100644 test/onnx/parse/eyelike_bf16_test.cpp
 create mode 100644 test/onnx/parse/gemm_bf16_test.cpp
 create mode 100644 test/onnx/parse/group_norm_3d_bf16_test.cpp
 create mode 100644 test/onnx/parse/group_norm_4d_bf16_test.cpp
 create mode 100644 test/onnx/parse/group_norm_5d_bf16_test.cpp
 create mode 100644 test/onnx/parse/group_norm_small_eps_bf16_test.cpp
 create mode 100644 test/onnx/parse/hardsigmoid_bf16_test.cpp
 create mode 100644 test/onnx/parse/imagescaler_bf16_test.cpp
 create mode 100644 test/onnx/parse/instance_norm_bf16_test.cpp
 create mode 100644 test/onnx/parse/instance_norm_dyn_batch_bf16_test.cpp
 create mode 100644 test/onnx/parse/isinf_bf16_test.cpp
 create mode 100644 test/onnx/parse/isnan_bf16_test.cpp
 create mode 100644 test/onnx/parse/layer_norm_3d_bf16_test.cpp
 create mode 100644 test/onnx/parse/layer_norm_4d_bf16_test.cpp
 create mode 100644 test/onnx/parse/layer_norm_small_eps_bf16_test.cpp
 create mode 100644 test/onnx/parse/mean_bf16_test.cpp
 create mode 100644 test/onnx/parse/mod_test_fmod_bf16.cpp
 create mode 100644 test/onnx/parse/size_bf16_test.cpp
 create mode 100644 test/onnx/round_bf16_test.onnx
 create mode 100644 test/onnx/size_bf16_test.onnx
 create mode 100644 test/onnx/softmaxcrossentropyloss_2d_mean_reduction_bf16_test.onnx
 create mode 100644 test/onnx/softmaxcrossentropyloss_2d_no_reduction_bf16_test.onnx
 create mode 100644 test/onnx/softmaxcrossentropyloss_2d_sum_reduction_bf16_test.onnx
 create mode 100644 test/onnx/softmaxcrossentropyloss_kd_mean_reduction_bf16_weighted_test.onnx
 create mode 100644 test/onnx/verify/add_bf16_test.cpp
 create mode 100644 test/onnx/verify/gelu_default_bf16_test.cpp
 create mode 100644 test/onnx/verify/gemm_bf16_test.cpp
 create mode 100644 test/onnx/verify/gridsample_bf16_test.cpp
 create mode 100644 test/onnx/verify/group_norm_3d_bf16_test.cpp
 create mode 100644 test/onnx/verify/isinf_bf16_test.cpp
 create mode 100644 test/onnx/verify/layer_norm_3d_bf16_test.cpp
 create mode 100644 test/onnx/verify/mvn_default_axes_bf16_test.cpp
 create mode 100644 test/onnx/verify/mvn_rank_2_bf16_test.cpp
 create mode 100644 test/onnx/verify/mvn_rank_3_bf16_test.cpp
 create mode 100644 test/onnx/verify/round_bf16_test.cpp

diff --git a/src/onnx/onnx_parser.cpp b/src/onnx/onnx_parser.cpp
index d7fd8367f2f..cf81e921710 100644
--- a/src/onnx/onnx_parser.cpp
+++ b/src/onnx/onnx_parser.cpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -687,7 +687,7 @@ shape::type_t get_type(int dtype)
     case 22: return shape::int8_type;
     case 14:
     case 15:
-    case 16:
+    case 16: return shape::bf16_type;
     case 17:
     case 19:
     case 20:
@@ -700,7 +700,8 @@ shape::type_t get_type(int dtype)
 bool is_type_float(shape::type_t dtype)
 {
     bool r = false;
-    if(dtype == shape::float_type or dtype == shape::double_type or dtype == shape::half_type)
+    if(dtype == shape::float_type or dtype == shape::double_type or dtype == shape::half_type or
+       dtype == shape::bf16_type)
     {
         r = true;
     }
diff --git a/src/onnx/parse_instancenorm.cpp b/src/onnx/parse_instancenorm.cpp
index 6c2f79134e0..05a066e1463 100644
--- a/src/onnx/parse_instancenorm.cpp
+++ b/src/onnx/parse_instancenorm.cpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -36,7 +36,8 @@ namespace onnx {
 
 struct parse_instancenorm : op_parser<parse_instancenorm>
 {
-    std::set<shape::type_t> valid_types = {shape::float_type, shape::half_type, shape::double_type};
+    std::set<shape::type_t> valid_types = {
+        shape::float_type, shape::half_type, shape::double_type, shape::bf16_type};
 
     std::vector<op_desc> operators() const { return {{"InstanceNormalization"}}; }
 
diff --git a/test/onnx/add_bf16_test.onnx b/test/onnx/add_bf16_test.onnx
new file mode 100644
index 00000000000..36e4efefd46
--- /dev/null
+++ b/test/onnx/add_bf16_test.onnx
@@ -0,0 +1,16 @@
+	add_bf16_test:R
+
+0
+12"Addadd_bf16_testZ
+0
+
+
+Z
+1
+
+
+b
+2
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/eyelike_bf16_test.onnx b/test/onnx/eyelike_bf16_test.onnx
new file mode 100644
index 00000000000..9cdef15cb63
--- /dev/null
+++ b/test/onnx/eyelike_bf16_test.onnx
@@ -0,0 +1,11 @@
+	eyelike_bf16_test:R
+
+T1T2"EyeLikeeyelike_bf16_testZ
+T1
+
+
+b
+T2
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/gelu_default_bf16_test.onnx b/test/onnx/gelu_default_bf16_test.onnx
new file mode 100644
index 00000000000..bd12acc9b3a
--- /dev/null
+++ b/test/onnx/gelu_default_bf16_test.onnx
@@ -0,0 +1,11 @@
+	gelu_default_bf16_test:P
+
+xy"Gelugelu_default_bf16_testZ
+x
+
+
+b
+y
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/gemm_bf16_test.onnx b/test/onnx/gemm_bf16_test.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..83e839eeec542b3a427254eb062be50214487346
GIT binary patch
literal 193
zcmd<!6yi%y&CQKZN;5QzFG(#fv0A~%<;2D4$i)aGorM@9l~~+?3bgpSSQB#!G7?1@
z7#QpqFfwZKaj_(&mL!UvJ>vuB^K!A36eZ>rJ4!GvU}Qo!HcA+1rVt+&4~Kvd2Nx3u
g2(y6%ozMi?L4wXmf^0yQj7dPjNHjrqCnf=50Az<CasU7T

literal 0
HcmV?d00001

diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py
index c46ce85d080..3f492e70f2c 100644
--- a/test/onnx/gen_onnx.py
+++ b/test/onnx/gen_onnx.py
@@ -1,7 +1,7 @@
 #####################################################################################
 # The MIT License (MIT)
 #
-# Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+# Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -139,6 +139,21 @@ def add_fp8_test():
     return ([node], [x, y], [z])
 
 
+@onnx_test()
+def add_bf16_test():
+    x = helper.make_tensor_value_info('0', TensorProto.BFLOAT16, [1])
+    y = helper.make_tensor_value_info('1', TensorProto.BFLOAT16, [1])
+    z = helper.make_tensor_value_info('2', TensorProto.BFLOAT16, [1])
+    # Single Add node over the two one-element bf16 inputs
+    node = onnx.helper.make_node(
+        'Add',
+        inputs=['0', '1'],
+        outputs=['2'],
+    )
+
+    return ([node], [x, y], [z])
+
+
 @onnx_test()
 def add_scalar_test():
     x = helper.make_tensor_value_info('0', TensorProto.UINT8, [2, 3, 4, 5])
@@ -3555,6 +3570,19 @@ def eyelike_half_test():
     return ([node], [T1], [T2])
 
 
+@onnx_test()
+def eyelike_bf16_test():
+    T1 = helper.make_tensor_value_info('T1', TensorProto.BFLOAT16, [8, 8])
+    T2 = helper.make_tensor_value_info('T2', TensorProto.BFLOAT16, [8, 8])
+    # EyeLike with no attributes set, on an 8x8 bf16 input
+    node = onnx.helper.make_node(
+        'EyeLike',
+        inputs=['T1'],
+        outputs=['T2'],
+    )
+    return ([node], [T1], [T2])
+
+
 @onnx_test()
 def eyelike_k_test():
     T1 = helper.make_tensor_value_info('T1', TensorProto.FLOAT, [3, 4])
@@ -3835,6 +3863,16 @@ def gelu_default_half_test():
     return ([node], [x], [y])
 
 
+# @onnx_test()
+# def gelu_default_bf16_test():
+#     x = helper.make_tensor_value_info('x', TensorProto.BFLOAT16, [3, 3])
+#     y = helper.make_tensor_value_info('y', TensorProto.BFLOAT16, [3, 3])
+# NOTE(review): gelu_default_bf16_test is committed disabled; confirm why.
+#     node = onnx.helper.make_node("Gelu", inputs=["x"], outputs=["y"])
+
+#     return ([node], [x], [y])
+
+
 @onnx_test()
 def gelu_tanh_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 3])
@@ -4005,6 +4043,23 @@ def gemm_half_test():
     return ([node], [A, B, C], [Y])
 
 
+@onnx_test()
+def gemm_bf16_test():
+    A = helper.make_tensor_value_info('A', TensorProto.BFLOAT16, [8, 6])
+    B = helper.make_tensor_value_info('B', TensorProto.BFLOAT16, [8, 7])
+    C = helper.make_tensor_value_info('C', TensorProto.BFLOAT16, [6, 1])
+    Y = helper.make_tensor_value_info('Y', TensorProto.BFLOAT16, [6, 7])
+    # transA=1 with A 8x6 and B 8x7 matches the declared 6x7 output Y.
+    node = onnx.helper.make_node('Gemm',
+                                 inputs=['A', 'B', 'C'],
+                                 outputs=['Y'],
+                                 alpha=0.5,
+                                 beta=0.8,
+                                 transA=1)
+
+    return ([node], [A, B, C], [Y])
+
+
 @onnx_test()
 def gemm_fp8_test():
     A = helper.make_tensor_value_info('A', TensorProto.FLOAT8E4M3FNUZ, [8, 6])
@@ -4298,6 +4353,25 @@ def gridsample_half_test():
     return ([node], [x, grid], [y])
 
 
+@onnx_test()
+def gridsample_bf16_test():
+    x = helper.make_tensor_value_info('x', TensorProto.BFLOAT16, [1, 1, 4, 4])
+    grid = helper.make_tensor_value_info('grid', TensorProto.FLOAT,
+                                         [1, 6, 6, 2])
+    y = helper.make_tensor_value_info('y', TensorProto.BFLOAT16, [1, 1, 6, 6])
+    # Only x and y are BFLOAT16; the grid input is declared as FLOAT.
+    node = onnx.helper.make_node(
+        "GridSample",
+        inputs=["x", "grid"],
+        outputs=["y"],
+        mode="linear",
+        padding_mode="zeros",
+        align_corners=0,
+    )
+
+    return ([node], [x, grid], [y])
+
+
 @onnx_test()
 def gridsample_int_test():
     x = helper.make_tensor_value_info('x', TensorProto.INT32, [1, 1, 4, 4])
@@ -4662,6 +4736,13 @@ def group_norm_3d_half_test():
                            dtype=TensorProto.FLOAT16)
 
 
+@onnx_test()
+def group_norm_3d_bf16_test():  # group_norm_test case with dtype BFLOAT16
+    return group_norm_test([1, 4, 2], [2], [2], [1, 4, 2],
+                           2,
+                           dtype=TensorProto.BFLOAT16)
+
+
 @onnx_test()
 def group_norm_4d_test():
     return group_norm_test([1, 4, 3, 3], [2], [2], [1, 4, 3, 3], 2)
@@ -4674,6 +4755,13 @@ def group_norm_4d_half_test():
                            dtype=TensorProto.FLOAT16)
 
 
+@onnx_test()
+def group_norm_4d_bf16_test():  # group_norm_test case with dtype BFLOAT16
+    return group_norm_test([1, 4, 3, 3], [2], [2], [1, 4, 3, 3],
+                           2,
+                           dtype=TensorProto.BFLOAT16)
+
+
 @onnx_test()
 def group_norm_5d_test():
     return group_norm_test([3, 3, 3, 3, 3], [1], [1], [3, 3, 3, 3, 3], 1)
@@ -4686,6 +4774,13 @@ def group_norm_5d_half_test():
                            dtype=TensorProto.FLOAT16)
 
 
+@onnx_test()
+def group_norm_5d_bf16_test():  # group_norm_test case with dtype BFLOAT16
+    return group_norm_test([3, 3, 3, 3, 3], [1], [1], [3, 3, 3, 3, 3],
+                           1,
+                           dtype=TensorProto.BFLOAT16)
+
+
 @onnx_test()
 def group_norm_small_eps_half_test():
     return group_norm_test([1, 4, 2], [2], [2], [1, 4, 2],
@@ -4694,6 +4789,14 @@ def group_norm_small_eps_half_test():
                            dtype=TensorProto.FLOAT16)
 
 
+@onnx_test()
+def group_norm_small_eps_bf16_test():  # BFLOAT16 with eps_value=1e-7
+    return group_norm_test([1, 4, 2], [2], [2], [1, 4, 2],
+                           2,
+                           eps_value=1e-7,
+                           dtype=TensorProto.BFLOAT16)
+
+
 @onnx_test()
 def group_norm_invalid_num_groups_error_test():
     return group_norm_test([1, 4, 3, 3], [2], [2], [1, 4, 3, 3], 3)
@@ -5145,6 +5248,16 @@ def hardsigmoid_half_test():
     return ([node], [x], [y])
 
 
+@onnx_test()
+def hardsigmoid_bf16_test():
+    x = helper.make_tensor_value_info('x', TensorProto.BFLOAT16, [1, 3, 4, 5])
+    y = helper.make_tensor_value_info('y', TensorProto.BFLOAT16, [1, 3, 4, 5])
+    # HardSigmoid with no attributes passed; x and y share shape and type.
+    node = onnx.helper.make_node('HardSigmoid', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
 @onnx_test()
 def hardsigmoid_verify_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 5])
@@ -5875,6 +5988,22 @@ def imagescaler_half_test():
     return ([node], [x], [y])
 
 
+@onnx_test()
+def imagescaler_bf16_test():
+    x = helper.make_tensor_value_info('0', TensorProto.BFLOAT16,
+                                      [1, 3, 16, 16])
+    y = helper.make_tensor_value_info('1', TensorProto.BFLOAT16,
+                                      [1, 3, 16, 16])
+    # bias has three entries, matching dimension 1 of the 1x3x16x16 input.
+    node = onnx.helper.make_node('ImageScaler',
+                                 inputs=['0'],
+                                 outputs=['1'],
+                                 bias=[0.01, 0.02, 0.03],
+                                 scale=0.5)
+
+    return ([node], [x], [y])
+
+
 @onnx_test()
 def implicit_add_bcast_test():
     x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [2, 3, 4, 5])
@@ -5970,6 +6099,20 @@ def instance_norm_half_test():
     return ([node], [x, scale, bias], [y])
 
 
+@onnx_test()
+def instance_norm_bf16_test():
+    x = helper.make_tensor_value_info('0', TensorProto.BFLOAT16, [1, 2, 3, 3])
+    scale = helper.make_tensor_value_info('1', TensorProto.BFLOAT16, [2])
+    bias = helper.make_tensor_value_info('2', TensorProto.BFLOAT16, [2])
+    y = helper.make_tensor_value_info('3', TensorProto.BFLOAT16, [1, 2, 3, 3])
+    # scale and bias have length 2, matching dimension 1 of x.
+    node = onnx.helper.make_node('InstanceNormalization',
+                                 inputs=['0', '1', '2'],
+                                 outputs=['3'])
+
+    return ([node], [x, scale, bias], [y])
+
+
 @onnx_test()
 def instance_norm_type_mismatch_test():
     x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 2, 3, 3])
@@ -6016,6 +6159,23 @@ def instance_norm_dyn_batch_half_test():
     return ([node], [x, scale, bias], [y])
 
 
+@onnx_test()
+def instance_norm_dyn_batch_bf16_test():
+    # the batch size is a dynamic dimension
+    x = helper.make_tensor_value_info('0', TensorProto.BFLOAT16,
+                                      [None, 2, 3, 3])
+    scale = helper.make_tensor_value_info('1', TensorProto.BFLOAT16, [2])
+    bias = helper.make_tensor_value_info('2', TensorProto.BFLOAT16, [2])
+    y = helper.make_tensor_value_info('3', TensorProto.BFLOAT16,
+                                      [None, 2, 3, 3])
+    # InstanceNormalization node with no attributes passed.
+    node = onnx.helper.make_node('InstanceNormalization',
+                                 inputs=['0', '1', '2'],
+                                 outputs=['3'])
+
+    return ([node], [x, scale, bias], [y])
+
+
 @onnx_test()
 def instance_norm_invalid_type_test():
     x = helper.make_tensor_value_info('0', TensorProto.INT32, [1, 2, 3, 3])
@@ -6301,6 +6461,19 @@ def isinf_half_test():
     return ([node], [t1], [t2])
 
 
+@onnx_test()
+def isinf_bf16_test():
+    t1 = helper.make_tensor_value_info('t1', TensorProto.BFLOAT16, [2, 3])
+    t2 = helper.make_tensor_value_info('t2', TensorProto.BOOL, [2, 3])
+    # IsInf: BFLOAT16 input t1, BOOL output t2 of the same [2, 3] shape.
+    node = onnx.helper.make_node(
+        'IsInf',
+        inputs=['t1'],
+        outputs=['t2'],
+    )
+    return ([node], [t1], [t2])
+
+
 @onnx_test()
 def isinf_neg_test():
     t1 = helper.make_tensor_value_info('t1', TensorProto.FLOAT, [2, 3])
@@ -6372,6 +6545,19 @@ def isnan_half_test():
     return ([node], [t1], [t2])
 
 
+@onnx_test()
+def isnan_bf16_test():
+    t1 = helper.make_tensor_value_info('t1', TensorProto.BFLOAT16, [2, 3])
+    t2 = helper.make_tensor_value_info('t2', TensorProto.BFLOAT16, [2, 3])
+    # NOTE(review): t2 is BFLOAT16 here but BOOL in isinf_bf16_test; confirm.
+    node = onnx.helper.make_node(
+        'IsNaN',
+        inputs=['t1'],
+        outputs=['t2'],
+    )
+    return ([node], [t1], [t2])
+
+
 @onnx_test()
 def layernorm_test():
     x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 1, 5])
@@ -6480,6 +6666,11 @@ def layer_norm_3d_half_test():
     return make_layer_norm([1, 4, 2], -1, TensorProto.FLOAT16)
 
 
+@onnx_test()
+def layer_norm_3d_bf16_test():  # make_layer_norm case with BFLOAT16
+    return make_layer_norm([1, 4, 2], -1, TensorProto.BFLOAT16)
+
+
 @onnx_test()
 def layer_norm_4d_test():
     return make_layer_norm([3, 3, 3, 3], -1)
@@ -6490,6 +6681,11 @@ def layer_norm_4d_half_test():
     return make_layer_norm([3, 3, 3, 3], -1, TensorProto.FLOAT16)
 
 
+@onnx_test()
+def layer_norm_4d_bf16_test():  # make_layer_norm case with BFLOAT16
+    return make_layer_norm([3, 3, 3, 3], -1, TensorProto.BFLOAT16)
+
+
 @onnx_test()
 def layer_norm_invalid_axis_error_test():
     return make_layer_norm([1, 4, 2], 1000)
@@ -6539,6 +6735,20 @@ def layer_norm_small_eps_half_test():
     return ([node], [x, scale], [y])
 
 
+@onnx_test()
+def layer_norm_small_eps_bf16_test():
+    x = helper.make_tensor_value_info('x', TensorProto.BFLOAT16, [1, 2])
+    scale = helper.make_tensor_value_info('scale', TensorProto.BFLOAT16, [2])
+    y = helper.make_tensor_value_info('y', TensorProto.BFLOAT16, [1, 2])
+    # Only x and scale are supplied (no bias input); epsilon is set to 1e-7.
+    node = onnx.helper.make_node('LayerNormalization',
+                                 inputs=['x', 'scale'],
+                                 outputs=['y'],
+                                 epsilon=1e-7)
+
+    return ([node], [x, scale], [y])
+
+
 @onnx_test()
 def leaky_relu_test():
     x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3])
@@ -7904,6 +8114,25 @@ def mean_fp16_test():
     return ([node], [data_0, data_1, data_2], [mean])
 
 
+@onnx_test()
+def mean_bf16_test():
+    data_0 = helper.make_tensor_value_info('0', TensorProto.BFLOAT16,
+                                           [1, 2, 3])
+    data_1 = helper.make_tensor_value_info('1', TensorProto.BFLOAT16,
+                                           [1, 2, 3])
+    data_2 = helper.make_tensor_value_info('2', TensorProto.BFLOAT16,
+                                           [1, 2, 3])
+
+    mean = helper.make_tensor_value_info('mean', TensorProto.BFLOAT16,
+                                         [1, 2, 3])
+    # Mean over three inputs that all share the [1, 2, 3] BFLOAT16 shape.
+    node = onnx.helper.make_node("Mean",
+                                 inputs=["0", "1", "2"],
+                                 outputs=["mean"])
+
+    return ([node], [data_0, data_1, data_2], [mean])
+
+
 @onnx_test()
 def mean_invalid_broadcast_test():
     data_0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 2, 3])
@@ -7977,6 +8206,11 @@ def mvn_default_axes_fp16_test():
     return mvn_default_axes_test_base([2, 2, 2, 2], TensorProto.FLOAT16)
 
 
+@onnx_test()
+def mvn_default_axes_bf16_test():  # mvn_default_axes_test_base, BFLOAT16
+    return mvn_default_axes_test_base([2, 2, 2, 2], TensorProto.BFLOAT16)
+
+
 @onnx_test()
 def mvn_default_axes_rank_too_small_test():
     return mvn_default_axes_test_base([2, 2, 2])
@@ -8008,6 +8242,11 @@ def mvn_rank_2_fp16_test():
     return mvn_n_rank_test_base([1], [2, 2], TensorProto.FLOAT16)
 
 
+@onnx_test()
+def mvn_rank_2_bf16_test():  # mvn_n_rank_test_base case with BFLOAT16
+    return mvn_n_rank_test_base([1], [2, 2], TensorProto.BFLOAT16)
+
+
 @onnx_test()
 def mvn_rank_3_test():
     return mvn_n_rank_test_base([0, 1], [2, 2, 2])
@@ -8018,6 +8257,11 @@ def mvn_rank_3_fp16_test():
     return mvn_n_rank_test_base([0, 1], [2, 2, 2], TensorProto.FLOAT16)
 
 
+@onnx_test()
+def mvn_rank_3_bf16_test():  # mvn_n_rank_test_base case with BFLOAT16
+    return mvn_n_rank_test_base([0, 1], [2, 2, 2], TensorProto.BFLOAT16)
+
+
 @onnx_test()
 def mvn_axes_rank_too_small_test():
     return mvn_n_rank_test_base([0, 1, 2], [2, 2, 2])
@@ -8066,6 +8310,17 @@ def mod_test_half():
     return ([node], [a, b], [y])
 
 
+# @onnx_test()
+# def mod_test_bf16():
+#     a = helper.make_tensor_value_info('0', TensorProto.BFLOAT16, [3, 3, 3])
+#     b = helper.make_tensor_value_info('1', TensorProto.BFLOAT16, [3, 3, 3])
+#     y = helper.make_tensor_value_info('2', TensorProto.BFLOAT16, [3, 3, 3])
+# NOTE(review): mod_test_bf16 is committed disabled; confirm why.
+#     node = onnx.helper.make_node('Mod', inputs=['0', '1'], outputs=['2'])
+
+#     return ([node], [a, b], [y])
+
+
 @onnx_test()
 def mod_test_different_dtypes():
     a = helper.make_tensor_value_info('0', TensorProto.INT16, [3, 3, 3])
@@ -8111,6 +8366,20 @@ def mod_test_fmod_half():
     return ([node], [a, b], [y])
 
 
+@onnx_test()
+def mod_test_fmod_bf16():
+    a = helper.make_tensor_value_info('0', TensorProto.BFLOAT16, [3, 3, 3])
+    b = helper.make_tensor_value_info('1', TensorProto.BFLOAT16, [3, 3, 3])
+    y = helper.make_tensor_value_info('2', TensorProto.BFLOAT16, [3, 3, 3])
+    # Mod node with fmod=1 on two BFLOAT16 operands of identical shape.
+    node = onnx.helper.make_node('Mod',
+                                 inputs=['0', '1'],
+                                 outputs=['2'],
+                                 fmod=1)
+
+    return ([node], [a, b], [y])
+
+
 @onnx_test()
 def mod_test_fmod_different_dtypes():
     a = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3, 3, 3])
@@ -11296,6 +11565,16 @@ def round_half_test():
     return ([node], [x], [y])
 
 
+@onnx_test()
+def round_bf16_test():
+    x = helper.make_tensor_value_info('x', TensorProto.BFLOAT16, [4, 4])
+    y = helper.make_tensor_value_info('y', TensorProto.BFLOAT16, [4, 4])
+    # Round node: 4x4 BFLOAT16 input x, 4x4 BFLOAT16 output y.
+    node = onnx.helper.make_node('Round', inputs=['x'], outputs=['y'])
+
+    return ([node], [x], [y])
+
+
 def make_scatter_elements_test(reduction="none"):
     x = helper.make_tensor_value_info('data', TensorProto.FLOAT, [3, 4, 5, 6])
     i = helper.make_tensor_value_info('indices', TensorProto.INT32,
@@ -11823,6 +12102,18 @@ def size_half_test():
     return ([node], [x], [y])
 
 
+@onnx_test()
+def size_bf16_test():  # Size node: [3, 1] BFLOAT16 in, INT64 [1] out
+    x = helper.make_tensor_value_info('x', TensorProto.BFLOAT16, [3, 1])
+    y = helper.make_tensor_value_info('y', TensorProto.INT64, [1])
+    node = onnx.helper.make_node(
+        'Size',
+        inputs=['x'],
+        outputs=['y'],
+    )
+    return ([node], [x], [y])
+
+
 @onnx_test()
 def size_int_test():
     x = helper.make_tensor_value_info('x', TensorProto.INT32, [8, 2, 3])
@@ -12687,6 +12978,25 @@ def softmaxcrossentropyloss_2d_no_reduction_half_test():
     return ([node], [scores, labels], [loss])
 
 
+@onnx_test()
+def softmaxcrossentropyloss_2d_no_reduction_bf16_test():
+    scores = helper.make_tensor_value_info('0', TensorProto.BFLOAT16, [4, 4])
+    labels = helper.make_tensor_value_info('1', TensorProto.INT32, [4])
+    loss = helper.make_tensor_value_info('2', TensorProto.BFLOAT16, [4])
+    # Scores and loss are BFLOAT16, labels are INT32; no weights input.
+    node = onnx.helper.make_node(
+        "SoftmaxCrossEntropyLoss",
+        inputs=[
+            "0",
+            "1",
+        ],
+        outputs=["2"],
+        reduction="none",
+    )
+
+    return ([node], [scores, labels], [loss])
+
+
 @onnx_test()
 def softmaxcrossentropyloss_2d_sum_reduction_test():
     scores = helper.make_tensor_value_info('0', TensorProto.FLOAT, [4, 4])
@@ -12744,6 +13054,25 @@ def softmaxcrossentropyloss_2d_sum_reduction_half_test():
     return ([node], [scores, labels], [loss])
 
 
+@onnx_test()
+def softmaxcrossentropyloss_2d_sum_reduction_bf16_test():
+    scores = helper.make_tensor_value_info('0', TensorProto.BFLOAT16, [4, 4])
+    labels = helper.make_tensor_value_info('1', TensorProto.INT32, [4])
+    loss = helper.make_tensor_value_info('2', TensorProto.BFLOAT16, [4])
+    # NOTE(review): loss is declared [4] although reduction is "sum"; confirm.
+    node = onnx.helper.make_node(
+        "SoftmaxCrossEntropyLoss",
+        inputs=[
+            "0",
+            "1",
+        ],
+        outputs=["2"],
+        reduction="sum",
+    )
+
+    return ([node], [scores, labels], [loss])
+
+
 @onnx_test()
 def softmaxcrossentropyloss_2d_mean_reduction_test():
     scores = helper.make_tensor_value_info('0', TensorProto.FLOAT, [4, 4])
@@ -12801,6 +13130,25 @@ def softmaxcrossentropyloss_2d_mean_reduction_half_test():
     return ([node], [scores, labels], [loss])
 
 
+@onnx_test()
+def softmaxcrossentropyloss_2d_mean_reduction_bf16_test():
+    scores = helper.make_tensor_value_info('0', TensorProto.BFLOAT16, [4, 4])
+    labels = helper.make_tensor_value_info('1', TensorProto.INT32, [4])
+    loss = helper.make_tensor_value_info('2', TensorProto.BFLOAT16, [4])
+    # NOTE(review): loss is declared [4] although reduction is "mean"; confirm.
+    node = onnx.helper.make_node(
+        "SoftmaxCrossEntropyLoss",
+        inputs=[
+            "0",
+            "1",
+        ],
+        outputs=["2"],
+        reduction="mean",
+    )
+
+    return ([node], [scores, labels], [loss])
+
+
 @onnx_test()
 def softmaxcrossentropyloss_2d_no_reduction_weighted_test():
     scores = helper.make_tensor_value_info('0', TensorProto.FLOAT, [4, 4])
@@ -13061,7 +13409,7 @@ def softmaxcrossentropyloss_2d_sum_reduction_double_weighted_test():
 
 
 @onnx_test()
-def softmaxcrossentropyloss_2d_sum_reduction_half_weighted_test():
+def softmaxcrossentropyloss_2d_sum_reduction_bf16_weighted_test():
     scores = helper.make_tensor_value_info('0', TensorProto.FLOAT16, [4, 4])
     labels = helper.make_tensor_value_info('1', TensorProto.INT32, [4])
     weights = helper.make_tensor_value_info('2', TensorProto.FLOAT16, [4])
@@ -13188,6 +13536,28 @@ def softmaxcrossentropyloss_kd_mean_reduction_half_weighted_test():
     return ([node], [scores, labels, weights], [loss])
 
 
+@onnx_test()
+def softmaxcrossentropyloss_kd_mean_reduction_bf16_weighted_test():
+    scores = helper.make_tensor_value_info('0', TensorProto.BFLOAT16,
+                                           [4, 4, 2, 2])
+    labels = helper.make_tensor_value_info('1', TensorProto.INT32, [4, 2, 2])
+    weights = helper.make_tensor_value_info('2', TensorProto.BFLOAT16, [4])
+    loss = helper.make_tensor_value_info('3', TensorProto.BFLOAT16, [4])
+    # Three inputs: scores, INT32 labels, weights (length 4 = scores dim 1).
+    node = onnx.helper.make_node(
+        "SoftmaxCrossEntropyLoss",
+        inputs=[
+            "0",
+            "1",
+            "2",
+        ],
+        outputs=["3"],
+        reduction="mean",
+    )
+
+    return ([node], [scores, labels, weights], [loss])
+
+
 @onnx_test()
 def softmaxcrossentropyloss_kd_sum_reduction_double_weighted_test():
     scores = helper.make_tensor_value_info('0', TensorProto.DOUBLE,
@@ -13254,6 +13624,28 @@ def negativeloglikelihoodloss_kd_mean_reduction_half_weighted_test():
     return ([node], [scores, labels, weights], [loss])
 
 
+@onnx_test()
+def negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test():
+    scores = helper.make_tensor_value_info('0', TensorProto.BFLOAT16,
+                                           [4, 4, 2, 2])
+    labels = helper.make_tensor_value_info('1', TensorProto.INT32, [4, 2, 2])
+    weights = helper.make_tensor_value_info('2', TensorProto.BFLOAT16, [4])
+    loss = helper.make_tensor_value_info('3', TensorProto.BFLOAT16, [4])
+    # Three inputs: scores, INT32 labels, weights (length 4 = scores dim 1).
+    node = onnx.helper.make_node(
+        "NegativeLogLikelihoodLoss",
+        inputs=[
+            "0",
+            "1",
+            "2",
+        ],
+        outputs=["3"],
+        reduction="mean",
+    )
+
+    return ([node], [scores, labels, weights], [loss])
+
+
 @onnx_test()
 def negativeloglikelihoodloss_kd_mean_reduction_half_weighted_test2():
     scores = helper.make_tensor_value_info('0', TensorProto.FLOAT16, [2, 3, 2])
@@ -13275,6 +13667,28 @@ def negativeloglikelihoodloss_kd_mean_reduction_half_weighted_test2():
     return ([node], [scores, labels, weights], [loss])
 
 
+@onnx_test()
+def negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test2():
+    scores = helper.make_tensor_value_info('0', TensorProto.BFLOAT16,
+                                           [2, 3, 2])
+    labels = helper.make_tensor_value_info('1', TensorProto.INT32, [2, 2])
+    weights = helper.make_tensor_value_info('2', TensorProto.BFLOAT16, [3])
+    loss = helper.make_tensor_value_info('3', TensorProto.BFLOAT16, [2])
+    # Three inputs: scores, INT32 labels, weights (length 3 = scores dim 1).
+    node = onnx.helper.make_node(
+        "NegativeLogLikelihoodLoss",
+        inputs=[
+            "0",
+            "1",
+            "2",
+        ],
+        outputs=["3"],
+        reduction="mean",
+    )
+
+    return ([node], [scores, labels, weights], [loss])
+
+
 @onnx_test()
 def negativeloglikelihoodloss_kd_sum_reduction_double_weighted_test():
     scores = helper.make_tensor_value_info('0', TensorProto.DOUBLE,
diff --git a/test/onnx/gridsample_bf16_test.onnx b/test/onnx/gridsample_bf16_test.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..5771a97ac839f48a506f886e31d59c01bc311f63
GIT binary patch
literal 236
zcmd<!6cR}<%1kLv%q_@CjZaE5G>b1uEiSP-%g7bY#aO|`0+JPCtW@H12Qq@e%Ctnd
zcoTCn)AQn!^NaFQi;5)}7BDht339RI=BK18vE^jur6v|FU}V;k;NmGrOi9ViOOFRj
zu~wxP<rf2Gu-h6X4YX27j7x+=KuCa#iGvY}S%5T4lpNGCAbCc(JR49Bgqf0nnk%tr
L1}ku45)cLerqeS#

literal 0
HcmV?d00001

diff --git a/test/onnx/group_norm_3d_bf16_test.onnx b/test/onnx/group_norm_3d_bf16_test.onnx
new file mode 100644
index 00000000000..4c8b2087a11
--- /dev/null
+++ b/test/onnx/group_norm_3d_bf16_test.onnx
@@ -0,0 +1,26 @@
+	group_norm_3d_bf16_test:�
+M
+x
+scale
+biasy"GroupNormalization*
+epsilon��'7�*
+
+num_groups�group_norm_3d_bf16_testZ
+x
+
+
+
+Z
+scale
+
+
+Z
+bias
+
+
+b
+y
+
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/group_norm_4d_bf16_test.onnx b/test/onnx/group_norm_4d_bf16_test.onnx
new file mode 100644
index 00000000000..afc14b97abe
--- /dev/null
+++ b/test/onnx/group_norm_4d_bf16_test.onnx
@@ -0,0 +1,28 @@
+	group_norm_4d_bf16_test:�
+M
+x
+scale
+biasy"GroupNormalization*
+epsilon��'7�*
+
+num_groups�group_norm_4d_bf16_testZ
+x
+
+
+
+
+Z
+scale
+
+
+Z
+bias
+
+
+b
+y
+
+
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/group_norm_5d_bf16_test.onnx b/test/onnx/group_norm_5d_bf16_test.onnx
new file mode 100644
index 00000000000..67848f3dad5
--- /dev/null
+++ b/test/onnx/group_norm_5d_bf16_test.onnx
@@ -0,0 +1,30 @@
+	group_norm_5d_bf16_test:�
+M
+x
+scale
+biasy"GroupNormalization*
+epsilon��'7�*
+
+num_groups�group_norm_5d_bf16_testZ
+x
+
+
+
+
+
+Z
+scale
+
+
+Z
+bias
+
+
+b
+y
+
+
+
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/group_norm_small_eps_bf16_test.onnx b/test/onnx/group_norm_small_eps_bf16_test.onnx
new file mode 100644
index 00000000000..21da27b93d6
--- /dev/null
+++ b/test/onnx/group_norm_small_eps_bf16_test.onnx
@@ -0,0 +1,26 @@
+	group_norm_small_eps_bf16_test:�
+M
+x
+scale
+biasy"GroupNormalization*
+epsilon���3�*
+
+num_groups�group_norm_small_eps_bf16_testZ
+x
+
+
+
+Z
+scale
+
+
+Z
+bias
+
+
+b
+y
+
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/hardsigmoid_bf16_test.onnx b/test/onnx/hardsigmoid_bf16_test.onnx
new file mode 100644
index 00000000000..bb5c4313c2d
--- /dev/null
+++ b/test/onnx/hardsigmoid_bf16_test.onnx
@@ -0,0 +1,15 @@
+	hardsigmoid_bf16_test:f
+
+xy"HardSigmoidhardsigmoid_bf16_testZ
+x
+
+
+
+
+b
+y
+
+
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/imagescaler_bf16_test.onnx b/test/onnx/imagescaler_bf16_test.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..ae06ec39988b1e0fc1c87c847b2edc2e7675f8a8
GIT binary patch
literal 177
zcmd<!6cWwMO-xTMPEO28Es9S{Gc=1YNi8n1n#jmy$HizM#Av9*?Fmv53|65f!Nro4
znOJPgbzRv8NG!Io?LYL@W&tCc7C#p&*c4F)1_t{DjEp!;j*<o$E5;?lAs{5c#l*n~
V#LPepVhbbzB@J;&Ixz_d0|1uPCVl__

literal 0
HcmV?d00001

diff --git a/test/onnx/instance_norm_bf16_test.onnx b/test/onnx/instance_norm_bf16_test.onnx
new file mode 100644
index 00000000000..48129ac901e
--- /dev/null
+++ b/test/onnx/instance_norm_bf16_test.onnx
@@ -0,0 +1,25 @@
+	instance_norm_bf16_test:�
+#
+0
+1
+23"InstanceNormalizationinstance_norm_bf16_testZ
+0
+
+
+
+
+Z
+1
+
+
+Z
+2
+
+
+b
+3
+
+
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/instance_norm_dyn_batch_bf16_test.onnx b/test/onnx/instance_norm_dyn_batch_bf16_test.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..a21cc6c04457b03f5632b1c960e63633432fcb88
GIT binary patch
literal 204
zcmd<!6jIF0D=taQOHPf?%P-1}PpQm{Pf9FF&WKM+Gc=1YNi8n1TENJq%*ANH#c0UI
zXe7jFtR(6Q)#C@$l9-cOl~|IQpGSmIQIbG&ghaT6I0S_FxEQ#YIGBK#8HA(wf#wTw
aad7~pSU?gW0V8CAB%o$v?3$gJ1cU*A(km(e

literal 0
HcmV?d00001

diff --git a/test/onnx/isinf_bf16_test.onnx b/test/onnx/isinf_bf16_test.onnx
new file mode 100644
index 00000000000..f0feb41cc3f
--- /dev/null
+++ b/test/onnx/isinf_bf16_test.onnx
@@ -0,0 +1,11 @@
+	isinf_bf16_test:N
+
+t1t2"IsInfisinf_bf16_testZ
+t1
+
+
+b
+t2
+	
+
+B
\ No newline at end of file
diff --git a/test/onnx/isnan_bf16_test.onnx b/test/onnx/isnan_bf16_test.onnx
new file mode 100644
index 00000000000..bbbd17e139d
--- /dev/null
+++ b/test/onnx/isnan_bf16_test.onnx
@@ -0,0 +1,11 @@
+	isnan_bf16_test:N
+
+t1t2"IsNaNisnan_bf16_testZ
+t1
+
+
+b
+t2
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/layer_norm_3d_bf16_test.onnx b/test/onnx/layer_norm_3d_bf16_test.onnx
new file mode 100644
index 00000000000..87c1fce7e2e
--- /dev/null
+++ b/test/onnx/layer_norm_3d_bf16_test.onnx
@@ -0,0 +1,24 @@
+	layer_norm_3d_bf16_test:�
+=
+x
+scale
+biasy"LayerNormalization*
+axis����������layer_norm_3d_bf16_testZ
+x
+
+
+
+Z
+scale
+
+
+Z
+bias
+
+
+b
+y
+
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/layer_norm_4d_bf16_test.onnx b/test/onnx/layer_norm_4d_bf16_test.onnx
new file mode 100644
index 00000000000..7dca94f82b9
--- /dev/null
+++ b/test/onnx/layer_norm_4d_bf16_test.onnx
@@ -0,0 +1,26 @@
+	layer_norm_4d_bf16_test:�
+=
+x
+scale
+biasy"LayerNormalization*
+axis����������layer_norm_4d_bf16_testZ
+x
+
+
+
+
+Z
+scale
+
+
+Z
+bias
+
+
+b
+y
+
+
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/layer_norm_small_eps_bf16_test.onnx b/test/onnx/layer_norm_small_eps_bf16_test.onnx
new file mode 100644
index 00000000000..a844828bad3
--- /dev/null
+++ b/test/onnx/layer_norm_small_eps_bf16_test.onnx
@@ -0,0 +1,17 @@
+	layer_norm_small_eps_bf16_test:�
+4
+x
+scaley"LayerNormalization*
+epsilon���3�layer_norm_small_eps_bf16_testZ
+x
+
+
+Z
+scale
+
+
+b
+y
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/mean_bf16_test.onnx b/test/onnx/mean_bf16_test.onnx
new file mode 100644
index 00000000000..7bb6a71dc16
--- /dev/null
+++ b/test/onnx/mean_bf16_test.onnx
@@ -0,0 +1,25 @@
+	mean_bf16_test:�
+
+0
+1
+2mean"Meanmean_bf16_testZ
+0
+
+
+
+Z
+1
+
+
+
+Z
+2
+
+
+
+b
+mean
+
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/mod_test_fmod_bf16.onnx b/test/onnx/mod_test_fmod_bf16.onnx
new file mode 100644
index 00000000000..2bd7005d3b3
--- /dev/null
+++ b/test/onnx/mod_test_fmod_bf16.onnx
@@ -0,0 +1,20 @@
+	mod_test_fmod_bf16:|
+
+0
+12"Mod*
+fmod�mod_test_fmod_bf16Z
+0
+
+
+
+Z
+1
+
+
+
+b
+2
+
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/mvn_default_axes_bf16_test.onnx b/test/onnx/mvn_default_axes_bf16_test.onnx
new file mode 100644
index 00000000000..3f590bf2e65
--- /dev/null
+++ b/test/onnx/mvn_default_axes_bf16_test.onnx
@@ -0,0 +1,15 @@
+	mvn_default_axes_bf16_test:�
+&
+dataout"MeanVarianceNormalizationmvn_default_axes_bf16_testZ
+data
+
+
+
+
+b
+out
+
+
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/mvn_rank_2_bf16_test.onnx b/test/onnx/mvn_rank_2_bf16_test.onnx
new file mode 100644
index 00000000000..e39c31cc56a
--- /dev/null
+++ b/test/onnx/mvn_rank_2_bf16_test.onnx
@@ -0,0 +1,12 @@
+	mvn_rank_2_bf16_test:z
+3
+dataout"MeanVarianceNormalization*
+axes@�mvn_rank_2_bf16_testZ
+data
+
+
+b
+out
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/mvn_rank_3_bf16_test.onnx b/test/onnx/mvn_rank_3_bf16_test.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..f2df467ccbf26010b46c29841a19e35792c05226
GIT binary patch
literal 163
zcmd<!6cWiT%Zo2c%*&29j!#N6G>b1uEiSQYVdOI9Vo6CXNfctvFD+4$^i5673rj4@
zOw3D8^~*2HP0Y!xN-W9D&(q@NVo9t>Ep}jVU|hh+j@`T{DX3vWTml>dLOfhd9855r
QB*_J`LI{hL6O({2020h5;Q#;t

literal 0
HcmV?d00001

diff --git a/test/onnx/negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test.onnx b/test/onnx/negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test.onnx
new file mode 100644
index 00000000000..2c7fc3da657
--- /dev/null
+++ b/test/onnx/negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test.onnx
@@ -0,0 +1,25 @@
+	>negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test:�
+=
+0
+1
+23"NegativeLogLikelihoodLoss*
+	reduction"mean�>negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_testZ
+0
+
+
+
+
+Z
+1
+
+
+
+Z
+2
+
+
+b
+3
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test2.onnx b/test/onnx/negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test2.onnx
new file mode 100644
index 00000000000..7e5cffccc16
--- /dev/null
+++ b/test/onnx/negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test2.onnx
@@ -0,0 +1,23 @@
+	?negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test2:�
+=
+0
+1
+23"NegativeLogLikelihoodLoss*
+	reduction"mean�?negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test2Z
+0
+
+
+
+Z
+1
+
+
+Z
+2
+
+
+b
+3
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/parse/add_bf16_test.cpp b/test/onnx/parse/add_bf16_test.cpp
new file mode 100644
index 00000000000..3a9621636f2
--- /dev/null
+++ b/test/onnx/parse/add_bf16_test.cpp
@@ -0,0 +1,39 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+
+TEST_CASE(add_bf16_test)
+{
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+    auto p0  = mm->add_parameter("0", migraphx::shape{migraphx::shape::bf16_type, {1}});
+    auto p1  = mm->add_parameter("1", migraphx::shape{migraphx::shape::bf16_type, {1}});
+    // Expected program: a single add over the two one-element bf16 parameters.
+    mm->add_instruction(migraphx::make_op("add"), p0, p1);
+    // The program obtained from the ONNX fixture must equal the hand-built one.
+    auto prog = optimize_onnx("add_bf16_test.onnx");
+
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/eyelike_bf16_test.cpp b/test/onnx/parse/eyelike_bf16_test.cpp
new file mode 100644
index 00000000000..ffead021ffb
--- /dev/null
+++ b/test/onnx/parse/eyelike_bf16_test.cpp
@@ -0,0 +1,46 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+#include <onnx_test_utils.hpp>
+
+TEST_CASE(eyelike_bf16_test)
+{
+    // The parameter and the expected literal share one 8x8 bf16 shape.
+    std::vector<std::size_t> dims{8, 8};
+    const std::size_t k = 0;
+    migraphx::shape s{migraphx::shape::bf16_type, dims};
+
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+    mm->add_parameter("T1", s);
+
+    auto rows = dims.front();
+    auto cols = dims.back();
+    auto eye  = make_r_eyelike(rows, cols, k);
+    mm->add_literal(migraphx::literal{s, eye});
+
+    auto prog = optimize_onnx("eyelike_bf16_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/gemm_bf16_test.cpp b/test/onnx/parse/gemm_bf16_test.cpp
new file mode 100644
index 00000000000..c960a9d7e8a
--- /dev/null
+++ b/test/onnx/parse/gemm_bf16_test.cpp
@@ -0,0 +1,56 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+#include <migraphx/apply_alpha_beta.hpp>
+
+TEST_CASE(gemm_bf16_test)
+{
+    // The two operators that the expected graph applies twice are built once up front.
+    std::vector<std::size_t> lens{6, 7};
+    auto to_bf16 = migraphx::make_op("convert", {{"target_type", migraphx::shape::bf16_type}});
+    auto bcast   = migraphx::make_op("multibroadcast", {{"out_lens", lens}});
+    migraphx::program p;
+    auto* mm   = p.get_main_module();
+    auto a     = mm->add_parameter("A", migraphx::shape{migraphx::shape::bf16_type, {8, 6}});
+    auto b     = mm->add_parameter("B", migraphx::shape{migraphx::shape::bf16_type, {8, 7}});
+    auto c     = mm->add_parameter("C", migraphx::shape{migraphx::shape::bf16_type, {6, 1}});
+    auto alpha = 0.5f;
+    auto beta  = 0.8f;
+    auto a_lit = mm->add_literal(alpha);
+    auto a_mul = add_common_op(*mm, migraphx::make_op("mul"), {a_lit, a});
+    auto a16   = mm->add_instruction(to_bf16, a_mul);
+    auto a_t = mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), a16);
+    auto dot = migraphx::add_apply_alpha_beta(*mm, {a_t, b}, migraphx::make_op("dot"), 1.0f, 0.0f);
+    auto c_bc = mm->add_instruction(bcast, c);
+    auto c32  = mm->add_instruction(
+        migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), c_bc);
+    auto b_lit = mm->add_literal(beta);
+    auto b_bc  = mm->add_instruction(bcast, b_lit);
+    auto c_mul = mm->add_instruction(migraphx::make_op("mul"), c32, b_bc);
+    auto c16   = mm->add_instruction(to_bf16, c_mul);
+    mm->add_instruction(migraphx::make_op("add"), dot, c16);
+    auto prog = optimize_onnx("gemm_bf16_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/group_norm_3d_bf16_test.cpp b/test/onnx/parse/group_norm_3d_bf16_test.cpp
new file mode 100644
index 00000000000..d7cb1da918d
--- /dev/null
+++ b/test/onnx/parse/group_norm_3d_bf16_test.cpp
@@ -0,0 +1,34 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+#include <onnx_test_utils.hpp>
+
+TEST_CASE(group_norm_3d_bf16_test)
+{
+    const auto dtype    = migraphx::shape::bf16_type; // element type handed to the helper
+    migraphx::program p = make_group_norm({1, 4, 2}, {2}, {2}, {1, 2, 2, 2}, {2, 3}, 1e-5f, dtype);
+    auto prog           = optimize_onnx("group_norm_3d_bf16_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/group_norm_4d_bf16_test.cpp b/test/onnx/parse/group_norm_4d_bf16_test.cpp
new file mode 100644
index 00000000000..3f72ab53385
--- /dev/null
+++ b/test/onnx/parse/group_norm_4d_bf16_test.cpp
@@ -0,0 +1,34 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+#include <onnx_test_utils.hpp>
+
+TEST_CASE(group_norm_4d_bf16_test)
+{
+    const migraphx::program p = make_group_norm(
+        {1, 4, 3, 3}, {2}, {2}, {1, 2, 2, 3, 3}, {2, 3, 4}, 1e-5f, migraphx::shape::bf16_type);
+    const auto prog = optimize_onnx("group_norm_4d_bf16_test.onnx"); // compared against p below
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/group_norm_5d_bf16_test.cpp b/test/onnx/parse/group_norm_5d_bf16_test.cpp
new file mode 100644
index 00000000000..4b66eed2aa8
--- /dev/null
+++ b/test/onnx/parse/group_norm_5d_bf16_test.cpp
@@ -0,0 +1,39 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+#include <onnx_test_utils.hpp>
+
+TEST_CASE(group_norm_5d_bf16_test)
+{
+    // The arguments are forwarded to make_group_norm in this order; their meaning is
+    // defined by that helper.
+    const auto dtype = migraphx::shape::bf16_type;
+    const float eps  = 1e-5f;
+    migraphx::program p =
+        make_group_norm({3, 3, 3, 3, 3}, {1}, {1}, {3, 1, 3, 3, 3, 3}, {2, 3, 4, 5}, eps, dtype);
+
+    auto prog = optimize_onnx("group_norm_5d_bf16_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/group_norm_small_eps_bf16_test.cpp b/test/onnx/parse/group_norm_small_eps_bf16_test.cpp
new file mode 100644
index 00000000000..77151828b4d
--- /dev/null
+++ b/test/onnx/parse/group_norm_small_eps_bf16_test.cpp
@@ -0,0 +1,34 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+#include <onnx_test_utils.hpp>
+
+TEST_CASE(group_norm_small_eps_bf16_test)
+{
+    const auto dtype    = migraphx::shape::bf16_type; // element type handed to the helper
+    migraphx::program p = make_group_norm({1, 4, 2}, {2}, {2}, {1, 2, 2, 2}, {2, 3}, 1e-7f, dtype);
+    auto prog           = optimize_onnx("group_norm_small_eps_bf16_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/hardsigmoid_bf16_test.cpp b/test/onnx/parse/hardsigmoid_bf16_test.cpp
new file mode 100644
index 00000000000..8a5b510e4ce
--- /dev/null
+++ b/test/onnx/parse/hardsigmoid_bf16_test.cpp
@@ -0,0 +1,58 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+
+TEST_CASE(hardsigmoid_bf16_test)
+{
+    std::vector<std::size_t> input_lens{1, 3, 4, 5};
+    auto input_type = migraphx::shape::bf16_type;
+    auto bcast      = migraphx::make_op("multibroadcast", {{"out_lens", input_lens}});
+    migraphx::shape s{input_type, input_lens};
+
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+    auto x   = mm->add_parameter("x", s);
+
+    float alpha = 0.2;
+    float beta  = 0.5;
+
+    // Every scalar literal is added first and then broadcast to input_lens.
+    auto alpha_lit = mm->add_literal(migraphx::literal{migraphx::shape{input_type}, {alpha}});
+    auto mb_alpha  = mm->add_instruction(bcast, alpha_lit);
+    auto beta_lit  = mm->add_literal(migraphx::literal{migraphx::shape{input_type}, {beta}});
+    auto mb_beta   = mm->add_instruction(bcast, beta_lit);
+    auto zero_lit  = mm->add_literal(migraphx::literal{migraphx::shape{input_type}, {0}});
+    auto mb_zero   = mm->add_instruction(bcast, zero_lit);
+    auto one_lit   = mm->add_literal(migraphx::literal{migraphx::shape{input_type}, {1}});
+    auto mb_one    = mm->add_instruction(bcast, one_lit);
+
+    // clip(add(beta, mul(alpha, x)), 0, 1)
+    auto scaled  = mm->add_instruction(migraphx::make_op("mul"), mb_alpha, x);
+    auto shifted = mm->add_instruction(migraphx::make_op("add"), mb_beta, scaled);
+    mm->add_instruction(migraphx::make_op("clip"), shifted, mb_zero, mb_one);
+
+    auto prog = optimize_onnx("hardsigmoid_bf16_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/imagescaler_bf16_test.cpp b/test/onnx/parse/imagescaler_bf16_test.cpp
new file mode 100644
index 00000000000..33d7bc64610
--- /dev/null
+++ b/test/onnx/parse/imagescaler_bf16_test.cpp
@@ -0,0 +1,47 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+
+TEST_CASE(imagescaler_bf16_test)
+{
+    auto bf16 = migraphx::shape::bf16_type;
+    migraphx::shape s{bf16, {1, 3, 16, 16}};
+    migraphx::program p;
+    auto* mm   = p.get_main_module();
+    auto image = mm->add_parameter("0", s);
+    // Literals first: the scalar scale, then the three bias values.
+    auto scale = mm->add_literal(migraphx::literal{migraphx::shape{bf16}, {0.5f}});
+    auto bias  = mm->add_literal(migraphx::literal{migraphx::shape{bf16, {3}}, {0.01, 0.02, 0.03}});
+    // add(mul(image, scalar(scale)), broadcast(bias))
+    auto scale_op = migraphx::make_op("scalar", {{"scalar_bcst_dims", s.lens()}});
+    auto bias_op  = migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", s.lens()}});
+    auto scale_b  = mm->add_instruction(scale_op, scale);
+    auto scaled   = mm->add_instruction(migraphx::make_op("mul"), image, scale_b);
+    auto bias_b   = mm->add_instruction(bias_op, bias);
+    mm->add_instruction(migraphx::make_op("add"), scaled, bias_b);
+
+    auto prog = optimize_onnx("imagescaler_bf16_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/instance_norm_bf16_test.cpp b/test/onnx/parse/instance_norm_bf16_test.cpp
new file mode 100644
index 00000000000..2cec4854fda
--- /dev/null
+++ b/test/onnx/parse/instance_norm_bf16_test.cpp
@@ -0,0 +1,63 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+
+TEST_CASE(instance_norm_bf16_test)
+{
+    std::vector<std::size_t> dims{1, 2, 3, 3};
+    auto bf16 = migraphx::shape::bf16_type;
+    migraphx::shape s1{bf16, dims};
+    migraphx::shape s2{bf16, {2}};
+    // Operators that appear twice in the expected graph.
+    auto mean_op  = migraphx::make_op("reduce_mean", {{"axes", {2, 3}}});
+    auto bcast_op = migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", dims}});
+
+    migraphx::program p;
+    auto* mm   = p.get_main_module();
+    auto x     = mm->add_parameter("0", s1);
+    auto scale = mm->add_parameter("1", s2);
+    auto bias  = mm->add_parameter("2", s2);
+
+    // Mean over axes {2, 3}, then x - mean and the squared difference, then its mean.
+    auto mean     = mm->add_instruction(mean_op, x);
+    auto centered = add_common_op(*mm, migraphx::make_op("sub"), {x, mean});
+    auto sq_diff  = add_common_op(*mm, migraphx::make_op("sqdiff"), {x, mean});
+    auto variance = mm->add_instruction(mean_op, sq_diff);
+
+    // epsilon is a scalar literal with the same bf16 type as input "0".
+    auto eps     = mm->add_literal(migraphx::literal{migraphx::shape{bf16}, {1e-5}});
+    auto var_eps = add_common_op(*mm, migraphx::make_op("add"), {variance, eps});
+    auto inv_std = mm->add_instruction(migraphx::make_op("rsqrt"), var_eps);
+    auto normed  = add_common_op(*mm, migraphx::make_op("mul"), {centered, inv_std});
+
+    // scale and bias are broadcast along axis 1 before the final mul and add.
+    auto scale_bcast = mm->add_instruction(bcast_op, scale);
+    auto bias_bcast  = mm->add_instruction(bcast_op, bias);
+    auto scaled      = mm->add_instruction(migraphx::make_op("mul"), normed, scale_bcast);
+    mm->add_instruction(migraphx::make_op("add"), scaled, bias_bcast);
+
+    auto prog = optimize_onnx("instance_norm_bf16_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/instance_norm_dyn_batch_bf16_test.cpp b/test/onnx/parse/instance_norm_dyn_batch_bf16_test.cpp
new file mode 100644
index 00000000000..b723e570d67
--- /dev/null
+++ b/test/onnx/parse/instance_norm_dyn_batch_bf16_test.cpp
@@ -0,0 +1,64 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+
+TEST_CASE(instance_norm_dyn_batch_bf16_test)
+{
+    // bf16 instancenorm whose input has a dynamic 0'th (batch) dimension
+    auto bf16 = migraphx::shape::bf16_type;
+    migraphx::shape s1{bf16, {{1, 2, {2}}, {2, 2}, {3, 3}, {3, 3}}};
+    migraphx::shape s2{bf16, {2}};
+    auto mean_op  = migraphx::make_op("reduce_mean", {{"axes", {2, 3}}});
+    auto bcast_op = migraphx::make_op("broadcast", {{"axis", 1}});
+
+    migraphx::program p;
+    auto* mm   = p.get_main_module();
+    auto x     = mm->add_parameter("0", s1);
+    auto scale = mm->add_parameter("1", s2);
+    auto bias  = mm->add_parameter("2", s2);
+
+    // Mean over axes {2, 3}, then x - mean and the squared difference, then its mean.
+    auto mean     = mm->add_instruction(mean_op, x);
+    auto centered = add_common_op(*mm, migraphx::make_op("sub"), {x, mean});
+    auto sq_diff  = add_common_op(*mm, migraphx::make_op("sqdiff"), {x, mean});
+    auto variance = mm->add_instruction(mean_op, sq_diff);
+    // epsilon is a scalar literal with the same bf16 type as input "0".
+    auto eps     = mm->add_literal(migraphx::literal{migraphx::shape{bf16}, {1e-5}});
+    auto var_eps = add_common_op(*mm, migraphx::make_op("add"), {variance, eps});
+    auto inv_std = mm->add_instruction(migraphx::make_op("rsqrt"), var_eps);
+    auto normed  = add_common_op(*mm, migraphx::make_op("mul"), {centered, inv_std});
+
+    // The broadcasts take x as an extra input rather than an out_lens attribute.
+    auto scale_bcast = mm->add_instruction(bcast_op, scale, x);
+    auto bias_bcast  = mm->add_instruction(bcast_op, bias, x);
+    auto scaled      = mm->add_instruction(migraphx::make_op("mul"), normed, scale_bcast);
+    auto result      = mm->add_instruction(migraphx::make_op("add"), scaled, bias_bcast);
+    mm->add_return({result});
+
+    migraphx::onnx_options options;
+    options.default_dyn_dim_value = {1, 2, {2}};
+    auto prog                     = read_onnx("instance_norm_dyn_batch_bf16_test.onnx", options);
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/isinf_bf16_test.cpp b/test/onnx/parse/isinf_bf16_test.cpp
new file mode 100644
index 00000000000..b7c975adc38
--- /dev/null
+++ b/test/onnx/parse/isinf_bf16_test.cpp
@@ -0,0 +1,38 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+
+TEST_CASE(isinf_bf16_test)
+{
+    // Expected: a {2, 3} bf16 parameter "t1" goes through isinf and is returned.
+    migraphx::program p;
+    auto* mm    = p.get_main_module();
+    auto input  = mm->add_parameter("t1", migraphx::shape{migraphx::shape::bf16_type, {2, 3}});
+    auto is_inf = mm->add_instruction(migraphx::make_op("isinf"), input);
+    mm->add_return({is_inf});
+
+    auto prog = read_onnx("isinf_bf16_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/isnan_bf16_test.cpp b/test/onnx/parse/isnan_bf16_test.cpp
new file mode 100644
index 00000000000..2675932af27
--- /dev/null
+++ b/test/onnx/parse/isnan_bf16_test.cpp
@@ -0,0 +1,38 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+
+TEST_CASE(isnan_bf16_test)
+{
+    // Expected: a {2, 3} bf16 parameter "t1" goes through isnan and is returned.
+    migraphx::program p;
+    auto* mm    = p.get_main_module();
+    auto input  = mm->add_parameter("t1", migraphx::shape{migraphx::shape::bf16_type, {2, 3}});
+    auto is_nan = mm->add_instruction(migraphx::make_op("isnan"), input);
+    mm->add_return({is_nan});
+
+    auto prog = read_onnx("isnan_bf16_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/layer_norm_3d_bf16_test.cpp b/test/onnx/parse/layer_norm_3d_bf16_test.cpp
new file mode 100644
index 00000000000..454ad3cd784
--- /dev/null
+++ b/test/onnx/parse/layer_norm_3d_bf16_test.cpp
@@ -0,0 +1,35 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+#include <onnx_test_utils.hpp>
+
+TEST_CASE(layer_norm_3d_bf16_test)
+{
+    // The expected program comes from make_layer_norm; the parsed model must equal it.
+    const auto dtype    = migraphx::shape::bf16_type;
+    migraphx::program p = make_layer_norm({1, 4, 2}, {2}, {2}, 2, false, 1e-5f, dtype);
+    auto prog           = optimize_onnx("layer_norm_3d_bf16_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/layer_norm_4d_bf16_test.cpp b/test/onnx/parse/layer_norm_4d_bf16_test.cpp
new file mode 100644
index 00000000000..406cbff79c1
--- /dev/null
+++ b/test/onnx/parse/layer_norm_4d_bf16_test.cpp
@@ -0,0 +1,35 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+#include <onnx_test_utils.hpp>
+
+TEST_CASE(layer_norm_4d_bf16_test)
+{
+    // The expected program comes from make_layer_norm; the parsed model must equal it.
+    const auto dtype    = migraphx::shape::bf16_type;
+    migraphx::program p = make_layer_norm({3, 3, 3, 3}, {3}, {3}, 3, false, 1e-5f, dtype);
+    auto prog           = optimize_onnx("layer_norm_4d_bf16_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/layer_norm_small_eps_bf16_test.cpp b/test/onnx/parse/layer_norm_small_eps_bf16_test.cpp
new file mode 100644
index 00000000000..3081bd611af
--- /dev/null
+++ b/test/onnx/parse/layer_norm_small_eps_bf16_test.cpp
@@ -0,0 +1,35 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+#include <onnx_test_utils.hpp>
+
+TEST_CASE(layer_norm_small_eps_bf16_test)
+{
+    // Reference graph from make_layer_norm in bf16; 1e-7 is presumably the epsilon under test.
+    migraphx::program expected =
+        make_layer_norm({1, 2}, {2}, {1}, 1, true, 1e-7, migraphx::shape::bf16_type);
+    auto actual = optimize_onnx("layer_norm_small_eps_bf16_test.onnx");
+    EXPECT(expected == actual);
+}
diff --git a/test/onnx/parse/mean_bf16_test.cpp b/test/onnx/parse/mean_bf16_test.cpp
new file mode 100644
index 00000000000..b96bc129fcd
--- /dev/null
+++ b/test/onnx/parse/mean_bf16_test.cpp
@@ -0,0 +1,46 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+
+TEST_CASE(mean_bf16_test)
+{
+    const std::size_t n_inputs = 3;
+    migraphx::program expected;
+    auto* mod = expected.get_main_module();
+    migraphx::shape in_shape{migraphx::shape::bf16_type, {1, 2, 3}};
+    // Add the three bf16 parameters together, two at a time.
+    auto arg0 = mod->add_parameter("0", in_shape);
+    auto arg1 = mod->add_parameter("1", in_shape);
+    auto arg2 = mod->add_parameter("2", in_shape);
+    auto sum2 = mod->add_instruction(migraphx::make_op("add"), arg0, arg1);
+    auto sum3 = mod->add_instruction(migraphx::make_op("add"), sum2, arg2);
+    // Divide by the input count, held in a literal that is broadcast to the input dimensions.
+    auto count = mod->add_literal(migraphx::literal{migraphx::shape{in_shape.type()}, {n_inputs}});
+    auto denom = mod->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", in_shape.lens()}}), count);
+    mod->add_instruction(migraphx::make_op("div"), sum3, denom);
+    auto actual = optimize_onnx("mean_bf16_test.onnx");
+    EXPECT(expected == actual);
+}
diff --git a/test/onnx/parse/mod_test_fmod_bf16.cpp b/test/onnx/parse/mod_test_fmod_bf16.cpp
new file mode 100644
index 00000000000..64a80e31bbe
--- /dev/null
+++ b/test/onnx/parse/mod_test_fmod_bf16.cpp
@@ -0,0 +1,38 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+
+TEST_CASE(mod_test_fmod_bf16)
+{
+    // Expected graph: a single fmod applied to two bf16 parameters of identical shape.
+    const migraphx::shape operand_shape{migraphx::shape::bf16_type, {3, 3, 3}};
+    migraphx::program expected;
+    auto* mod = expected.get_main_module();
+    auto lhs  = mod->add_parameter("0", operand_shape);
+    auto rhs  = mod->add_parameter("1", operand_shape);
+    mod->add_instruction(migraphx::make_op("fmod"), lhs, rhs);
+    auto actual = optimize_onnx("mod_test_fmod_bf16.onnx");
+    EXPECT(expected == actual);
+}
diff --git a/test/onnx/parse/negativeloglikelihoodloss_kd_all_reduction_weighted_test.cpp b/test/onnx/parse/negativeloglikelihoodloss_kd_all_reduction_weighted_test.cpp
index 0bbf82dae9e..235a1374d5c 100644
--- a/test/onnx/parse/negativeloglikelihoodloss_kd_all_reduction_weighted_test.cpp
+++ b/test/onnx/parse/negativeloglikelihoodloss_kd_all_reduction_weighted_test.cpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -263,3 +263,88 @@ TEST_CASE(negativeloglikelihoodloss_kd_mean_reduction_half_weighted_test)
         optimize_onnx("negativeloglikelihoodloss_kd_mean_reduction_half_weighted_test.onnx");
     EXPECT(p == prog);
 }
+
+TEST_CASE(negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test)
+{
+    migraphx::program p;
+    auto* mm          = p.get_main_module();
+    size_t batch_size = 4;
+    size_t class_size = 4;
+    // Scores are {batch, class, 2, 2}; labels and their batch indices are sized by batch_size.
+    auto scores = mm->add_parameter(
+        "0", migraphx::shape{migraphx::shape::bf16_type, {batch_size, class_size, 2, 2}});
+    auto labels =
+        mm->add_parameter("1", migraphx::shape{migraphx::shape::int32_type, {batch_size, 2, 2}});
+    auto weights =
+        mm->add_parameter("2", migraphx::shape{migraphx::shape::bf16_type, {class_size}});
+
+    auto weights_dflt = mm->add_literal(
+        migraphx::literal(migraphx::shape(migraphx::shape::bf16_type, {1}, {0}), {1}));
+    auto labels_idx = mm->add_literal(migraphx::literal(
+        migraphx::shape(migraphx::shape::int32_type, {batch_size}, {1}), {0, 1, 2, 3}));
+
+    // Index variables used for gather on k dimensions that span their dimension
+    auto kd_1 = mm->add_literal(
+        migraphx::literal(migraphx::shape(migraphx::shape::int32_type, {2}, {1}), {0, 1}));
+    auto kd_2 = mm->add_literal(
+        migraphx::literal(migraphx::shape(migraphx::shape::int32_type, {2}, {1}), {0, 1}));
+    // Result unused here; presumably mirrors an instruction the parser emits (TODO confirm).
+    mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {class_size}}}),
+                        weights_dflt);
+
+    weights = mm->add_instruction(migraphx::make_op("neg"), weights);
+    auto unsq_labels =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), labels);
+    // Broadcast the batch index and both k-dimension indices to the unsqueezed label dims.
+    auto unsq_labels_idx =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2, 3}}}), labels_idx);
+    auto bc_unsq_labels_idx = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", unsq_labels->get_shape().lens()}}),
+        unsq_labels_idx);
+
+    auto unsq_labels_idx2 =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0, 2, 3}}}), kd_1);
+    auto bc_unsq_labels_idx2 = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", unsq_labels->get_shape().lens()}}),
+        unsq_labels_idx2);
+
+    auto unsq_labels_idx3 =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0, 1, 3}}}), kd_2);
+    auto bc_unsq_labels_idx3 = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", unsq_labels->get_shape().lens()}}),
+        unsq_labels_idx3);
+
+    auto concat = mm->add_instruction(migraphx::make_op("concat", {{"axis", -1}}),
+                                      bc_unsq_labels_idx,
+                                      bc_unsq_labels_idx2,
+                                      bc_unsq_labels_idx3,
+                                      unsq_labels);
+
+    auto transpose = mm->add_instruction(
+        migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), scores);
+
+    auto gathernd = mm->add_instruction(migraphx::make_op("gathernd"), transpose, concat);
+    auto unsq_mb_weights =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0, 2, 3}}}), weights);
+    auto unsq_mb = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", scores->get_shape().lens()}}),
+        unsq_mb_weights);
+    auto transpose2 = mm->add_instruction(
+        migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), unsq_mb);
+    auto gathernd2 = mm->add_instruction(migraphx::make_op("gathernd"), transpose2, concat);
+
+    auto weighted_loss = mm->add_instruction(migraphx::make_op("mul"), gathernd, gathernd2);
+    // Mean reduction: weighted-loss sum divided by the negated sum of the gathered weights.
+    auto loss_x =
+        mm->add_instruction(migraphx::make_op("reduce_sum", {{"axes", {0, 1, 2}}}), weighted_loss);
+    auto loss_w =
+        mm->add_instruction(migraphx::make_op("reduce_sum", {{"axes", {0, 1, 2}}}), gathernd2);
+
+    loss_w = mm->add_instruction(migraphx::make_op("neg"), loss_w);
+
+    mm->add_instruction(migraphx::make_op("div"), loss_x, loss_w);
+
+    auto prog =
+        optimize_onnx("negativeloglikelihoodloss_kd_mean_reduction_bf16_weighted_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/parse/size_bf16_test.cpp b/test/onnx/parse/size_bf16_test.cpp
new file mode 100644
index 00000000000..b0493ea8511
--- /dev/null
+++ b/test/onnx/parse/size_bf16_test.cpp
@@ -0,0 +1,36 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <onnx_test.hpp>
+
+TEST_CASE(size_bf16_test)
+{
+    // Expected graph: parameter "x" followed by an int64 literal holding its element count.
+    const migraphx::shape x_shape{migraphx::shape::bf16_type, {3, 1}};
+    migraphx::program expected;
+    auto* mod = expected.get_main_module();
+    mod->add_parameter("x", x_shape);
+    mod->add_literal(migraphx::literal{migraphx::shape::int64_type, {x_shape.elements()}});
+    EXPECT(expected == optimize_onnx("size_bf16_test.onnx"));
+}
diff --git a/test/onnx/parse/softmaxcrossentropyloss_2d_mean_reduction_test.cpp b/test/onnx/parse/softmaxcrossentropyloss_2d_mean_reduction_test.cpp
index e7abca8b0c6..bb0d61ba735 100644
--- a/test/onnx/parse/softmaxcrossentropyloss_2d_mean_reduction_test.cpp
+++ b/test/onnx/parse/softmaxcrossentropyloss_2d_mean_reduction_test.cpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -158,3 +158,47 @@ TEST_CASE(softmaxcrossentropyloss_2d_mean_reduction_half_test)
 
     EXPECT(p == prog);
 }
+
+TEST_CASE(softmaxcrossentropyloss_2d_mean_reduction_bf16_test)
+{
+    migraphx::program expected;
+    auto* mm = expected.get_main_module();
+    // Graph inputs: bf16 scores "0" and int32 labels "1", plus the literals used below.
+    auto logits  = mm->add_parameter("0", migraphx::shape{migraphx::shape::bf16_type, {4, 4}});
+    auto target  = mm->add_parameter("1", migraphx::shape{migraphx::shape::int32_type, {4}});
+    auto unit_wt = mm->add_literal(
+        migraphx::literal(migraphx::shape(migraphx::shape::bf16_type, {1}, {0}), {1}));
+    auto sample_idx = mm->add_literal(
+        migraphx::literal(migraphx::shape(migraphx::shape::int32_type, {4}, {1}), {0, 1, 2, 3}));
+    // Broadcast the single weight literal to the label dimensions, then negate it.
+    auto bcast_wgts = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", target->get_shape().lens()}}), unit_wt);
+
+    auto minus_wgts = mm->add_instruction(migraphx::make_op("neg"), bcast_wgts);
+    // Pair each sample index with its label; the pairs are the gathernd indices.
+    auto smx_out = mm->add_instruction(migraphx::make_op("softmax"), logits);
+    auto target_unsq =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), target);
+    auto sample_idx_unsq =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1}}}), sample_idx);
+    auto sample_idx_unsq_bc = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", target_unsq->get_shape().lens()}}),
+        sample_idx_unsq);
+    auto coords = mm->add_instruction(
+        migraphx::make_op("concat", {{"axis", -1}}), sample_idx_unsq_bc, target_unsq);
+    auto gathered = mm->add_instruction(migraphx::make_op("gathernd"), smx_out, coords);
+    auto minus_wgts_unsq =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), minus_wgts);
+    auto wgts_2d = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", logits->get_shape().lens()}}),
+        minus_wgts_unsq);
+    auto gath_wgts = mm->add_instruction(migraphx::make_op("gathernd"), wgts_2d, coords);
+    // Multiply log(gathered softmax) by the gathered weights, then reduce_mean over axis 0.
+    auto log_values    = mm->add_instruction(migraphx::make_op("log"), gathered);
+    auto scaled_losses = mm->add_instruction(migraphx::make_op("mul"), log_values, gath_wgts);
+    mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", {0}}}), scaled_losses);
+
+    auto actual = optimize_onnx("softmaxcrossentropyloss_2d_mean_reduction_bf16_test.onnx");
+
+    EXPECT(expected == actual);
+}
diff --git a/test/onnx/parse/softmaxcrossentropyloss_2d_no_reduction_test.cpp b/test/onnx/parse/softmaxcrossentropyloss_2d_no_reduction_test.cpp
index f1da5e0d0e8..c5c748db593 100644
--- a/test/onnx/parse/softmaxcrossentropyloss_2d_no_reduction_test.cpp
+++ b/test/onnx/parse/softmaxcrossentropyloss_2d_no_reduction_test.cpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -109,8 +109,8 @@ TEST_CASE(softmaxcrossentropyloss_2d_no_reduction_negative_ignore_idx_test)
         unsq_mb_weights);
     auto gathernd2 = mm->add_instruction(migraphx::make_op("gathernd"), unsq_mb, concat);
 
-    auto logsoftmax    = mm->add_instruction(migraphx::make_op("log"), gathernd);
-    auto loss          = mm->add_instruction(migraphx::make_op("mul"), logsoftmax, gathernd2);
+    auto logsoftmax = mm->add_instruction(migraphx::make_op("log"), gathernd);
+    auto loss       = mm->add_instruction(migraphx::make_op("mul"), logsoftmax, gathernd2);
 
     auto ignore_idx_bc = mm->add_instruction(
         migraphx::make_op("multibroadcast", {{"out_lens", labels->get_shape().lens()}}),
@@ -216,3 +216,46 @@ TEST_CASE(softmaxcrossentropyloss_2d_no_reduction_half_test)
 
     EXPECT(p == prog);
 }
+
+TEST_CASE(softmaxcrossentropyloss_2d_no_reduction_bf16_test)
+{
+    migraphx::program expected;
+    auto* mm = expected.get_main_module();
+    // Graph inputs: bf16 scores "0" and int32 labels "1", plus the literals used below.
+    auto logits  = mm->add_parameter("0", migraphx::shape{migraphx::shape::bf16_type, {4, 4}});
+    auto target  = mm->add_parameter("1", migraphx::shape{migraphx::shape::int32_type, {4}});
+    auto unit_wt = mm->add_literal(
+        migraphx::literal(migraphx::shape(migraphx::shape::bf16_type, {1}, {0}), {1}));
+    auto sample_idx = mm->add_literal(
+        migraphx::literal(migraphx::shape(migraphx::shape::int32_type, {4}, {1}), {0, 1, 2, 3}));
+    // Broadcast the single weight literal to the label dimensions, then negate it.
+    auto bcast_wgts = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", target->get_shape().lens()}}), unit_wt);
+    auto minus_wgts = mm->add_instruction(migraphx::make_op("neg"), bcast_wgts);
+    // Pair each sample index with its label; the pairs are the gathernd indices.
+    auto smx_out = mm->add_instruction(migraphx::make_op("softmax"), logits);
+    auto target_unsq =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), target);
+    auto sample_idx_unsq =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1}}}), sample_idx);
+    auto sample_idx_unsq_bc = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", target_unsq->get_shape().lens()}}),
+        sample_idx_unsq);
+    auto coords = mm->add_instruction(
+        migraphx::make_op("concat", {{"axis", -1}}), sample_idx_unsq_bc, target_unsq);
+    auto gathered = mm->add_instruction(migraphx::make_op("gathernd"), smx_out, coords);
+    auto minus_wgts_unsq =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), minus_wgts);
+    auto wgts_2d = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", logits->get_shape().lens()}}),
+        minus_wgts_unsq);
+    auto gath_wgts = mm->add_instruction(migraphx::make_op("gathernd"), wgts_2d, coords);
+    // No reduce op follows: the graph ends with the mul of log values and gathered weights.
+    auto log_values = mm->add_instruction(migraphx::make_op("log"), gathered);
+
+    mm->add_instruction(migraphx::make_op("mul"), log_values, gath_wgts);
+
+    auto actual = optimize_onnx("softmaxcrossentropyloss_2d_no_reduction_bf16_test.onnx");
+
+    EXPECT(expected == actual);
+}
diff --git a/test/onnx/parse/softmaxcrossentropyloss_2d_sum_reduction_test.cpp b/test/onnx/parse/softmaxcrossentropyloss_2d_sum_reduction_test.cpp
index 3fe7df558a7..cbbd044e522 100644
--- a/test/onnx/parse/softmaxcrossentropyloss_2d_sum_reduction_test.cpp
+++ b/test/onnx/parse/softmaxcrossentropyloss_2d_sum_reduction_test.cpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -157,3 +157,47 @@ TEST_CASE(softmaxcrossentropyloss_2d_sum_reduction_half_test)
 
     EXPECT(p == prog);
 }
+
+TEST_CASE(softmaxcrossentropyloss_2d_sum_reduction_bf16_test)
+{
+    migraphx::program expected;
+    auto* mm = expected.get_main_module();
+    // Graph inputs: bf16 scores "0" and int32 labels "1", plus the literals used below.
+    auto logits  = mm->add_parameter("0", migraphx::shape{migraphx::shape::bf16_type, {4, 4}});
+    auto target  = mm->add_parameter("1", migraphx::shape{migraphx::shape::int32_type, {4}});
+    auto unit_wt = mm->add_literal(
+        migraphx::literal(migraphx::shape(migraphx::shape::bf16_type, {1}, {0}), {1}));
+    auto sample_idx = mm->add_literal(
+        migraphx::literal(migraphx::shape(migraphx::shape::int32_type, {4}, {1}), {0, 1, 2, 3}));
+    // Broadcast the single weight literal to the label dimensions, then negate it.
+    auto bcast_wgts = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", target->get_shape().lens()}}), unit_wt);
+    auto minus_wgts = mm->add_instruction(migraphx::make_op("neg"), bcast_wgts);
+    // Pair each sample index with its label; the pairs are the gathernd indices.
+    auto smx_out = mm->add_instruction(migraphx::make_op("softmax"), logits);
+    auto target_unsq =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), target);
+    auto sample_idx_unsq =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1}}}), sample_idx);
+    auto sample_idx_unsq_bc = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", target_unsq->get_shape().lens()}}),
+        sample_idx_unsq);
+    auto coords = mm->add_instruction(
+        migraphx::make_op("concat", {{"axis", -1}}), sample_idx_unsq_bc, target_unsq);
+    auto gathered = mm->add_instruction(migraphx::make_op("gathernd"), smx_out, coords);
+    auto minus_wgts_unsq =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0}}}), minus_wgts);
+    auto wgts_2d = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", logits->get_shape().lens()}}),
+        minus_wgts_unsq);
+    auto gath_wgts = mm->add_instruction(migraphx::make_op("gathernd"), wgts_2d, coords);
+    // Multiply log(gathered softmax) by the gathered weights, then reduce_sum over axis 0.
+    auto log_values = mm->add_instruction(migraphx::make_op("log"), gathered);
+
+    auto scaled_losses = mm->add_instruction(migraphx::make_op("mul"), log_values, gath_wgts);
+    mm->add_instruction(migraphx::make_op("reduce_sum", {{"axes", {0}}}), scaled_losses);
+
+    auto actual = optimize_onnx("softmaxcrossentropyloss_2d_sum_reduction_bf16_test.onnx");
+
+    EXPECT(expected == actual);
+}
diff --git a/test/onnx/parse/softmaxcrossentropyloss_kd_all_reduction_weighted_test.cpp b/test/onnx/parse/softmaxcrossentropyloss_kd_all_reduction_weighted_test.cpp
index 42b9f341336..d7c1699a1bb 100644
--- a/test/onnx/parse/softmaxcrossentropyloss_kd_all_reduction_weighted_test.cpp
+++ b/test/onnx/parse/softmaxcrossentropyloss_kd_all_reduction_weighted_test.cpp
@@ -1,7 +1,7 @@
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -173,7 +173,7 @@ TEST_CASE(softmaxcrossentropyloss_kd_no_reduction_weighted_test)
         migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), unsq_mb);
     auto gathernd2 = mm->add_instruction(migraphx::make_op("gathernd"), transpose2, concat);
 
-    auto logsoftmax    = mm->add_instruction(migraphx::make_op("log"), gathernd);
+    auto logsoftmax = mm->add_instruction(migraphx::make_op("log"), gathernd);
     mm->add_instruction(migraphx::make_op("mul"), logsoftmax, gathernd2);
 
     auto prog = optimize_onnx("softmaxcrossentropyloss_kd_no_reduction_weighted_test.onnx");
@@ -266,3 +266,89 @@ TEST_CASE(softmaxcrossentropyloss_kd_mean_reduction_half_weighted_test)
     auto prog = optimize_onnx("softmaxcrossentropyloss_kd_mean_reduction_half_weighted_test.onnx");
     EXPECT(p == prog);
 }
+
+TEST_CASE(softmaxcrossentropyloss_kd_mean_reduction_bf16_weighted_test)
+{
+    migraphx::program p;
+    auto* mm          = p.get_main_module();
+    size_t batch_size = 4;
+    size_t class_size = 4;
+    // Scores are {batch, class, 2, 2}; labels and their batch indices are sized by batch_size.
+    auto scores = mm->add_parameter(
+        "0", migraphx::shape{migraphx::shape::bf16_type, {batch_size, class_size, 2, 2}});
+    auto labels =
+        mm->add_parameter("1", migraphx::shape{migraphx::shape::int32_type, {batch_size, 2, 2}});
+    auto weights =
+        mm->add_parameter("2", migraphx::shape{migraphx::shape::bf16_type, {class_size}});
+
+    auto weights_dflt = mm->add_literal(
+        migraphx::literal(migraphx::shape(migraphx::shape::bf16_type, {1}, {0}), {1}));
+    auto labels_idx = mm->add_literal(migraphx::literal(
+        migraphx::shape(migraphx::shape::int32_type, {batch_size}, {1}), {0, 1, 2, 3}));
+
+    // Index variables used for gather on k dimensions that span their dimension
+    auto kd_1 = mm->add_literal(
+        migraphx::literal(migraphx::shape(migraphx::shape::int32_type, {2}, {1}), {0, 1}));
+    auto kd_2 = mm->add_literal(
+        migraphx::literal(migraphx::shape(migraphx::shape::int32_type, {2}, {1}), {0, 1}));
+    // Result unused here; presumably mirrors an instruction the parser emits (TODO confirm).
+    mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {class_size}}}),
+                        weights_dflt);
+    weights = mm->add_instruction(migraphx::make_op("neg"), weights);
+
+    auto softmax = mm->add_instruction(migraphx::make_op("softmax"), scores);
+    auto unsq_labels =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {-1}}}), labels);
+    // Broadcast the batch index and both k-dimension indices to the unsqueezed label dims.
+    auto unsq_labels_idx =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2, 3}}}), labels_idx);
+    auto bc_unsq_labels_idx = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", unsq_labels->get_shape().lens()}}),
+        unsq_labels_idx);
+
+    auto unsq_labels_idx2 =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0, 2, 3}}}), kd_1);
+    auto bc_unsq_labels_idx2 = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", unsq_labels->get_shape().lens()}}),
+        unsq_labels_idx2);
+
+    auto unsq_labels_idx3 =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0, 1, 3}}}), kd_2);
+    auto bc_unsq_labels_idx3 = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", unsq_labels->get_shape().lens()}}),
+        unsq_labels_idx3);
+
+    auto concat = mm->add_instruction(migraphx::make_op("concat", {{"axis", -1}}),
+                                      bc_unsq_labels_idx,
+                                      bc_unsq_labels_idx2,
+                                      bc_unsq_labels_idx3,
+                                      unsq_labels);
+
+    auto transpose = mm->add_instruction(
+        migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), softmax);
+
+    auto gathernd = mm->add_instruction(migraphx::make_op("gathernd"), transpose, concat);
+    auto unsq_mb_weights =
+        mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {0, 2, 3}}}), weights);
+    auto unsq_mb = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", scores->get_shape().lens()}}),
+        unsq_mb_weights);
+    auto transpose2 = mm->add_instruction(
+        migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), unsq_mb);
+    auto gathernd2 = mm->add_instruction(migraphx::make_op("gathernd"), transpose2, concat);
+
+    auto logsoftmax = mm->add_instruction(migraphx::make_op("log"), gathernd);
+
+    auto weighted_loss = mm->add_instruction(migraphx::make_op("mul"), logsoftmax, gathernd2);
+    // Mean reduction: weighted-loss sum divided by the negated sum of the gathered weights.
+    auto loss_x =
+        mm->add_instruction(migraphx::make_op("reduce_sum", {{"axes", {0, 1, 2}}}), weighted_loss);
+    auto loss_w =
+        mm->add_instruction(migraphx::make_op("reduce_sum", {{"axes", {0, 1, 2}}}), gathernd2);
+    loss_w = mm->add_instruction(migraphx::make_op("neg"), loss_w);
+
+    mm->add_instruction(migraphx::make_op("div"), loss_x, loss_w);
+
+    auto prog = optimize_onnx("softmaxcrossentropyloss_kd_mean_reduction_bf16_weighted_test.onnx");
+    EXPECT(p == prog);
+}
diff --git a/test/onnx/round_bf16_test.onnx b/test/onnx/round_bf16_test.onnx
new file mode 100644
index 00000000000..d5f5bcad121
--- /dev/null
+++ b/test/onnx/round_bf16_test.onnx
@@ -0,0 +1,11 @@
+	round_bf16_test:J
+
+xy"Roundround_bf16_testZ
+x
+
+
+b
+y
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/size_bf16_test.onnx b/test/onnx/size_bf16_test.onnx
new file mode 100644
index 00000000000..85c42208cf3
--- /dev/null
+++ b/test/onnx/size_bf16_test.onnx
@@ -0,0 +1,11 @@
+	size_bf16_test:D
+
+xy"Sizesize_bf16_testZ
+x
+
+
+b
+y
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/softmaxcrossentropyloss_2d_mean_reduction_bf16_test.onnx b/test/onnx/softmaxcrossentropyloss_2d_mean_reduction_bf16_test.onnx
new file mode 100644
index 00000000000..3c9a4b6ebd2
--- /dev/null
+++ b/test/onnx/softmaxcrossentropyloss_2d_mean_reduction_bf16_test.onnx
@@ -0,0 +1,17 @@
+	3softmaxcrossentropyloss_2d_mean_reduction_bf16_test:�
+8
+0
+12"SoftmaxCrossEntropyLoss*
+	reduction"mean�3softmaxcrossentropyloss_2d_mean_reduction_bf16_testZ
+0
+
+
+Z
+1
+
+
+b
+2
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/softmaxcrossentropyloss_2d_no_reduction_bf16_test.onnx b/test/onnx/softmaxcrossentropyloss_2d_no_reduction_bf16_test.onnx
new file mode 100644
index 00000000000..b714484bf32
--- /dev/null
+++ b/test/onnx/softmaxcrossentropyloss_2d_no_reduction_bf16_test.onnx
@@ -0,0 +1,17 @@
+	1softmaxcrossentropyloss_2d_no_reduction_bf16_test:�
+8
+0
+12"SoftmaxCrossEntropyLoss*
+	reduction"none�1softmaxcrossentropyloss_2d_no_reduction_bf16_testZ
+0
+
+
+Z
+1
+
+
+b
+2
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/softmaxcrossentropyloss_2d_sum_reduction_bf16_test.onnx b/test/onnx/softmaxcrossentropyloss_2d_sum_reduction_bf16_test.onnx
new file mode 100644
index 00000000000..b9ebaf16c00
--- /dev/null
+++ b/test/onnx/softmaxcrossentropyloss_2d_sum_reduction_bf16_test.onnx
@@ -0,0 +1,17 @@
+	2softmaxcrossentropyloss_2d_sum_reduction_bf16_test:�
+7
+0
+12"SoftmaxCrossEntropyLoss*
+	reduction"sum�2softmaxcrossentropyloss_2d_sum_reduction_bf16_testZ
+0
+
+
+Z
+1
+
+
+b
+2
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/softmaxcrossentropyloss_kd_mean_reduction_bf16_weighted_test.onnx b/test/onnx/softmaxcrossentropyloss_kd_mean_reduction_bf16_weighted_test.onnx
new file mode 100644
index 00000000000..38a94071f29
--- /dev/null
+++ b/test/onnx/softmaxcrossentropyloss_kd_mean_reduction_bf16_weighted_test.onnx
@@ -0,0 +1,25 @@
+	<softmaxcrossentropyloss_kd_mean_reduction_bf16_weighted_test:�
+;
+0
+1
+23"SoftmaxCrossEntropyLoss*
+	reduction"mean�<softmaxcrossentropyloss_kd_mean_reduction_bf16_weighted_testZ
+0
+
+
+
+
+Z
+1
+
+
+
+Z
+2
+
+
+b
+3
+
+
+B
\ No newline at end of file
diff --git a/test/onnx/verify/add_bf16_test.cpp b/test/onnx/verify/add_bf16_test.cpp
new file mode 100644
index 00000000000..7ff64229dfa
--- /dev/null
+++ b/test/onnx/verify/add_bf16_test.cpp
@@ -0,0 +1,49 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/bf16.hpp>
+#include <migraphx/register_target.hpp>
+#include <migraphx/verify.hpp>
+#include <onnx_test.hpp>
+// Loads add_bf16_test.onnx, evaluates it on the "ref" target and checks 3.25 + 2.25 against 5.5.
+TEST_CASE(add_bf16_test)
+{
+    auto p = optimize_onnx("add_bf16_test.onnx");
+    p.compile(migraphx::make_target("ref"));
+
+    migraphx::shape s{migraphx::shape::bf16_type, {1}};
+    // Both operands are single-element bf16 literals bound to parameters "0" and "1".
+    migraphx::parameter_map pp;
+    migraphx::literal l1{s, {3.25}};
+    migraphx::literal l2{s, {2.25}};
+    pp["0"] = l1.get_argument();
+    pp["1"] = l2.get_argument();
+    // Only the last program output is inspected; it is copied into a bf16 vector.
+    auto result = p.eval(pp).back();
+    std::vector<migraphx::bf16> result_vector;
+    result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); });
+    std::vector<migraphx::bf16> gold{static_cast<migraphx::bf16>(5.5)};
+
+    EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
+}
diff --git a/test/onnx/verify/gelu_default_bf16_test.cpp b/test/onnx/verify/gelu_default_bf16_test.cpp
new file mode 100644
index 00000000000..58bab25f576
--- /dev/null
+++ b/test/onnx/verify/gelu_default_bf16_test.cpp
@@ -0,0 +1,56 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/register_target.hpp>
+#include <migraphx/verify.hpp>
+#include <onnx_test.hpp>
+// Evaluates gelu_default_bf16_test.onnx on the "ref" target for a 3x3 bf16 input named "x".
+TEST_CASE(gelu_default_bf16_test)
+{
+    migraphx::program p = read_onnx("gelu_default_bf16_test.onnx");
+    p.compile(migraphx::make_target("ref"));
+
+    std::vector<std::size_t> input_lens{3, 3};
+    auto input_type = migraphx::shape::bf16_type;
+    migraphx::shape data_shape{input_type, input_lens};
+    std::vector<float> tmp = {-100.0f, -7.5f, -5.2f, -1.0f, 0.0f, 1.5f, 4.9f, 8.2f, 1000.0f};
+    std::vector<migraphx::bf16> data = {tmp.begin(), tmp.end()};
+
+    migraphx::parameter_map pp;
+    pp["x"] = migraphx::argument(data_shape, data.data());
+    // The output elements are copied into a float vector; the gold below is stored as bf16.
+    auto result = p.eval(pp).back();
+    std::vector<float> result_vector;
+    result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); });
+
+    // Reference formula from the ONNX specification (the snippet below is written for float16):
+    // https://github.com/onnx/onnx/blob/main/docs/Operators.md#examples-59
+    // x = np.array([-100.0, -7.5, -5.2, -1.0, 0.0, 1.5, 4.9, 8.2, 1000.0]).astype(np.float16)
+    // (0.5 * x * (1 + np.vectorize(math.erf)(x / np.sqrt(2)))).astype(np.float16)
+    // float16 gold: {0.0f, 0.0f, -5.364e-07f, -0.1587f, 0.0f, 1.399f, 4.898f, 8.203f, 1000.0f}
+    tmp = {0.0f, 0.0f, 0.0f, -0.160156f, 0.0f, 1.399f, 4.84375f, 8.203f, 1000.0f};
+    // Entries 2, 3 and 6 differ from the float16 gold; presumably bf16 rounding - TODO confirm.
+    std::vector<migraphx::bf16> gold = {tmp.begin(), tmp.end()};
+    EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
+}
diff --git a/test/onnx/verify/gemm_bf16_test.cpp b/test/onnx/verify/gemm_bf16_test.cpp
new file mode 100644
index 00000000000..f77204804a5
--- /dev/null
+++ b/test/onnx/verify/gemm_bf16_test.cpp
@@ -0,0 +1,71 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/register_target.hpp>
+#include <migraphx/verify.hpp>
+#include <onnx_test.hpp>
+// Evaluates gemm_bf16_test.onnx on the "ref" target with bf16 A (8x6), B (8x7) and C (6x1).
+TEST_CASE(gemm_bf16_test)
+{
+    migraphx::program p = read_onnx("gemm_bf16_test.onnx");
+    p.compile(migraphx::make_target("ref"));
+    // tmp is a float staging buffer that is reused for every tensor before conversion to bf16.
+    migraphx::shape a_shape{migraphx::shape::bf16_type, {8, 6}};
+    std::vector<float> tmp = {0.2646, 0.8525, 0.4192, 0.1415, 0.4321,  0.675,  0.4248, 0.8203,
+                              0.978,  0.5796, 0.6626, 0.479,  0.924,   0.734,  0.674,  0.8716,
+                              0.3733, 0.3328, 0.4272, 0.0247, 0.7583,  0.4873, 0.5835, 0.694,
+                              0.4375, 0.2406, 0.269,  0.6763, 0.542,   0.8994, 0.657,  0.5425,
+                              0.1412, 0.8994, 0.2183, 0.812,  0.937,   0.3438, 0.712,  0.9033,
+                              0.266,  0.8013, 0.803,  0.4993, 0.07196, 0.635,  0.7344, 0.3213};
+    std::vector<migraphx::bf16> a_data{tmp.cbegin(), tmp.cend()};
+
+    migraphx::shape b_shape{migraphx::shape::bf16_type, {8, 7}};
+    tmp = {0.7095,  0.612,  0.741,  0.02121, 0.3872, 0.4482,  0.6235,  0.02249, 0.2332, 0.7656,
+           0.8955,  0.8154, 0.2239, 0.9277,  0.4622, 0.708,   0.566,   0.0736,  0.138,  0.8574,
+           0.4055,  0.382,  0.6206, 0.424,   0.3674, 0.435,   0.998,   0.3594,  0.701,  0.6216,
+           0.01826, 0.6313, 0.514,  0.1095,  0.3203, 0.01636, 0.537,   0.01952, 0.4502, 0.8965,
+           0.5415,  0.7456, 0.793,  0.756,   0.9,    0.5264,  0.05368, 0.4214,  0.276,  0.1517,
+           0.08453, 0.83,   0.417,  0.1682,  0.845,  0.1729};
+    std::vector<migraphx::bf16> b_data{tmp.cbegin(), tmp.cend()};
+
+    migraphx::shape c_shape{migraphx::shape::bf16_type, {6, 1}};
+    tmp = {0.10846, 0.672, 0.527, 0.94, 0.429, 0.2291};
+    std::vector<migraphx::bf16> c_data{tmp.cbegin(), tmp.cend()};
+
+    migraphx::parameter_map params;
+    params["A"] = migraphx::argument(a_shape, a_data.data());
+    params["B"] = migraphx::argument(b_shape, b_data.data());
+    params["C"] = migraphx::argument(c_shape, c_data.data());
+
+    auto result = p.eval(params).back();
+    std::vector<migraphx::bf16> result_vector;
+    result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); });
+    // 42 gold values, i.e. a 6x7 result; presumably the model sets transA - TODO confirm.
+    tmp = {1.071, 1.378, 1.465, 1.093, 0.968, 1.542, 1.145, 1.287,  1.533, 1.75,  1.338,
+           1.449, 1.592, 1.668, 1.265, 1.531, 1.656, 1.348, 1.2705, 1.525, 1.479, 1.754,
+           2.143, 2.062, 1.921, 1.836, 2.203, 1.952, 1.055, 1.225,  1.418, 1.209, 1.155,
+           1.42,  1.234, 1.302, 1.593, 1.368, 1.289, 1.327, 1.451,  1.394};
+    std::vector<migraphx::bf16> gold{tmp.cbegin(), tmp.cend()};
+    EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
+}
diff --git a/test/onnx/verify/gridsample_bf16_test.cpp b/test/onnx/verify/gridsample_bf16_test.cpp
new file mode 100644
index 00000000000..9586427b66c
--- /dev/null
+++ b/test/onnx/verify/gridsample_bf16_test.cpp
@@ -0,0 +1,61 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/register_target.hpp>
+#include <migraphx/verify.hpp>
+#include <onnx_test.hpp>
+// Evaluates gridsample_bf16_test.onnx on the "ref" target: bf16 data "x" with a float "grid".
+TEST_CASE(gridsample_bf16_test)
+{
+    migraphx::program p = read_onnx("gridsample_bf16_test.onnx");
+    // Compile for the reference target using the default compile options.
+    p.compile(migraphx::make_target("ref"));
+
+    migraphx::shape data_shape{migraphx::shape::bf16_type, {1, 1, 4, 4}};
+    migraphx::shape grid_shape{migraphx::shape::float_type, {1, 6, 6, 2}};
+    std::vector<float> tmp = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.};
+    std::vector<migraphx::bf16> data = {tmp.begin(), tmp.end()};
+    std::vector<float> grid = {-1., -1.,  -0.6, -1.,  -0.2, -1.,  0.2, -1.,  0.6, -1.,  1., -1.,
+                               -1., -0.6, -0.6, -0.6, -0.2, -0.6, 0.2, -0.6, 0.6, -0.6, 1., -0.6,
+                               -1., -0.2, -0.6, -0.2, -0.2, -0.2, 0.2, -0.2, 0.6, -0.2, 1., -0.2,
+                               -1., 0.2,  -0.6, 0.2,  -0.2, 0.2,  0.2, 0.2,  0.6, 0.2,  1., 0.2,
+                               -1., 0.6,  -0.6, 0.6,  -0.2, 0.6,  0.2, 0.6,  0.6, 0.6,  1., 0.6,
+                               -1., 1.,   -0.6, 1.,   -0.2, 1.,   0.2, 1.,   0.6, 1.,   1., 1.};
+
+    migraphx::parameter_map pp;
+    pp["x"]    = migraphx::argument(data_shape, data.data());
+    pp["grid"] = migraphx::argument(grid_shape, grid.data());
+
+    auto result = p.eval(pp).back();
+    std::vector<migraphx::bf16> result_vector;
+    result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); });
+    // 36 expected samples, written as floats and converted to bf16 before the comparison.
+    tmp = {0.,  0.15, 0.55, 0.95, 1.35, 0.75, 0.6, 1.5,  2.3,  3.1,  3.9,  2.1,
+           2.2, 4.7,  5.5,  6.3,  7.1,  3.7,  3.8, 7.9,  8.7,  9.5,  10.3, 5.3,
+           5.4, 11.1, 11.9, 12.7, 13.5, 6.9,  3.,  6.15, 6.55, 6.95, 7.35, 3.75};
+
+    std::vector<migraphx::bf16> gold = {tmp.begin(), tmp.end()};
+
+    EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
+}
diff --git a/test/onnx/verify/group_norm_3d_bf16_test.cpp b/test/onnx/verify/group_norm_3d_bf16_test.cpp
new file mode 100644
index 00000000000..521cc2395a9
--- /dev/null
+++ b/test/onnx/verify/group_norm_3d_bf16_test.cpp
@@ -0,0 +1,46 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/register_target.hpp>
+#include <migraphx/verify.hpp>
+#include <onnx_test.hpp>
+#include <onnx_verify_utils.hpp>
+// Runs group_norm_3d_bf16_test.onnx through norm_test<bf16> with 2-element scale and bias.
+TEST_CASE(group_norm_bf16_test)
+{
+    using migraphx::bf16;
+    std::vector<bf16> scale{bf16{1.2}, bf16{0.8}};
+    std::vector<bf16> bias{bf16{0.5}, bf16{0.2}};
+    std::vector<bf16> result_vector = // {1, 4, 2} is presumably the input dims - TODO confirm
+        norm_test<bf16>({1, 4, 2}, scale, bias, read_onnx("group_norm_3d_bf16_test.onnx"));
+    std::vector<bf16> gold = {bf16{-1.10996256}, // 8 expected values (1 * 4 * 2)
+                              bf16{-0.0366542},
+                              bf16{1.0366542},
+                              bf16{2.10996256},
+                              bf16{-0.87330837},
+                              bf16{-0.15776947},
+                              bf16{0.55776947},
+                              bf16{1.27330837}};
+    EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
+}
diff --git a/test/onnx/verify/isinf_bf16_test.cpp b/test/onnx/verify/isinf_bf16_test.cpp
new file mode 100644
index 00000000000..70bdbafc92b
--- /dev/null
+++ b/test/onnx/verify/isinf_bf16_test.cpp
@@ -0,0 +1,50 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/register_target.hpp>
+#include <migraphx/verify.hpp>
+#include <onnx_test.hpp>
+// Evaluates isinf_bf16_test.onnx on the "ref" target over a 2x3 bf16 input named "t1".
+TEST_CASE(isinf_bf16_test)
+{
+    migraphx::program p = read_onnx("isinf_bf16_test.onnx");
+    p.compile(migraphx::make_target("ref"));
+    // Input mixes both infinities with NaN, the numeric_limits min/max and an ordinary value.
+    migraphx::shape s{migraphx::shape::bf16_type, {2, 3}};
+    migraphx::parameter_map pp;
+    migraphx::bf16 nan               = std::numeric_limits<migraphx::bf16>::quiet_NaN();
+    migraphx::bf16 infinity          = std::numeric_limits<migraphx::bf16>::infinity();
+    migraphx::bf16 max               = std::numeric_limits<migraphx::bf16>::max();
+    migraphx::bf16 min               = std::numeric_limits<migraphx::bf16>::min();
+    migraphx::bf16 val               = migraphx::bf16(3.6);
+    std::vector<migraphx::bf16> data = {-infinity, nan, min, val, max, infinity};
+    pp["t1"]                         = migraphx::argument(s, data.data());
+    // The output elements are copied into a float vector for the comparison.
+    auto result = p.eval(pp).back();
+    std::vector<float> result_vector;
+    result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); });
+    // Expect 1 only at the two infinite inputs (first and last); every other entry is 0.
+    std::vector<float> gold = {1, 0, 0, 0, 0, 1};
+    EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
+}
diff --git a/test/onnx/verify/layer_norm_3d_bf16_test.cpp b/test/onnx/verify/layer_norm_3d_bf16_test.cpp
new file mode 100644
index 00000000000..c4cc3018e9c
--- /dev/null
+++ b/test/onnx/verify/layer_norm_3d_bf16_test.cpp
@@ -0,0 +1,46 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/register_target.hpp>
+#include <migraphx/verify.hpp>
+#include <onnx_test.hpp>
+#include <onnx_verify_utils.hpp>
+// Runs layer_norm_3d_bf16_test.onnx through norm_test<bf16> with 2-element scale and bias.
+TEST_CASE(layer_norm_bf16_test)
+{
+    using migraphx::bf16;
+    std::vector<bf16> scale{bf16{1.2}, bf16{0.8}};
+    std::vector<bf16> bias{bf16{0.5}, bf16{0.2}};
+    std::vector<bf16> result_vector = // {1, 4, 2} is presumably the input dims - TODO confirm
+        norm_test<bf16>({1, 4, 2}, scale, bias, read_onnx("layer_norm_3d_bf16_test.onnx"));
+    std::vector<bf16> gold = {bf16{-0.69997597}, // the same pair of values repeats four times
+                              bf16{0.99998398},
+                              bf16{-0.69997597},
+                              bf16{0.99998398},
+                              bf16{-0.69997597},
+                              bf16{0.99998398},
+                              bf16{-0.69997597},
+                              bf16{0.99998398}};
+    EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
+}
diff --git a/test/onnx/verify/mvn_default_axes_bf16_test.cpp b/test/onnx/verify/mvn_default_axes_bf16_test.cpp
new file mode 100644
index 00000000000..768d8bca92c
--- /dev/null
+++ b/test/onnx/verify/mvn_default_axes_bf16_test.cpp
@@ -0,0 +1,51 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/register_target.hpp>
+#include <migraphx/verify.hpp>
+#include <onnx_test.hpp>
+#include <onnx_verify_utils.hpp>
+// Runs mvn_default_axes_bf16_test.onnx through mvn_test<bf16> and compares 16 bf16 outputs.
+TEST_CASE(mvn_default_axes_bf16_test)
+{
+    using migraphx::bf16;
+    auto result = mvn_test<bf16>({2, 2, 2, 2}, read_onnx("mvn_default_axes_bf16_test.onnx"));
+    std::vector<bf16> gold{bf16{-1.324}, // 16 values; {2, 2, 2, 2} is presumably the input dims
+                           bf16{-1.084},
+                           bf16{-0.843},
+                           bf16{-0.602},
+                           bf16{-1.324},
+                           bf16{-1.084},
+                           bf16{-0.843},
+                           bf16{-0.602},
+                           bf16{0.602},
+                           bf16{0.843},
+                           bf16{1.084},
+                           bf16{1.324},
+                           bf16{0.602},
+                           bf16{0.843},
+                           bf16{1.084},
+                           bf16{1.324}};
+    EXPECT(migraphx::verify::verify_rms_range(result, gold));
+}
diff --git a/test/onnx/verify/mvn_rank_2_bf16_test.cpp b/test/onnx/verify/mvn_rank_2_bf16_test.cpp
new file mode 100644
index 00000000000..f8311887958
--- /dev/null
+++ b/test/onnx/verify/mvn_rank_2_bf16_test.cpp
@@ -0,0 +1,36 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/register_target.hpp>
+#include <migraphx/verify.hpp>
+#include <onnx_test.hpp>
+#include <onnx_verify_utils.hpp>
+// Runs mvn_rank_2_bf16_test.onnx through mvn_test<bf16> and compares the four bf16 outputs.
+TEST_CASE(mvn_rank_2_bf16_test)
+{
+    using migraphx::bf16;
+    auto result = mvn_test<bf16>({2, 2}, read_onnx("mvn_rank_2_bf16_test.onnx"));
+    std::vector<bf16> gold{bf16{-1}, bf16{1}, bf16{-1}, bf16{1}};
+    EXPECT(migraphx::verify::verify_rms_range(result, gold));
+}
diff --git a/test/onnx/verify/mvn_rank_3_bf16_test.cpp b/test/onnx/verify/mvn_rank_3_bf16_test.cpp
new file mode 100644
index 00000000000..f6e92a36335
--- /dev/null
+++ b/test/onnx/verify/mvn_rank_3_bf16_test.cpp
@@ -0,0 +1,43 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/register_target.hpp>
+#include <migraphx/verify.hpp>
+#include <onnx_test.hpp>
+#include <onnx_verify_utils.hpp>
+// Runs mvn_rank_3_bf16_test.onnx through mvn_test<bf16> and compares the eight bf16 outputs.
+TEST_CASE(mvn_rank_3_bf16_test)
+{
+    using migraphx::bf16;
+    auto result = mvn_test<bf16>({2, 2, 2}, read_onnx("mvn_rank_3_bf16_test.onnx"));
+    std::vector<bf16> gold{bf16{-1.342}, // 8 values; {2, 2, 2} is presumably the input dims
+                           bf16{-1.342},
+                           bf16{-0.4473},
+                           bf16{-0.4473},
+                           bf16{0.4473},
+                           bf16{0.4473},
+                           bf16{1.342},
+                           bf16{1.342}};
+    EXPECT(migraphx::verify::verify_rms_range(result, gold));
+}
diff --git a/test/onnx/verify/round_bf16_test.cpp b/test/onnx/verify/round_bf16_test.cpp
new file mode 100644
index 00000000000..811ed0b982b
--- /dev/null
+++ b/test/onnx/verify/round_bf16_test.cpp
@@ -0,0 +1,64 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <migraphx/register_target.hpp>
+#include <migraphx/verify.hpp>
+#include <onnx_test.hpp>
+// Evaluates round_bf16_test.onnx on the "ref" target over a 4x4 bf16 input named "x".
+TEST_CASE(round_bf16_test)
+{
+    migraphx::program p = read_onnx("round_bf16_test.onnx");
+    p.compile(migraphx::make_target("ref"));
+    // Inputs sit at and just around the .5 boundaries, on both sides of zero.
+    migraphx::shape xs{migraphx::shape::bf16_type, {4, 4}};
+    std::vector<float> tmp = {-3.51,
+                              -3.5,
+                              -3.49,
+                              -2.51,
+                              -2.50,
+                              -2.49,
+                              -1.6,
+                              -1.5,
+                              -0.51,
+                              -0.5,
+                              0.5,
+                              0.6,
+                              2.4,
+                              2.5,
+                              3.5,
+                              4.5};
+    std::vector<migraphx::bf16> data{tmp.cbegin(), tmp.cend()};
+    migraphx::parameter_map param_map;
+    param_map["x"] = migraphx::argument(xs, data.data());
+
+    auto result = p.eval(param_map).back();
+
+    std::vector<migraphx::bf16> result_vector;
+    result.visit([&](auto output) { result_vector.assign(output.begin(), output.end()); });
+    // Gold shows ties going to the even integer: -2.5 -> -2, 2.5 -> 2, 3.5 -> 4, 4.5 -> 4.
+    tmp = {-4.0, -4.0, -3.0, -3.0, -2.0, -2.0, -2.0, -2.0, -1.0, 0.0, 0.0, 1.0, 2.0, 2.0, 4.0, 4.0};
+    std::vector<migraphx::bf16> gold{tmp.cbegin(), tmp.cend()};
+
+    EXPECT(migraphx::verify::verify_rms_range(result_vector, gold));
+}

From 22e323c8c405ba169faaacc3816e681a5ad77adc Mon Sep 17 00:00:00 2001
From: Ahsan Saghir <142340507+ahsan-ca@users.noreply.github.com>
Date: Tue, 14 Jan 2025 19:30:12 -0500
Subject: [PATCH 12/12] Update fuse gemm test to use get_compute_fp32_flag()
 function (#3742)

---
 test/gpu/fuse_gemm.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/test/gpu/fuse_gemm.cpp b/test/gpu/fuse_gemm.cpp
index cecf8d95990..8ee25c783d4 100644
--- a/test/gpu/fuse_gemm.cpp
+++ b/test/gpu/fuse_gemm.cpp
@@ -94,7 +94,10 @@ TEST_CASE(gemm_pointwise_add)
         else
         {
             auto gemm_oper =
-                migraphx::make_op("gpu::gemm", {{"alpha", 1}, {"beta", 1}, {"compute_fp32", 1}});
+                migraphx::make_op("gpu::gemm",
+                                  {{"alpha", 1},
+                                   {"beta", 1},
+                                   {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}});
             auto add = mm->add_instruction(gemm_oper, a, b, x, output);
             mm->add_return({add});
         }