diff --git a/.github/workflows/benchmarks-reusable.yml b/.github/workflows/benchmarks-reusable.yml
index bfd6064ba1..f8ef31fcc2 100644
--- a/.github/workflows/benchmarks-reusable.yml
+++ b/.github/workflows/benchmarks-reusable.yml
@@ -220,11 +220,12 @@ jobs:
           --compute-runtime ${{ inputs.compute_runtime_commit }}
           --build-igc
           ${{ inputs.upload_report && '--output-html' || '' }}
+          ${{ inputs.pr_no != 0 && '--output-markdown' || '' }}
           ${{ inputs.bench_script_params }}
 
     - name: Print benchmark results
       run: |
-        cat ${{ github.workspace }}/ur-repo/benchmark_results.md
+        cat ${{ github.workspace }}/ur-repo/benchmark_results.md || true
 
     - name: Add comment to PR
       uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index 7de3926daf..edcb5c02f2 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -24,7 +24,7 @@ on:
         type: number
         required: true
       bench_script_params:
-        description: Parameters passed to script executing benchmark
+        description: Parameters passed to the script executing the benchmarks (passing `--compare baseline` is recommended)
         type: string
         required: false
         default: ''
diff --git a/scripts/benchmarks/README.md b/scripts/benchmarks/README.md
index 9cef0e52a3..ec4c75c3bf 100644
--- a/scripts/benchmarks/README.md
+++ b/scripts/benchmarks/README.md
@@ -27,7 +27,7 @@ You can also include additional benchmark parameters, such as environment variab
 Once all the required information is entered, click the "Run workflow" button to initiate a new workflow run. This will execute the benchmarks and then post the results as a comment on the specified Pull Request.
 
-By default, all benchmark runs are compared against `baseline`, which is a well-established set of the latest data.
+It is recommended to compare all benchmark runs against `baseline` by passing `--compare baseline` in the benchmark parameters. `baseline` is a well-established set of the latest data.
 
 You must be a member of the `oneapi-src` organization to access these features.
 
@@ -35,13 +35,14 @@ You must be a member of the `oneapi-src` organization to access these features.
 
 By default, the benchmark results are not stored. To store them, use the option `--save <name>`. This will make the results available for comparison during the next benchmark runs.
 
-To compare a benchmark run with a previously stored result, use the option `--compare <name>`. You can compare with more than one result.
-
-If no `--compare` option is specified, the benchmark run is compared against a previously stored `baseline`.
+You can compare benchmark results using the `--compare` option. The comparison is presented in a markdown output file (see below). To calculate the relative performance of new results against previously saved data, use `--compare <name>` (e.g. `--compare baseline`). To compare only stored data, without generating new results, use `--dry-run --compare <name1> --compare <name2> --relative-perf <name1>`, where `name1` indicates the baseline for the relative performance calculation and `--dry-run` prevents the script from running the benchmarks. Listing more than two `--compare` options results in displaying only execution times, without statistical analysis.
 
 Baseline, as well as baseline-v2 (for the level-zero adapter v2) is updated automatically during a nightly job. The results are stored [here](https://oneapi-src.github.io/unified-runtime/benchmark_results.html).
 
+## Output formats
+You can display the results as an HTML file by using `--output-html` and as a markdown file by using `--output-markdown`. Due to the character limit for posting PR comments, the final content of the markdown file might be reduced. In order to obtain the full markdown output, use `--output-markdown full`.
+
 ## Requirements
 
 ### Python
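As a concrete illustration of the comparison workflow described in the README above, here is a minimal, hypothetical sketch of driving the script with the documented flags; the working directory and the saved-result names are made up, and only the flag names come from the change:

```python
import subprocess

# Hypothetical invocation: compare two previously saved result sets without
# re-running any benchmarks, anchor the relative-performance column on
# "baseline", and request the full (untruncated) markdown report.
cmd = [
    "./main.py", "/tmp/benchmarks_workdir",
    "--dry-run",                     # skip running the benchmarks themselves
    "--compare", "baseline",         # first saved data set (the reference)
    "--compare", "my_feature_run",   # second saved data set (made-up name)
    "--relative-perf", "baseline",   # which saved name anchors the ratios
    "--output-markdown", "full",     # do not trim to the PR comment limit
]
subprocess.run(cmd, check=True)
```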
diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py
index 77524a6e02..a740c02672 100755
--- a/scripts/benchmarks/main.py
+++ b/scripts/benchmarks/main.py
@@ -189,9 +189,12 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
         benchmark.teardown()
         print("complete.")
 
-    this_name = "This PR"
-    chart_data = {this_name : results}
+    this_name = options.current_run_name
+    chart_data = {}
+
+    if not options.dry_run:
+        chart_data = {this_name : results}
 
     history = BenchmarkHistory(directory)
     # limit how many files we load.
@@ -199,7 +202,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     history.load(1000)
 
     # remove duplicates. this can happen if e.g., --compare baseline is specified manually.
-    compare_names = list(dict.fromkeys(compare_names))
+    compare_names = list(dict.fromkeys(compare_names)) if compare_names is not None else []
 
     for name in compare_names:
         compare_result = history.get_compare(name)
@@ -207,7 +210,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             chart_data[name] = compare_result.results
 
     if options.output_markdown:
-        markdown_content = generate_markdown(this_name, chart_data)
+        markdown_content = generate_markdown(this_name, chart_data, options.output_markdown)
 
         with open('benchmark_results.md', 'w') as file:
             file.write(markdown_content)
@@ -251,7 +254,7 @@ def validate_and_parse_env_args(env_args):
     parser.add_argument("--no-rebuild", help='Do not rebuild the benchmarks from scratch.', action="store_true")
     parser.add_argument("--env", type=str, help='Use env variable for a benchmark run.', action="append", default=[])
     parser.add_argument("--save", type=str, help='Save the results for comparison under a specified name.')
-    parser.add_argument("--compare", type=str, help='Compare results against previously saved data.', action="append", default=["baseline"])
+    parser.add_argument("--compare", type=str, help='Compare results against previously saved data.', action="append")
    parser.add_argument("--iterations", type=int, help='Number of times to run each benchmark to select a median value.', default=options.iterations)
     parser.add_argument("--stddev-threshold", type=float, help='If stddev pct is above this threshold, rerun all iterations', default=options.stddev_threshold)
     parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=options.timeout)
@@ -261,12 +264,13 @@ def validate_and_parse_env_args(env_args):
     parser.add_argument("--exit-on-failure", help='Exit on first failure.', action="store_true")
     parser.add_argument("--compare-type", type=str, choices=[e.value for e in Compare], help='Compare results against previously saved data.', default=Compare.LATEST.value)
     parser.add_argument("--compare-max", type=int, help='How many results to read for comparisions', default=options.compare_max)
+    parser.add_argument("--output-markdown", nargs='?', const=options.output_markdown, help='Specify whether markdown output should fit the content size limit for request validation')
     parser.add_argument("--output-html", help='Create HTML output', action="store_true", default=False)
-    parser.add_argument("--output-markdown", help='Create Markdown output', action="store_true", default=True)
     parser.add_argument("--dry-run", help='Do not run any actual benchmarks', action="store_true", default=False)
     parser.add_argument("--compute-runtime", nargs='?', const=options.compute_runtime_tag, help="Fetch and build compute runtime")
     parser.add_argument("--iterations-stddev", type=int, help="Max number of iterations of the loop calculating stddev after completed benchmark runs", default=options.iterations_stddev)
     parser.add_argument("--build-igc", help="Build IGC from source instead of using the OS-installed version", action="store_true", default=options.build_igc)
+    parser.add_argument("--relative-perf", type=str, help="The name of the results which should be used as a baseline for metrics calculation", default=options.current_run_name)
 
     args = parser.parse_args()
     additional_env_vars = validate_and_parse_env_args(args.env)
@@ -283,12 +287,13 @@ def validate_and_parse_env_args(env_args):
     options.exit_on_failure = args.exit_on_failure
     options.compare = Compare(args.compare_type)
     options.compare_max = args.compare_max
-    options.output_html = args.output_html
     options.output_markdown = args.output_markdown
+    options.output_html = args.output_html
     options.dry_run = args.dry_run
     options.umf = args.umf
     options.iterations_stddev = args.iterations_stddev
     options.build_igc = args.build_igc
+    options.current_run_name = args.relative_perf
 
     if args.build_igc and args.compute_runtime is None:
         parser.error("--build-igc requires --compute-runtime to be set")
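The reworked `--output-markdown` flag above relies on argparse's `nargs='?'`/`const` behaviour. A standalone sketch (parser and values invented for illustration, not taken from `main.py`) shows how the three possible spellings parse:

```python
import argparse

# Illustration of nargs='?' with const: the flag may be omitted, given bare,
# or given with an explicit value such as "full".
parser = argparse.ArgumentParser()
parser.add_argument("--output-markdown", nargs="?", const="short", default=None)

print(parser.parse_args([]).output_markdown)                             # None
print(parser.parse_args(["--output-markdown"]).output_markdown)          # short
print(parser.parse_args(["--output-markdown", "full"]).output_markdown)  # full
```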
diff --git a/scripts/benchmarks/options.py b/scripts/benchmarks/options.py
index 1bd79f6878..772fee2e02 100644
--- a/scripts/benchmarks/options.py
+++ b/scripts/benchmarks/options.py
@@ -6,6 +6,10 @@ class Compare(Enum):
     AVERAGE = 'average'
     MEDIAN = 'median'
 
+class MarkdownSize(Enum):
+    SHORT = 'short'
+    FULL = 'full'
+
 @dataclass
 class Options:
     workdir: str = None
@@ -20,8 +24,8 @@ class Options:
     verbose: bool = False
     compare: Compare = Compare.LATEST
     compare_max: int = 10 # average/median over how many results
+    output_markdown: MarkdownSize = MarkdownSize.SHORT
     output_html: bool = False
-    output_markdown: bool = True
     dry_run: bool = False
     # these two should probably be merged into one setting
     stddev_threshold: float = 0.02
@@ -32,6 +36,7 @@ class Options:
     extra_env_vars: dict = field(default_factory=dict)
     compute_runtime_tag: str = '24.52.32224.10'
     build_igc: bool = False
+    current_run_name: str = "This PR"
 
 options = Options()
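Because the command line delivers `--output-markdown` as a plain string while `Options.output_markdown` defaults to a `MarkdownSize` member, the following self-contained snippet (illustrative only, not code from this patch) shows how a string-valued `Enum` bridges the two representations:

```python
from enum import Enum

# Stand-in mirroring the MarkdownSize enum introduced in options.py.
class MarkdownSize(Enum):
    SHORT = 'short'
    FULL = 'full'

# A CLI string can be promoted to the enum by value lookup...
assert MarkdownSize('full') is MarkdownSize.FULL
# ...and the enum hands its plain string back via .value.
assert MarkdownSize.SHORT.value == 'short'
```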
diff --git a/scripts/benchmarks/output_markdown.py b/scripts/benchmarks/output_markdown.py
index fc3b65507b..552e924f4f 100644
--- a/scripts/benchmarks/output_markdown.py
+++ b/scripts/benchmarks/output_markdown.py
@@ -1,12 +1,13 @@
-# Copyright (C) 2024 Intel Corporation
-# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# Copyright (C) 2024-2025 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
+# Exceptions.
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-import collections, re
+import collections
 from benches.result import Result
-from options import options
-import math
+from options import options, MarkdownSize
+import ast
 
 class OutputLine:
     def __init__(self, name):
@@ -14,6 +15,8 @@ def __init__(self, name):
         self.diff = None
         self.bars = None
         self.row = ""
+        self.suite = "Unknown"
+        self.explicit_group = ""
 
     def __str__(self):
         return f"(Label:{self.label}, diff:{self.diff})"
@@ -21,40 +24,167 @@ def __str__(self):
     def __repr__(self):
         return self.__str__()
 
-# Function to generate the markdown collapsible sections for each variant
-def generate_markdown_details(results: list[Result]):
-    markdown_sections = []
-
-    markdown_sections.append(f"""
-<details>
-<summary>Benchmark details - environment, command...</summary>
-""")
-
-    for res in results:
-        env_vars_str = '\n'.join(f"{key}={value}" for key, value in res.env.items())
-        markdown_sections.append(f"""
-<details>
-<summary>{res.label}</summary>
-
-#### Environment Variables:
-{env_vars_str}
-
-#### Command:
-{' '.join(res.command)}
-
-</details>
-""")
-
-    markdown_sections.append(f"""
-</details>
-""")
-    return "\n".join(markdown_sections)
+# The number of required columns in the markdown table,
+# independent of the chart_data content.
+# Required columns:
+# - benchmark_name
+#
+# optional +1: relative performance
+num_info_columns = 1
+
+# Number of compared runs required for the relative performance change
+# calculation. If more saved baselines are provided for comparison, the
+# relative performance is not calculated, since the basic (and hopefully
+# most common) use case for this script is comparing the performance of
+# a PR with the main branch.
+num_baselines_required_for_rel_change = 2
+
+# Maximum number of characters allowed by request validation
+# when posting comments in GitHub PRs
+max_markdown_size = 65536
+
+
+def is_relative_perf_comparison_to_be_performed(chart_data:
+                                                dict[str, list[Result]],
+                                                baseline_name: str):
+    return (len(chart_data) == num_baselines_required_for_rel_change) and \
+           (baseline_name in chart_data.keys())
+
+
+def get_chart_markdown_header(chart_data: dict[str, list[Result]],
+                              baseline_name: str):
+    summary_header = ''
+    final_num_columns = num_info_columns
+
+    if is_relative_perf_comparison_to_be_performed(chart_data, baseline_name):
+        summary_header = "| Benchmark | " + " | ".join(chart_data.keys()) + \
+                         " | Change |\n"
+        final_num_columns += 1
+    else:
+        summary_header = "| Benchmark | " + " | ".join(chart_data.keys()) + \
+                         " |\n"
+
+    summary_header += "|---" * (len(chart_data) + final_num_columns) + "|\n"
+
+    return summary_header
+
+
+def get_improved_regressed_summary(is_improved: bool, rows_count: int):
+    title = "Improved"
+    if not is_improved:
+        title = "Regressed"
+
+    summary = (
+        "\n<details>\n"
+        "<summary>\n"
+        f"{title} {rows_count} "
+        f"(threshold {options.epsilon*100:.2f}%)\n"
+        "</summary>\n\n"
+    )
+
+    return summary
+
+
+def get_relative_perf_summary(group_size: int, group_name: str):
+    summary = (
+        "\n<details>\n"
+        f"<summary> Relative perf in group {group_name} "
+        f"({group_size})\n"
+        "</summary>\n\n"
+    )
+
+    return summary
+
+
+def get_main_branch_run_name(chart_data: dict[str, list[Result]],
+                             baseline_name: str):
+    for key in chart_data.keys():
+        if key != baseline_name:
+            return key
+
+    return None
+
+
+def get_available_markdown_size(current_markdown_size: int):
+    return max(0, max_markdown_size - current_markdown_size)
+
+
+def is_content_in_size_limit(content_size: int, current_markdown_size: int):
+    return content_size <= get_available_markdown_size(current_markdown_size)
+
+
+def get_explicit_group_name(result: Result):
+    explicit_group_name = result.explicit_group
+
+    if explicit_group_name != "":
+        return explicit_group_name
+    else:
+        return "Other"
+
+
+# Function to generate the markdown collapsible sections for each variant
+def generate_markdown_details(results: list[Result],
+                              current_markdown_size: int,
+                              markdown_size: MarkdownSize):
+    markdown_sections = []
+    markdown_start = ("\n<details>\n"
+                      "<summary>Benchmark details - environment, command...</summary>"
+                      "\n")
+    markdown_sections.append(markdown_start)
+
+    for res in results:
+        env_dict = res.env
+        command = res.command
+
+        # If data is collected from already saved results,
+        # the content is parsed as strings
+        if isinstance(res.env, str):
+            # Since the scripts would be used solely on data prepared
+            # by our scripts, this should be safe.
+            # However, it may need an additional blessing:
+            # https://docs.python.org/3/library/ast.html#ast.literal_eval
+            env_dict = ast.literal_eval(res.env)
+        if isinstance(res.command, str):
+            command = ast.literal_eval(res.command)
+
+        section = ("\n<details>\n"
+                   f"<summary>{res.label}</summary>\n\n"
+                   "#### Command:\n"
+                   f"{' '.join(command)}\n\n")
+
+        if env_dict:
+            env_vars_str = '\n'.join(f"{key}={value}"
+                                     for key, value in env_dict.items())
+            section += (f"#### Environment Variables:\n {env_vars_str}\n")
+
+        section += "\n</details>\n"
+
+        markdown_sections.append(section)
+
+    markdown_sections.append("\n</details>\n")
+
+    full_markdown = "\n".join(markdown_sections)
+
+    if markdown_size == MarkdownSize.FULL:
+        return full_markdown
+    else:
+        if is_content_in_size_limit(len(full_markdown), current_markdown_size):
+            return full_markdown
+        else:
+            return "\nBenchmark details contain too many chars to display\n"
+
 
-def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]):
-    summary_table = "| Benchmark | " + " | ".join(chart_data.keys()) + " | Relative perf | Change | - |\n"
-    summary_table += "|---" * (len(chart_data) + 4) + "|\n"
+def generate_summary_table_and_chart(chart_data: dict[str, list[Result]],
+                                     baseline_name: str,
+                                     markdown_size: MarkdownSize):
+    summary_table = get_chart_markdown_header(chart_data=chart_data,
+                                              baseline_name=baseline_name)
 
     # Collect all benchmarks and their results
+    # key: benchmark name,
+    # value: dict(run_name : single_result_in_the_given_run)
     benchmark_results = collections.defaultdict(dict)
+
+    # key: run name
+    # results: results from different benchmarks collected in the named run
     for key, results in chart_data.items():
         for res in results:
             benchmark_results[res.name][key] = res
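To make the size-budget logic above easier to follow, here is a compact, self-contained sketch of the same idea (the limit mirrors `max_markdown_size`; the report text and helper name are invented): a section is appended only if it still fits under GitHub's comment limit, otherwise a short placeholder is emitted instead.

```python
MAX_MARKDOWN_CHARS = 65536  # mirrors the max_markdown_size constant above

def append_if_it_fits(report: str, section: str, placeholder: str) -> str:
    # Remaining budget for this comment; never negative.
    remaining = max(0, MAX_MARKDOWN_CHARS - len(report))
    return report + (section if len(section) <= remaining else placeholder)

report = "# Summary\n| Benchmark | This PR | baseline |\n"
details = "<details><summary>Benchmark details</summary>...</details>"
report = append_if_it_fits(
    report, details,
    "\nBenchmark details contain too many chars to display\n")
```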
@@ -62,159 +192,209 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]):
 
     # Generate the table rows
     output_detailed_list = []
-
-    global_product = 1
-    mean_cnt = 0
-    improved = 0
-    regressed = 0
-    no_change = 0
-
     for bname, results in benchmark_results.items():
         oln = OutputLine(bname)
         oln.row = f"| {bname} |"
         best_value = None
         best_key = None
 
-        # Determine the best value
+        are_suite_group_assigned = False
+
+        # Determine the best value for the given benchmark, among the results
+        # from all saved runs specified by --compare
+        # key: run name,
+        # res: single result collected in the given run
         for key, res in results.items():
-            if best_value is None or (res.lower_is_better and res.value < best_value) or (not res.lower_is_better and res.value > best_value):
+            if not are_suite_group_assigned:
+                oln.suite = res.suite
+                oln.explicit_group = get_explicit_group_name(res)
+
+                are_suite_group_assigned = True
+
+            if best_value is None or \
+               (res.lower_is_better and res.value < best_value) or \
+               (not res.lower_is_better and res.value > best_value):
                 best_value = res.value
                 best_key = key
 
-        # Generate the row with the best value highlighted
+        # Generate the row with all the results from saved runs specified by
+        # --compare,
+        # Highlight the best value in the row with data
         if options.verbose: print(f"Results: {results}")
         for key in chart_data.keys():
             if key in results:
                 intv = results[key].value
                 if key == best_key:
-                    oln.row += f" <ins>{intv:3f}</ins> {results[key].unit} |" # Highlight the best value
+                    # Highlight the best value
+                    oln.row += f" <ins>{intv:3f}</ins> {results[key].unit} |"
                 else:
                     oln.row += f" {intv:.3f} {results[key].unit} |"
             else:
                 oln.row += " - |"
 
-        if len(chart_data.keys()) == 2:
-            key0 = list(chart_data.keys())[0]
-            key1 = list(chart_data.keys())[1]
-            if (key0 in results) and (key1 in results):
-                v0 = results[key0].value
-                v1 = results[key1].value
+        if is_relative_perf_comparison_to_be_performed(chart_data,
+                                                       baseline_name):
+            pr_key = baseline_name
+            main_key = get_main_branch_run_name(chart_data, baseline_name)
+
+            if (pr_key in results) and (main_key in results):
+                pr_val = results[pr_key].value
+                main_val = results[main_key].value
                 diff = None
-                if v0 != 0 and results[key0].lower_is_better:
-                    diff = v1/v0
-                elif v1 != 0 and not results[key0].lower_is_better:
-                    diff = v0/v1
+                if pr_val != 0 and results[pr_key].lower_is_better:
+                    diff = main_val / pr_val
+                elif main_val != 0 and not results[pr_key].lower_is_better:
+                    diff = pr_val / main_val
 
                 if diff != None:
-                    oln.row += f"{(diff * 100):.2f}%"
                     oln.diff = diff
 
         output_detailed_list.append(oln)
 
-    sorted_detailed_list = sorted(output_detailed_list, key=lambda x: (x.diff is not None, x.diff), reverse=True)
+    sorted_detailed_list = sorted(output_detailed_list, key=lambda x:
+                                  (x.diff is not None, x.diff), reverse=True)
 
-    diff_values = [oln.diff for oln in sorted_detailed_list if oln.diff is not None]
+    diff_values = [oln.diff for oln in sorted_detailed_list
+                   if oln.diff is not None]
+
+    improved_rows = []
+    regressed_rows = []
 
     if len(diff_values) > 0:
-        max_diff = max(max(diff_values) - 1, 1 - min(diff_values))
-
         for oln in sorted_detailed_list:
             if oln.diff != None:
-                oln.row += f" | {(oln.diff - 1)*100:.2f}%"
                 delta = oln.diff - 1
-                oln.bars = round(10*(oln.diff - 1)/max_diff) if max_diff != 0.0 else 0
-                if oln.bars == 0 or abs(delta) < options.epsilon:
-                    oln.row += " | . |"
-                elif oln.bars > 0:
-                    oln.row += f" | {'+' * oln.bars} |"
-                else:
-                    oln.row += f" | {'-' * (-oln.bars)} |"
+                oln.row += f" {delta*100:.2f}%"
 
-                mean_cnt += 1
                 if abs(delta) > options.epsilon:
                     if delta > 0:
-                        improved+=1
+                        improved_rows.append(oln.row + " | \n")
                     else:
-                        regressed+=1
-                else:
-                    no_change+=1
-
-                global_product *= oln.diff
-            else:
-                oln.row += " | |"
 
+                        regressed_rows.append(oln.row + " | \n")
 
             if options.verbose: print(oln.row)
 
             summary_table += oln.row + "\n"
     else:
         for oln in sorted_detailed_list:
-            oln.row += " | |"
-
             if options.verbose: print(oln.row)
 
             summary_table += oln.row + "\n"
 
-    grouped_objects = collections.defaultdict(list)
-
-    for oln in output_detailed_list:
-        s = oln.label
-        prefix = re.match(r'^[^_\s]+', s)[0]
-        grouped_objects[prefix].append(oln)
-
-    grouped_objects = dict(grouped_objects)
-
-    if mean_cnt > 0:
-        global_mean = global_product ** (1/mean_cnt)
-        summary_line = f"Total {mean_cnt} benchmarks in mean. "
-        summary_line += "\n" + f"Geomean {global_mean*100:.3f}%. \nImproved {improved} Regressed {regressed} (threshold {options.epsilon*100:.2f}%)"
-    else:
+    regressed_rows.reverse()
+
+    is_at_least_one_diff = False
+    summary_line = ''
+
+    if len(improved_rows) > 0:
+        is_at_least_one_diff = True
+        summary_line += get_improved_regressed_summary(
+            is_improved=True,
+            rows_count=len(improved_rows)
+        )
+        summary_line += get_chart_markdown_header(
+            chart_data=chart_data,
+            baseline_name=baseline_name
+        )
+
+        for row in improved_rows:
+            summary_line += row
+
+        summary_line += "\n</details>"
+
+    if len(regressed_rows) > 0:
+        is_at_least_one_diff = True
+        summary_line += get_improved_regressed_summary(
+            is_improved=False,
+            rows_count=len(regressed_rows)
+        )
+
+        summary_line += get_chart_markdown_header(
+            chart_data=chart_data,
+            baseline_name=baseline_name
+        )
+
+        for row in regressed_rows:
+            summary_line += row
+
+        summary_line += "\n</details>"
+
+    if not is_at_least_one_diff:
         summary_line = f"No diffs to calculate performance change"
 
     if options.verbose: print(summary_line)
 
-    summary_table = "\n## Performance change in benchmark groups\n"
-
-    for name, outgroup in grouped_objects.items():
-        outgroup_s = sorted(outgroup, key=lambda x: (x.diff is not None, x.diff), reverse=True)
-        product = 1.0
-        n = len(outgroup_s)
-        r = 0
-        for oln in outgroup_s:
-            if oln.diff != None:
-                product *= oln.diff
-                r += 1
-        if r > 0:
-            summary_table += f"""
-<details>
-<summary> Relative perf in group {name} ({n}): {math.pow(product, 1/r)*100:.3f}% </summary>
-
-"""
-        else:
-            summary_table += f"""
-<details>
-<summary> Relative perf in group {name} ({n}): cannot calculate </summary>
-
-"""
-        summary_table += "| Benchmark | " + " | ".join(chart_data.keys()) + " | Relative perf | Change | - |\n"
-        summary_table += "|---" * (len(chart_data) + 4) + "|\n"
-
-        for oln in outgroup_s:
-            summary_table += f"{oln.row}\n"
-
-        summary_table += f"""
-</details>
-
-"""
+    grouped_in_suites = collections.defaultdict(lambda:
+                                                collections.defaultdict(list))
+    for oln in output_detailed_list:
+        grouped_in_suites[oln.suite][oln.explicit_group].append(oln)
+
+    for suite_name, suite_groups in grouped_in_suites.items():
+        summary_table += f"<details><summary>{suite_name}</summary>\n\n"
+
+        for name, outgroup in suite_groups.items():
+            outgroup_s = sorted(outgroup, key=lambda x:
+                                (x.diff is not None, x.diff), reverse=True)
+
+            summary_table += get_relative_perf_summary(
+                group_size=len(outgroup_s),
+                group_name=name
+            )
+            summary_table += get_chart_markdown_header(chart_data,
+                                                       baseline_name)
+
+            for oln in outgroup_s:
+                summary_table += f"{oln.row}\n"
+
+            summary_table += "\n</details>\n\n"
+
+        summary_table += "</details>"
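For readers unfamiliar with the nested `defaultdict` used for the suite/group layout above, this tiny standalone sketch (with invented suite, group, and benchmark names) performs the same two-level grouping:

```python
import collections
from dataclasses import dataclass

@dataclass
class Row:                      # invented stand-in for OutputLine
    suite: str
    explicit_group: str
    name: str

rows = [
    Row("SuiteA", "GroupX", "bench_1"),
    Row("SuiteA", "GroupX", "bench_2"),
    Row("SuiteB", "Other", "bench_3"),
]

grouped = collections.defaultdict(lambda: collections.defaultdict(list))
for row in rows:
    grouped[row.suite][row.explicit_group].append(row)

for suite, groups in grouped.items():
    for group, members in groups.items():
        print(suite, group, [m.name for m in members])
```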
 
+    if markdown_size == MarkdownSize.FULL:
+        return summary_line, summary_table
+    else:
+        full_content_size = len(summary_table) + len(summary_line)
+
+        if is_content_in_size_limit(content_size=full_content_size,
+                                    current_markdown_size=0):
+            return summary_line, summary_table
+        else:
+            if is_content_in_size_limit(content_size=len(summary_line),
+                                        current_markdown_size=0):
+                return summary_line, ''
+            else:
+                return (
+                    "\n# Summary\n"
+                    "Benchmark output is too large to display\n\n"
+                )
 
-    return summary_line, summary_table
 
-def generate_markdown(name: str, chart_data: dict[str, list[Result]]):
-    (summary_line, summary_table) = generate_summary_table_and_chart(chart_data)
-
-    return f"""
-# Summary
-{summary_line}\n
-(<ins>result</ins> is better)\n
-{summary_table}
-# Details
-{generate_markdown_details(chart_data[name])}
-"""
+
+def generate_markdown(name: str,
+                      chart_data: dict[str, list[Result]],
+                      markdown_size: MarkdownSize):
+    (summary_line, summary_table) = generate_summary_table_and_chart(
+        chart_data,
+        name,
+        markdown_size
+    )
+
+    current_markdown_size = len(summary_line) + len(summary_table)
+
+    generated_markdown = (
+        "\n# Summary\n"
+        "(Emphasized values are the best results)\n"
+        f"{summary_line}\n"
+        f"{summary_table}\n\n"
+    )
+
+    if name in chart_data.keys():
+        markdown_details = generate_markdown_details(chart_data[name],
+                                                     current_markdown_size,
+                                                     markdown_size)
+        generated_markdown += (
+            "\n# Details\n"
+            f"{markdown_details}\n"
+        )
+
+    return generated_markdown