Update with local elements

JuliaPerf · Sep 1, 2024 · 8bde5c1 · 8bde5c1
1 parent 357bf27
commit 8bde5c1
Show file tree

Hide file tree

Showing 17 changed files with 422 additions and 164 deletions.
diff --git a/Project.toml b/Project.toml
@@ -4,7 +4,6 @@ authors = ["Dvegrod <[email protected]>, Samuel Omlin <[email protected]>, a
 version = "0.1.0"
 
 [deps]
-BandwidthBenchmark = "68eb07c1-04fd-4e62-9736-d6127c4c03c6"
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"

diff --git a/src/PerfTest.jl b/src/PerfTest.jl
@@ -9,18 +9,23 @@ export @perftest, @on_perftest_exec, @on_perftest_ignore, @perftest_config,
 using MacroTools
 include("structs.jl")
 include("auxiliar.jl")
-include("prints.jl")
 include("macros.jl")
 
 include("config.jl")
 
 include("perftest/structs.jl")
 include("perftest/data_handling.jl")
 
+include("benchmarking.jl")
+
+include("prints.jl")
+
 include("metrics.jl")
 
 include("methodologies/regression.jl")
 include("methodologies/effective_memory_throughput.jl")
+include("methodologies/roofline.jl")
+#include("methodologies/fullroofline.jl")
 
 include("prefix.jl")
 include("suffix.jl")

diff --git a/src/auxiliar.jl b/src/auxiliar.jl
@@ -1,7 +1,4 @@
-include("structs.jl")
 using MacroTools: ismatch
-include("perftest/structs.jl")
-include("config.jl")
 
 # Function that generates a test name if needed
 function genTestName!(state::Context)

diff --git a/src/benchmarking.jl b/src/benchmarking.jl
@@ -1,14 +1,11 @@
 
-include("structs.jl")
-include("config.jl")
-
-using MPI
+#using MPI
 using LinearAlgebra
 # Memory and CPU benchmarks used by different methodologies
 
 
 function setupMemoryBandwidthBenchmark()::Expr
-    # MPI
+    # TODO MPI extra behaviour
     #println("="^26 * "Maximum memory throughput calculation" * "="^26)
     if mpi_enabled
         ret = quote
@@ -42,7 +39,7 @@ function setupMemoryBandwidthBenchmark()::Expr
     else
         ret = quote
             # Begin probing the maximum memory throughput
-            global bench_data = STREAMBenchmark.benchmark(N = 1024 * 256 * 140)
+            global bench_data = STREAMBenchmark.benchmark(N = 1024 * 256 * 540)
             peakbandwidth = bench_data.multi.maximum / 1e3
         end
     end

diff --git a/src/config.jl b/src/config.jl
@@ -1,6 +1,5 @@
 
 using MacroTools
-include("structs.jl")
 
 # CONFIG STRUCTURE DEFINITION
 # FOR DEFAULTS SEE BELOW COMMENT "DEFAULTCONFIG":
@@ -33,7 +32,7 @@ end
 
     plotting::Bool
 
-    tolerance_around_memcpu_intersection::Struct_Tolerance
+    tolerance::Struct_Tolerance
 end
 
 @kwdef mutable struct Struct_Metric_Config
@@ -109,7 +108,7 @@ roofline = Struct_Roofline_Config(
 
     autoflops = false,
 
-    tolerance_around_memcpu_intersection = Struct_Tolerance(
+    tolerance = Struct_Tolerance(
         max_percentage = 2.0,
         min_percentage = 0.7
     )
@@ -147,14 +146,6 @@ metrics = Struct_Metrics(
     )
 )
 
-# MACROS
-# Perftest_config macro, used to set customised configuration
-macro perftest_config(expr)
-    # It deletes the contents and does nothing since this macro wont
-    # be evaluated during performance testing but during functional testing
-    # The contents are used by parsing them during the test translation
-    return nothing
-end
 
 # AST MODIFIERS
 # Perftest_config AST Manipulation

diff --git a/src/macros.jl b/src/macros.jl
@@ -3,10 +3,12 @@ macro perftest(anything)
     return esc(anything)
 end
 
-macro perftest_config(anything)
-    return :(
-        begin end
-    )
+# perftest_config macro, used to set a customised configuration.
+macro perftest_config(expr)
+    # Discards its contents and does nothing: this macro is not evaluated
+    # during performance testing, only during functional testing.
+    # The contents are instead used by parsing them during the test translation.
+    # (`begin end` evaluates to `nothing`, so `nothing` is what gets returned.)
+    return begin end
 end
 
 macro on_perftest_exec(anything)

diff --git a/src/methodologies/effective_memory_throughput.jl b/src/methodologies/effective_memory_throughput.jl
@@ -1,12 +1,4 @@
 
-using Suppressor
-using STREAMBenchmark
-
-include("../structs.jl")
-include("../config.jl")
-include("../perftest/structs.jl")
-include("../perftest/data_handling.jl")
-include("../metrics.jl")
 
 
 # THIS FILE SAVES THE MAIN COMPONENTS OF THE EMPIRICAL EFFECTIVE MEM. THROUGHPUT
@@ -35,7 +27,7 @@ function effMemThroughputEvaluation(context :: Context)::Expr
                     min = $(effective_memory_throughput.tolerance.min_percentage)
                     max = $(effective_memory_throughput.tolerance.max_percentage)
 
-                    result = PerfTests.Metric_Result(
+                    result = PerfTest.Metric_Result(
                         name=$(cmetric.name),
                         units=$(cmetric.units),
                         value=$(customMetricExpressionParser(cmetric.formula))
@@ -47,7 +39,7 @@ function effMemThroughputEvaluation(context :: Context)::Expr
                     _test = min < ratio < max
 
 
-                    constraint = PerfTests.Metric_Constraint(
+                    constraint = PerfTest.Metric_Constraint(
                         reference=peakbandwidth,
                         threshold_min=min * peakbandwidth,
                         threshold_min_percent=min,
@@ -61,14 +53,14 @@ function effMemThroughputEvaluation(context :: Context)::Expr
 
 
                     # Setup result collecting struct
-                    methodology_result = PerfTests.Methodology_Result(
+                    methodology_result = PerfTest.Methodology_Result(
                         name="EFFECTIVE MEMORY THROUGHPUT RATIO",
-                        metrics=Pair{PerfTests.Metric_Result,PerfTests.Metric_Constraint}[]
+                        metrics=Pair{PerfTest.Metric_Result,PerfTest.Metric_Constraint}[]
                     )
 
                     push!(methodology_result.metrics, (result => constraint))
 
-                    PerfTests.printMethodology(methodology_result, $(length(context.depth)))
+                    PerfTest.printMethodology(methodology_result, $(length(context.depth)))
                     # Register metric results TODO
 
                     @test _test

diff --git a/src/methodologies/fullroofline.jl b/src/methodologies/fullroofline.jl
@@ -0,0 +1,179 @@
+
+# THIS FILE SAVES THE MAIN COMPONENTS OF THE ROOFLINE
+# METHODOLOGY BEHAVIOUR
+
+# GENERATED SPACE IMPORTANT SYMBOLS:
+#
+# roofline_opint
+# local_peakflops
+# local_peakbandwidth
+
+
+using UnicodePlots
+
+# Roofline model builder: given the peak flops and the peak memory bandwidth it
+# returns a closure that maps an operational intensity `opint` to
+# `min(maxflops, maxbandwidth * opint)`.
+rooflineFunc = (maxflops, maxbandwidth) -> (opint -> min(maxflops, maxbandwidth * opint))
+
+
+
+"""
+    fullRooflineMacroParse(x::Expr, ctx::Context)::Expr
+
+Parse a user `@roofline` request and set up the data for the roofline computation.
+
+Nothing is done unless `roofline.enabled` is true. When it is, the keyword-style
+arguments (`name = value`) of `x` are scanned:
+
+- `cpu_peak`: the value is evaluated and used as the local peak flops.
+- `membw_peak`: the value is evaluated and used as the local peak memory bandwidth.
+- `actual_flops`: only accepted in block form; it is run through `fullParsingSuite`.
+- `target_opint`: recognised but not handled yet (TODO).
+
+The last argument of `x` must be a block with the operational intensity
+expression. The assignments of `roofline_opint`, `local_peakflops`,
+`local_peakbandwidth` and, if `actual_flops` was given, `local_actual_flops_expr`
+are appended to `ctx.local_injection`. The flags `ctx.env_flags.roofline_prefix`
+and (only with `actual_flops`) `ctx.env_flags.roofline_full` are set to `true`.
+
+Always returns an empty expression. Throws an error when the last argument is
+missing or is not a block.
+"""
+function fullRooflineMacroParse(x::Expr, ctx::Context)::Expr
+    if roofline.enabled
+        peakflops = nothing
+        peakbandwidth = nothing
+        actual_flops_expr = nothing
+
+        # Enables local roofline model construction
+        ctx.env_flags.roofline_prefix = true
+
+        # Capture needed args
+        for arg in x.args
+            # Only macro kwargs (`name = value`) are relevant here
+            (arg isa Expr && arg.head == Symbol("=")) || continue
+            key, value = arg.args[1], arg.args[2]
+
+            if key == Symbol("cpu_peak")
+                # NOTE(review): `eval` executes user-supplied code at parse time
+                peakflops = eval(value)
+            elseif key == Symbol("target_opint")
+                #TODO
+            elseif key == Symbol("membw_peak")
+                # NOTE(review): `eval` executes user-supplied code at parse time
+                peakbandwidth = eval(value)
+            elseif key == Symbol("actual_flops")
+                # Anything that is not a block is ignored
+                if value isa Expr && value.head == :block
+                    actual_flops_expr = fullParsingSuite(value)
+                end
+            end
+        end
+
+        # Last is the operational intensity BLOCK. The `isa Expr` guard makes a
+        # non-expression last argument (symbol, literal, nothing at all) report
+        # the intended error instead of a field access or bounds error.
+        opint_block = isempty(x.args) ? nothing : x.args[end]
+        if opint_block isa Expr && opint_block.head == :block
+            t = fullParsingSuite(opint_block)
+        else
+            error("Malformed @roofline, opint must be in block format")
+        end
+
+        # Optional binding of the measured flops; the full roofline mode is
+        # requested only when the user provided `actual_flops`.
+        actual_flops_binding = if isnothing(actual_flops_expr)
+            quote end
+        else
+            ctx.env_flags.roofline_full = true
+            quote
+                local_actual_flops_expr = $actual_flops_expr
+            end
+        end
+
+        # Without a user supplied peak, fall back to the symbols `peakflops` /
+        # `peakbandwidth`, resolved where the injected code runs.
+        flops_source = isnothing(peakflops) ? :(peakflops) : peakflops
+        bandwidth_source = isnothing(peakbandwidth) ? :(peakbandwidth) : peakbandwidth
+
+        ctx.local_injection = quote
+            $(ctx.local_injection)
+            roofline_opint = $t
+            $actual_flops_binding
+            local_peakflops = $flops_source
+            local_peakbandwidth = $bandwidth_source
+        end
+    end
+    return quote
+        begin end
+    end
+end
+
+"""
+    rooflineEvaluation(context::Context)::Expr
+
+Build the expression that evaluates the roofline methodology for a test.
+
+Code is generated only when `roofline.enabled` is true and
+`context.env_flags.roofline_prefix` is set; otherwise an empty expression is
+returned. The `roofline_prefix` and `roofline_full` flags are reset here.
+
+The generated code reads `roofline_opint`, `local_peakflops`,
+`local_peakbandwidth` and, in full roofline mode, `local_actual_flops_expr`
+from the scope where it is evaluated. It builds the metric result, the
+constraint and the methodology result, prints them with
+`PerfTest.printMethodology`, stores the methodology result in
+`current_test_results[:roofline]` and finishes with `@test _test`.
+"""
+function rooflineEvaluation(context::Context)::Expr
+    if !(roofline.enabled && context.env_flags.roofline_prefix)
+        return quote
+            begin end
+        end
+    end
+    context.env_flags.roofline_prefix = false
+
+    # Read and reset the flag before building the code. Resetting it inside the
+    # interpolated `if` made the assignment (`false`) the spliced value, so the
+    # full roofline code was never emitted.
+    full_roofline = context.env_flags.roofline_full
+    context.env_flags.roofline_full = false
+
+    full_block = if full_roofline
+        quote
+            operational_intensity_result = result
+            # `Base.min` because `min` is rebound to the lower threshold in the
+            # generated code. The model uses the raw peak values, not the
+            # `Metric_Result` wrappers built further down.
+            result = PerfTest.Metric_Result(
+                name="Real Performance vs Model performance",
+                units="[%(real/model)]",
+                value=local_actual_flops_expr /
+                      Base.min(roofline_opint * local_peakbandwidth, local_peakflops)
+            )
+            methodology_result.custom_elements[:opint] = operational_intensity_result
+            # NOTE(review): `local_actual_flops_expr` is the only measured flops
+            # symbol injected by the parse step; confirm it is the intended value.
+            methodology_result.custom_elements[:realf] = PerfTest.Metric_Result(
+                name="Real Performance",
+                units="GFlops",
+                value=local_actual_flops_expr
+            )
+        end
+    else
+        quote end
+    end
+
+    return quote
+        # Threshold
+        min = $(roofline.tolerance.min_percentage)
+        max = $(roofline.tolerance.max_percentage)
+
+        # Setup result collecting struct. It is created first so the optional
+        # full roofline section can register its custom elements in it.
+        methodology_result = PerfTest.Methodology_Result(
+            name="ROOFLINE",
+            metrics=Pair{PerfTest.Metric_Result,PerfTest.Metric_Constraint}[],
+        )
+
+        result = PerfTest.Metric_Result(
+            name="OPERATIONAL INTENSITY",
+            units="FLOP/Byte",
+            value=roofline_opint
+        )
+
+        $full_block
+
+        # MEM LIMIT TO CPU LIMIT THRESHOLD (used as the reference for bounded tests)
+        ref = local_peakflops / local_peakbandwidth
+
+        # Ratio
+        ratio = result.value / ref
+        # test
+        _test = min < ratio < max
+
+        constraint = PerfTest.Metric_Constraint(
+            reference=ref,
+            threshold_min=min * ref,
+            threshold_min_percent=min,
+            threshold_max=max * ref,
+            threshold_max_percent=max,
+            low_is_bad=true,
+            succeeded=_test, custom_plotting=Symbol[],
+            full_print=$(verbose ? :(true) : :(!_test))
+        )
+
+        cpu_peak = PerfTest.Metric_Result(
+            name="Peak CPU flops",
+            units="GFlops/s",
+            value=local_peakflops
+        )
+        methodology_result.custom_elements[:cpu_peak] = cpu_peak
+        mem_peak = PerfTest.Metric_Result(
+            name="Peak memory bandwidth",
+            units="GB/s",
+            value=local_peakbandwidth
+        )
+        methodology_result.custom_elements[:mem_peak] = mem_peak
+        #push!(methodology_result.custom_elements, (:plot => plotexpr))
+        # Compare the operational intensity itself against the roofline corner;
+        # `ratio` is already divided by `ref`, so it cannot be compared to it.
+        limit_mem_b = PerfTest.Metric_Result(
+            name="Is memory limited?",
+            units="Yes/No",
+            value=roofline_opint > ref ? "NO" : "YES"
+        )
+        methodology_result.custom_elements[:mem_lim] = limit_mem_b
+        roof_corner = PerfTest.Metric_Result(
+            name="Roofline corner",
+            units="FLOP/Byte",
+            value=ref
+        )
+        methodology_result.custom_elements[:roof_corner] = roof_corner
+
+        methodology_result.custom_elements[:plot] = PerfTest.printRoofline
+
+        $(checkAuxiliaryCustomMetrics(context))
+
+        push!(methodology_result.metrics, (result => constraint))
+
+        PerfTest.printMethodology(methodology_result, $(length(context.depth)))
+        # Register metric results TODO
+        current_test_results[:roofline] = methodology_result
+
+        @test _test
+    end
+end