diff --git a/.github/workflows/ci-experiments.yml b/.github/workflows/ci-experiments.yml
new file mode 100644
index 00000000..d73ba6ea
--- /dev/null
+++ b/.github/workflows/ci-experiments.yml
@@ -0,0 +1,41 @@
+# Runs the experiments and posts the contents of results/cycles_cirun.csv as a PR comment.
+name: Run experiments
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+# The default GITHUB_TOKEN can be read-only: request exactly what the steps need.
+permissions:
+  contents: read  # actions/checkout
+  pull-requests: write  # mshick/add-pr-comment
+
+jobs:
+  run-experiments:
+    runs-on: ubuntu-latest
+    container: ghcr.io/nazavode/snitch-toolchain:2.3
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: recursive
+      - name: Run experiments
+        run: scripts/run.sh --abort-on-error --tag cirun
+      - name: Read CSV
+        id: csv
+        uses: juliangruber/read-file-action@v1
+        with:
+          path: results/cycles_cirun.csv
+      - name: Create MD
+        uses: petems/csv-to-md-table-action@master
+        id: csv-table-output
+        with:
+          csvinput: ${{ steps.csv.outputs.content }}
+      - uses: mshick/add-pr-comment@v1
+        with:
+          message: |
+            ${{steps.csv-table-output.outputs.markdown-table}}
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          repo-token-user-login: 'github-actions[bot]' # The user.login for temporary GitHub tokens
+          allow-repeats: true
diff --git a/kernels/dsum/8x16xf32/Makefile b/kernels/dsum/8x16xf32/Makefile
index ce22e9ef..3cd46fea 100644
--- a/kernels/dsum/8x16xf32/Makefile
+++ b/kernels/dsum/8x16xf32/Makefile
@@ -2,28 +2,14 @@
 
 include ../../../snitch/Makefile.rules
 
-PRES =
-PRES += pres_0_llvm.x
-PRES += pres_1_llvm_clean.x
-PRES += pres_2_ssr_loop.x
-PRES += pres_3_ssr_frep.x
-
-TESTS = $(PRES)
+TESTS =
 TESTS += baseline.x
 TESTS += noalias.x
 TESTS += ssr1d.x
 TESTS += ssr1d_frep1d.x
 TESTS += ssr2d.x
 TESTS += linalg.x
-TESTS += vector.x
 TESTS += scf.x
 TESTS += linalg_2.x
 
 include ../../Makefile.kernels
-
-# Presentation specific
-
-all-pres: $(PRES)
-
-allrun-pres: TESTS=$(PRES)
-allrun-pres: $(RUN)
diff --git a/kernels/dsum/8x16xf32/pres_0_llvm.S.txt b/kernels/dsum/8x16xf32/pres_0_llvm.S.txt
deleted file mode 100644
index 87c1fea0..00000000
--- a/kernels/dsum/8x16xf32/pres_0_llvm.S.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-# LLVM output (1189 cycles)
-
- .text
- 
.attribute 4, 16 - .attribute 5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_zfh0p1" - .file "baseline.c" - .globl ssum # -- Begin function ssum - .p2align 2 - .type ssum,@function -ssum: # @ssum -# %bb.0: - mv a3, zero - addi a4, zero, 1024 -.LBB0_1: # =>This Inner Loop Header: Depth=1 - add a5, a0, a3 - fld ft0, 0(a5) - add a5, a1, a3 - fld ft1, 0(a5) - fadd.d ft0, ft1, ft0 - add a5, a2, a3 - addi a3, a3, 8 - fsd ft0, 0(a5) - bne a3, a4, .LBB0_1 -# %bb.2: - ret -.Lfunc_end0: - .size ssum, .Lfunc_end0-ssum - # -- End function - .ident "clang version 12.0.1 (git@github.com:pulp-platform/llvm-project.git d2f0eff9be1f58bb186499e2055eb6888ce88dcc)" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/kernels/dsum/8x16xf32/pres_1_llvm_clean.S.txt b/kernels/dsum/8x16xf32/pres_1_llvm_clean.S.txt deleted file mode 100644 index 0869aaa4..00000000 --- a/kernels/dsum/8x16xf32/pres_1_llvm_clean.S.txt +++ /dev/null @@ -1,30 +0,0 @@ -# LLVM output cleaned (1189 cycles) - - .text - .attribute 4, 16 - .attribute 5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_zfh0p1" - .file "baseline.c" - .globl ssum # -- Begin function ssum - .p2align 2 - .type ssum,@function -ssum: - mv a3, zero # a3 <- 0 - addi a4, zero, 1024 # a4 <- 1024 -.loop_body: - add a5, a0, a3 # a5 <- a0 + a3 - fld ft0, 0(a5) # ft0 <- *a5 - add a5, a1, a3 # a5 <- a1 + a3 - fld ft1, 0(a5) # ft1 <- *a5 - fadd.d ft0, ft1, ft0 # ft0 <- ft1 + ft0 - add a5, a2, a3 # a5 <- a2 + a3 - addi a3, a3, 8 # a3 <- a3 + 4 - fsd ft0, 0(a5) # *a5 <- ft0 - bne a3, a4, .loop_body # if a3 < a4 repeat -# end_loop: - ret -.Lfunc_end0: - .size ssum, .Lfunc_end0-ssum - # -- End function - .ident "clang version 12.0.1 (git@github.com:pulp-platform/llvm-project.git d2f0eff9be1f58bb186499e2055eb6888ce88dcc)" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/kernels/dsum/8x16xf32/pres_2_ssr_loop.S.txt b/kernels/dsum/8x16xf32/pres_2_ssr_loop.S.txt deleted file mode 100644 index 8f208ddb..00000000 --- a/kernels/dsum/8x16xf32/pres_2_ssr_loop.S.txt 
+++ /dev/null @@ -1,34 +0,0 @@ -# SSR Loop (248 cycles) - - .text - .attribute 4, 16 - .attribute 5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_zfh0p1" - .file "ssr1d.c" - .globl ssum # -- Begin function ssum - .p2align 2 - .type ssum,@function -ssum: # @ssum -# Setup Snitch - addi a3, zero, 127 - scfgwi a3, 95 - addi a3, zero, 8 - scfgwi a3, 223 - scfgwi a0, 768 - scfgwi a1, 769 - scfgwi a2, 898 - csrsi 1984, 1 -# Loop - addi a0, zero, 128 # a0 <- 128 -.loop_body: - fadd.d ft2, ft0, ft1 # stream sum - addi a0, a0, -1 # a0 <- a0 - 1 - bnez a0, .loop_body # if a0 > 0 repeat -# end_loop: - csrci 1984, 1 - ret -.Lfunc_end0: - .size ssum, .Lfunc_end0-ssum - # -- End function - .ident "clang version 12.0.1 (git@github.com:pulp-platform/llvm-project.git d2f0eff9be1f58bb186499e2055eb6888ce88dcc)" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/kernels/dsum/8x16xf32/pres_3_ssr_frep.S.txt b/kernels/dsum/8x16xf32/pres_3_ssr_frep.S.txt deleted file mode 100644 index 556e0db9..00000000 --- a/kernels/dsum/8x16xf32/pres_3_ssr_frep.S.txt +++ /dev/null @@ -1,31 +0,0 @@ -# SSR + FREP (118 cycles) - - .text - .attribute 4, 16 - .attribute 5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_zfh0p1" - .file "ssr1d.c" - .globl ssum # -- Begin function ssum - .p2align 2 - .type ssum,@function -ssum: # @ssum -# Setup Snitch - addi a3, zero, 127 - scfgwi a3, 95 - addi a3, zero, 8 - scfgwi a3, 223 - scfgwi a0, 768 - scfgwi a1, 769 - scfgwi a2, 898 - csrsi 1984, 1 -# Loop - addi a1, zero, 127 # a1 <- 127 - frep.o a1, 1, 0, 0 # repeat next instruction 127 + 1 times - fadd.d ft2, ft0, ft1 # stream sum - csrci 1984, 1 - ret -.Lfunc_end0: - .size ssum, .Lfunc_end0-ssum - # -- End function - .ident "clang version 12.0.1 (git@github.com:pulp-platform/llvm-project.git d2f0eff9be1f58bb186499e2055eb6888ce88dcc)" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/kernels/matmul/16x16xf64/Makefile b/kernels/matmul/16x16xf64/Makefile new file mode 100644 index 00000000..26ef0923 --- /dev/null +++ 
b/kernels/matmul/16x16xf64/Makefile @@ -0,0 +1,9 @@ +.DEFAULT_GOAL := all + +include ../../../snitch/Makefile.rules + +TESTS = +TESTS += baseline.x +TESTS += linalg.x + +include ../../Makefile.kernels diff --git a/kernels/matmul/16x16xf64/baseline.c b/kernels/matmul/16x16xf64/baseline.c new file mode 100644 index 00000000..8b245d78 --- /dev/null +++ b/kernels/matmul/16x16xf64/baseline.c @@ -0,0 +1,16 @@ +#include "data.h" + +#include + +#include + +void matmul(const double* x, const double* y, double* g) { + for (uint32_t i = 0; i < M; ++i) { + for (uint32_t j = 0; j < N; ++j) { + for (uint32_t k = 0; k < K; ++k) { + // row-major accesses + g[i * N + j] += x[i * K + k] * y[k * N + j]; + } + } + } +} diff --git a/kernels/matmul/16x16xf64/data.c b/kernels/matmul/16x16xf64/data.c new file mode 100644 index 00000000..95603507 --- /dev/null +++ b/kernels/matmul/16x16xf64/data.c @@ -0,0 +1,783 @@ +#define M 16 +#define K 16 +#define N 16 + +const double X[M * K] = { +97.62700785, + 430.37873274, + 205.52675214, + 89.76636599, +-152.69040132, + 291.78822613, +-124.82557747, + 783.54600156, + 927.325521 , +-233.11696235, + 583.45007617, + 57.78983951, + 136.08912219, + 851.19327659, +-857.9278836 , + -825.7414006 , +-959.56320512, + 665.2396911 , + 556.3135019 , + 740.02429649, + 957.23668447, + 598.31712843, + -77.04127549, + 561.05835257, +-763.45114826, + 279.84204266, +-713.29342518, + 889.3378341 , + 43.6966435 , +-170.67612002, + -470.88877579, + 548.46737887, + -87.69933557, + 136.86789774, +-962.42039913, + 235.27099415, + 224.19144544, + 233.86799375, + 887.49615703, + 363.64059821, + -280.98419885, +-125.9360924 , + 395.26239185, +-879.54905674, + 333.53343089, + 341.27573924, +-579.23487785, +-742.14740469, +-369.14329815, +-272.57845811, + 140.39354084, +-122.79697308, + 976.74767612, +-795.9103785 , +-582.24648781, + -677.38096423, + 306.21665093, +-493.41679492, + -67.37845429, +-511.148816 , + -682.06083271, +-779.24971767, + 312.65917893, +-723.6340973 , 
+-606.83527664, + -262.54965868, + 641.9864597 , +-805.79744841, + 675.889815 , +-807.80318421, + 952.91893003, + -62.6975967 , + 953.52217638, + 209.69103949, + 478.5271588 , + -921.62441549, +-434.38607485, +-759.60687757, +-407.71960496, +-762.54456209, + -364.03364121, +-171.47401097, +-871.7050073 , + 384.94423874, + 133.20290841, + -469.22101812, + 46.49610693, +-812.11897848, + 151.89299111, + 858.59239515, + -362.8620951 , + 334.82075993, +-736.40427519, + 432.65440824, +-421.18781411, + -633.61727599, + 173.02586962, +-959.78490763, + 657.88005843, +-990.60904761, + 355.63307359, +-459.98405362, + 470.38804425, + 924.37709023, +-502.49371296, + 152.31466884, + 184.08386254, + 144.50381158, +-553.83673472, + 905.49802303, + -105.74924276, + 692.81734494, + 398.95855064, +-405.12609829, + 627.5956394 , + -206.98851831, + 762.20639422, + 162.54574527, + 763.47072371, + 385.06318016, + 450.50855964, + 2.64876385, + 912.16726945, + 287.98039846, +-152.28990288, + 212.78642826, +-961.61360338, +-396.85036665, + 320.34707499, +-419.84478558, + 236.030858 , +-142.46259811, +-729.05187156, +-403.43534809, + 139.9298214 , + 181.7455225 , + 148.6504977 , + 306.40163971, + 304.20654 , +-137.16312913, + 793.0931917 , +-264.8762599 , +-128.27014947, + 783.84671003, + 612.38797809, + 407.77716708, +-799.54622538, + 838.96522749, + 428.4825991 , + 997.69401314, + -701.10339068, + 736.25211474, +-675.01413065, + 231.11912857, +-752.3600343 , + 696.01645864, + 614.63791745, + 138.20147723, +-185.63340555, +-861.66600909, + 394.85754629, + -92.91463464, + 444.11119894, + 732.76465186, + 951.04301001, + 711.60668479, +-976.57183163, +-280.04387104, + 459.98112485, +-656.74064548, + 42.07321241, +-891.32402332, +-600.00695021, +-962.95641108, + 587.39540671, + -552.15062388, +-309.29663861, + 856.16258693, + 408.82880385, +-936.32214094, + -670.611687 , + 242.956803 , + 154.45717721, +-524.21435725, + 868.42799585, + 227.93191193, + 71.26560605, + 179.81995271, + 460.24405903, 
+-376.11000904, + -203.55787557, +-580.31250205, +-627.61398824, + 888.74477997, + 479.1015901 , + -19.08238276, +-545.17074405, +-491.28703646, +-883.94167935, +-131.16674888, + -376.40823601, + 392.68697763, +-244.49632142, +-640.79264488, +-950.64254322, + -865.50073707, + 358.785547 , + -92.60631089, + 73.15842222, + 793.34258608, + 980.67789479, +-566.2060312 , + 326.1564062 , +-473.35524653, +-958.69800107, + 516.75730767, +-359.96569836, +-233.07221166, + 176.63422711, + 662.09691047, + 257.96368718, + 745.30131089, +-452.91593037, + 596.09366783, +-628.72811139, + 905.58331394, + 374.97655278, +-568.98464577, + 894.74118098, + 461.71161354, + -492.11671481, +-573.37604527, + 36.40142786, +-948.67456389, +-585.05984912, + -150.6290625 , +-251.66003933, + -72.84915127, +-444.74258741, + 173.56869292, + 727.71121185, +-764.93628808, + 34.75821431, +-735.86378731, + 433.71936239, + -207.88059439, + 130.84262372, +-633.44032757, +-710.30448131, + -23.8874387 , + -288.7745243 , + 880.86389051, + 530.65050761, + 497.3272397 , + 807.43947949, + -833.15512912 +}; + + +const double Y[K * N] = { +104.38493984, + 168.95213791, + 923.87275709, +-415.70494641, +-518.34244017, + -799.41211547, +-967.14074082, + 859.05863358, + 339.83309318, + 570.30582405, + -436.53978849, + 172.82033237, +-872.08946776, + -28.74480813, + 954.99027949, + 753.01049063, +-323.68209633, + 923.14030908, +-536.59674706, + 898.63764483, + 882.75540941, + 598.4051747 , + 260.89587373, + 748.57593325, +-413.95943098, + 697.88711063, + 235.75338384, +-973.52628448, +-305.53296414, +-703.7182781 , + 963.65877964, + -43.25938592, + -5.217269 , + 278.9450328 , +-262.83078774, + -726.19945663, + 644.23546639, +-620.30417619, + 22.63796509, +-551.36594205, + -804.31103101, + 724.38303484, + 945.83897805, + 921.66931613, + 813.11099844, + 548.0946654 , +-333.70969594, +-837.79722002, +-185.51765717, +-535.53171566, + -735.02473048, +-893.14563643, + 451.18872842, +-977.14508275, + 541.16149701, + 
-706.1067092 , +-840.95583483, +-820.79393152, + 344.09561471, +-509.26558029, + -158.92106664, + 114.73758265, + 721.10234766, + 454.08852542, +-459.34418952, + -737.03440142, +-889.25135916, +-396.80273104, +-475.76370152, + -87.7188664 , + 366.5626711 , + 391.25089128, +-432.96230684, +-240.1460882 , +-637.69807653, + 577.09102461, +-886.30384713, + 393.99448345, + 557.39079188, + 554.8151237 , + -481.15487131, +-252.37372413, + 175.19927039, +-454.35619515, +-258.29440156, + -605.89143963, + -80.28823249, +-910.77539749, + 599.59176914, +-846.08710603, + 37.67029766, +-386.37980091, + 155.08589766, + 918.86668167, + 291.14048891, + -929.27512849, +-139.19512098, + 20.03370464, + 72.35498941, + 362.78502121, + -444.80780454, +-742.27886907, +-214.64864691, + 912.81144559, +-625.7382165 , + 807.96790986, + 87.61190015, + -86.17715671, + 764.08282046, + -82.79207646, + 448.33527322, +-201.94935659, + 808.0887858 , + 380.05004038, + 399.2441085 , + -344.55919689, + 513.55728547, + 272.12211089, +-519.95945324, +-678.92235503, + 592.78294903, + 918.33320607, + -83.72234548, + 181.96833065, + 715.44528839, + -85.55309329, + 903.74895367, + 151.50232409, + 641.5342414 , + 817.68743683, + 631.04763754, +-681.1710731 , + 257.79687812, +-203.13148276, +-874.57409595, + -151.93549622, +-482.63186622, + 698.07661686, +-933.39074691, + 917.96544373, + -289.26230306, +-286.58621919, +-967.34299463, +-629.53534953, +-197.48099839, + 858.58283461, +-800.77013956, + 890.60306696, + 738.97706109, + -91.67520618, + -346.59823646, +-534.51174144, + 228.92941295, +-933.85081705, +-968.78787111, + -142.408555 , +-863.85185205, +-496.11802351, +-557.67816931, +-493.61761255, + -737.88953757, +-975.9275542 , +-769.03140572, + 236.96051903, + 948.51242564, + 980.69000312, +-181.89180925, +-674.09114791, + 277.52351473, + -19.3893069 , + 978.81955457, +-869.3915857 , + 566.46887663, +-423.20300534, +-517.16275985, + 325.00914307, +-507.87363002, + 331.71823512, + 34.6170344 , 
+-151.82202313, + 109.37561732, +-425.89696016, + 413.14941255, +-170.28626133, +-278.90887903, + 657.31382911, + 849.93382391, +-907.98537823, +-534.74601434, +-302.96126101, + 629.93295874, + 970.98285529, + 937.94340934, + 809.8966911 , +-406.88746987, + 984.02248683, +-501.15991789, +-788.18769024, + 901.90522211, +-533.15948906, + 379.53653016, +-883.28728204, + 461.41819825, + 763.44042467, +-455.12620907, + -241.88620785, +-251.40763336, + 497.57651508, +-524.38551492, +-656.2938019 , + -101.41670262, +-391.06318525, + 678.37824452, +-524.51634797, + 4.77891498, + 885.1671994 , + 267.99539549, + 734.57881092, + 880.41937871, + 501.52972377, + 399.15012045, + 935.93113321, + 988.8015793 , + -96.35663466, +-858.26044363, + -414.41193712, +-695.29058862, +-165.02725041, +-737.42134305, + 208.23560804, + -234.38388168, + 790.77176858, + 935.5893436 , + 93.76980334, +-450.35286026, + 184.46083752, + 793.52231645, +-186.53330833, + 104.15655338, +-456.69446479, + -89.1117011 , +-196.57292924, +-503.17306983, + 11.73276765, +-379.23834804, + -253.93027224, + 49.94088451, + 501.19004586, +-332.98506842, + 848.31753324, + 724.63709367, +-902.61940805, +-492.71495149, +-107.72897468, +-790.74422252, + -303.04802193 +}; + + +const double G[M * N] = { +1105726.1953911 , + 834552.91068223, + 1425422.0574969 , + -1047371.49309336, + 3038980.26963843, + -35355.5750294 , + -914732.90524533, +-1572886.12016268, + -205198.69594562, + 1455562.63642129, + 896656.39365987, + 1136682.53966406, + 818633.47745504, + 566728.64016368, + 1494790.17769532, + 116292.21701518, + -703783.31886208, + 407901.6481675 , + -2537178.20962196, + -282076.17769506, + 921604.83422523, + -1270752.4620856 , + 2786543.78788083, +-1753313.20471557, + -2011974.19476508, + -509245.28041767, + 1693515.26577912, + -1809828.40153653, +-1077204.40929408, + 805392.77843087, + 2102016.69778632, + 255485.8316759 , + 559521.71590166, + -934519.00018485, + 171523.30042215, + 1237746.90352739, + 163482.25029678, + 
254724.25267775, + 165140.23910059, + 117181.48305624, + 578187.07230925, +-1437616.76066154, + -644591.69571442, + 77260.49384251, + 765332.51562725, + 273266.12343122, + 1822238.53501726, + 760112.34711403, + -292061.1721647 , +-1302560.39497377, + -336255.99920251, + -1100456.15573725, + -910678.3959073 , + 2139135.77693076, + 97738.2917372 , + 431712.4424103 , +-1030691.25947681, + -1066570.96882187, +-2433890.36261745, + 1864976.82693305, + -202902.42088961, + -179355.38792898, +-1613581.99257609, + 201718.65817756, + 1156738.10959529, + 619258.95632025, + 260002.13010037, + 454456.27312495, + 609928.69521913, + 1921339.67377722, +-1474803.28369651, + -158226.12440343, + -1865008.00182816, + 825957.70728007, +-2485991.94748749, + 2928148.12673306, + 1472057.85221321, + -836140.02744227, + -3064457.38616271, +-1594423.48656484, +-1021554.17755137, + -211272.47414544, + -253286.7566116 , + 436109.9307683 , + 849020.44049088, + 188740.36083817, + 173588.37289097, + 671555.21783174, + -942299.38201779, +-1695277.34426229, + -1876601.06402039, + -611430.31831528, +-1820841.42045753, + -302998.1138598 , + 123049.87392131, + 679915.53483501, + -278406.46949767, +-1475075.52948883, + 519138.86510664, + -981289.74405096, + 135534.90897368, + -32162.2000737 , + -1376145.15575287, + 1333436.19410184, + 574872.35773654, + 3581792.92698301, + 1415084.68540009, + 1197985.79252617, + 1697930.23447992, + 1390087.10825281, + 289772.87648947, + -242567.93488219, + 59886.85035731, + -672332.67087228, + 632151.57445241, +-1243921.7592546 , + 1295127.20372779, + -786571.92909188, +-1059725.31646476, + -998324.6537922 , + -1321748.09761952, + 1566839.90435469, + 784535.50338885, + 1058471.256631 , + 344392.00063866, + 801782.46564234, + 529100.45626754, + 628463.20634206, + 1653982.81913581, + 1277180.66366018, + 232932.24178241, + 1085016.44395268, + -208273.03726172, + -674013.81219242, + -415462.79697582, + -716046.80521699, + 276643.60013034, + 453382.7407658 , + 886276.3653967 
, + 204698.60022489, + -132688.74220105, + -1067873.26999249, +-2229869.60793253, + -71315.05481177, + 259258.3620372 , + 770762.57550064, + 267599.39944493, + -235198.54435461, + -256947.50413871, +-1785804.75065361, + 1284803.90632418, +-1617452.03574655, + 1624025.53697342, + -2876051.72030415, + -871703.94010902, + -747508.03992345, + -2426186.33391553, + 728545.55265403, + 4725705.86691843, + 2438617.78375226, + -602906.532758 , +-1807456.01664149, + -180009.7591881 , +-3253299.79353681, +-1016299.73658917, + 399517.80677653, + -614378.96218134, +-1410088.57973978, + -205244.82827959, +-2239082.80763875, +-2462650.1723152 , + 1456148.94627882, + -188777.89740542, + 1154928.74037611, + -251377.77882688, + -840694.39523902, + 831721.15800667, + 3304307.39337696, + 877720.30280171, + 2057498.6434299 , + 1039370.87045954, + 810723.18253791, + -346002.43232964, + -538393.65179991, + -292970.07254128, + 400666.69351448, + -360835.57617784, + 565600.66597296, + 125305.5323382 , + -1295324.99216217, +-1930200.67366356, +-1673460.52120975, + -398937.56984704, + 1039538.11678491, +-1000657.06411086, + 880933.2494595 , + 1672985.09224214, + 3096014.22084913, + -40499.22876092, + -508956.48976077, + -377465.98473018, + -549970.09886128, +-1049601.95117843, +-1211993.49389043, + 697487.60208923, +-1355187.79593249, +-1516611.52344977, + -1792339.31501558, + 14971.5418107 , + 720865.98008411, + -754727.64317987, + 58926.56905403, + 2156807.90695065, + 617437.29661957, + 955830.85634253, + 258836.12013994, + -474840.58702846, + 2973628.96140897, + 3243300.97194598, + -122561.9741524 , + -565662.05631341, + 239733.35260778, + 1137494.24477987, + 959478.11814098, + -395833.80179684, + -134248.28910505, + -167385.76672974, + -363927.61376987, + -434583.05586946, + 888772.48244177, + 1466326.32982088, + 928991.91586665, +-1498179.92878382, + 580369.25454836, + 428750.65836342, + 267688.71961185, + -635560.80512421, + 1198016.3224926 , + 140328.74458671, + -863994.23059744, + 
-1236471.45536021, + -779493.65351091, + 363906.95596867, + 275217.54801551, +-2484588.72973326, +-1334873.06834923, + 472212.8208042 , + 2005062.2333113 , + 1764099.20038334, + -517307.9505347 , + -70767.58647184, + 1839430.05400531, + -425869.480307 , + 822234.49383117, + 1588925.6409131 , + 1849472.01491101 +}; + diff --git a/kernels/matmul/16x16xf64/data.h b/kernels/matmul/16x16xf64/data.h new file mode 100644 index 00000000..f7535b14 --- /dev/null +++ b/kernels/matmul/16x16xf64/data.h @@ -0,0 +1,9 @@ +#pragma once + +#define M 16 +#define K 16 +#define N 16 + +extern const double X[M * K]; +extern const double Y[K * N]; +extern const double G[M * N]; diff --git a/kernels/matmul/16x16xf64/linalg.mlir b/kernels/matmul/16x16xf64/linalg.mlir new file mode 100644 index 00000000..26129109 --- /dev/null +++ b/kernels/matmul/16x16xf64/linalg.mlir @@ -0,0 +1,12 @@ +func.func public @matmul(%X: memref<16x16xf64>, + %Y: memref<16x16xf64>, + %Z: memref<16x16xf64>) { + "linalg.generic"(%X, %Y, %Z) ({ + ^bb0(%x: f64, %y: f64, %z: f64): + %r0 = arith.mulf %x, %y : f64 + %r1 = arith.addf %z, %r0 : f64 + "linalg.yield"(%r1) : (f64) -> () + }) {indexing_maps = [affine_map<(m, n, k) -> (m, k)>, affine_map<(m, n, k) -> (k, n)>, affine_map<(m, n, k) -> (m, + n)>], iterator_types = [#linalg.iterator_type, #linalg.iterator_type, #linalg.iterator_type], operand_segment_sizes = array} : (memref<16x16xf64>, memref<16x16xf64>, memref<16x16xf64>) -> () + func.return +} diff --git a/kernels/matmul/16x16xf64/linalg.xdsl.mlir b/kernels/matmul/16x16xf64/linalg.xdsl.mlir new file mode 100644 index 00000000..3b36c287 --- /dev/null +++ b/kernels/matmul/16x16xf64/linalg.xdsl.mlir @@ -0,0 +1,11 @@ +func.func public @matmul(%X: memref<16x16xf64>, + %Y: memref<16x16xf64>, + %Z: memref<16x16xf64>) { + "linalg.generic"(%X, %Y, %Z) ({ + ^bb0(%x: f64, %y: f64, %z: f64): + %r0 = arith.mulf %x, %y : f64 + %r1 = arith.addf %z, %r0 : f64 + "linalg.yield"(%r1) : (f64) -> () + }) {indexing_maps = 
[affine_map<(m, n, k) -> (m, k)>, affine_map<(m, n, k) -> (k, n)>, affine_map<(m, n, k) -> (m, n)>], iterator_types = [#linalg.iterator_type, #linalg.iterator_type, #linalg.iterator_type], operandSegmentSizes = array} : (memref<16x16xf64>, memref<16x16xf64>, memref<16x16xf64>) -> ()
+  func.return
+}
diff --git a/kernels/matmul/16x16xf64/main.c b/kernels/matmul/16x16xf64/main.c
new file mode 100644
index 00000000..945ea11a
--- /dev/null
+++ b/kernels/matmul/16x16xf64/main.c
@@ -0,0 +1,53 @@
+#include "data.h"
+
+#include <snrt.h>
+
+#include <math.h>
+
+// Kernel provided via external definition.
+// It accumulates into g (g += x * y) on row-major buffers.
+void matmul(const double *x, const double *y, double *g);
+
+int main() {
+    // Allocate shared local memory
+    // By avoiding allocators and bumping by a known offset a base pointer
+    // (snrt_l1_next()) that is the same for all the cores in the cluster, we are
+    // essentially providing the same memory regions to all the cores in this cluster.
+    // Layout: x is M x K, y is K x N, z is M x N, packed back to back.
+    double *local_x = (double *)snrt_l1_next();
+    double *local_y = local_x + M * K;
+    double *local_z = local_y + K * N;
+
+    // Copy data in shared local memory
+    if (snrt_is_dm_core()) {
+        snrt_dma_start_1d(local_x, X, M * K * sizeof(double));
+        snrt_dma_start_1d(local_y, Y, K * N * sizeof(double));
+        // The transfers are only started above: wait for them to complete
+        // before the barrier below releases the other cores.
+        snrt_dma_wait_all();
+    }
+
+    snrt_cluster_hw_barrier();
+
+    // Launch kernel: from this point on only core 0 is required to be alive.
+    int thiscore = snrt_cluster_core_idx();
+    if (thiscore != 0) return 0;
+
+    // The kernel accumulates into its output, so start from a defined value
+    // rather than whatever the local memory happens to contain. Done before
+    // the first snrt_mcycle() call so it stays outside the bracketed region.
+    for (int i = 0; i < M * N; i++) local_z[i] = 0.0;
+
+    (void)snrt_mcycle();
+    matmul(local_x, local_y, local_z);
+    (void)snrt_mcycle();
+
+    // Correctness check
+    int nerr = 0;
+    for (int i = 0; i < M * N; i++) {
+        double d = fabs(local_z[i] - G[i]);
+        nerr += !(d <= 1E-2f);  // Make sure to take into account NaNs (e.g.: happy path
+                                // on the taken branch)
+    }
+    return nerr;
+}
diff --git a/kernels/matmul/8x8xf64/Makefile b/kernels/matmul/8x8xf64/Makefile
new file mode 100644
index 00000000..0d36d7f6
--- /dev/null
+++ b/kernels/matmul/8x8xf64/Makefile
@@ -0,0 +1,8 @@
+.DEFAULT_GOAL := all
+
+include ../../../snitch/Makefile.rules
+
+TESTS =
+TESTS += baseline.x
+
+include ../../Makefile.kernels
diff --git a/kernels/matmul/8x8xf64/baseline.c b/kernels/matmul/8x8xf64/baseline.c
new file mode 100644
index 00000000..8b245d78
--- /dev/null
+++ b/kernels/matmul/8x8xf64/baseline.c
@@ -0,0 +1,16 @@
+#include "data.h"
+
+#include
+
+#include
+
+void matmul(const double* x, const double* y, double* g) {
+    for (uint32_t i = 0; i < M; ++i) {
+        for (uint32_t j = 0; j < N; ++j) {
+            for (uint32_t k = 0; k < K; ++k) {
+                // row-major accesses
+                g[i * N + j] += x[i * K + k] * y[k * N + j];
+            }
+        }
+    }
+}
diff --git a/kernels/matmul/8x8xf64/data.c b/kernels/matmul/8x8xf64/data.c
new file mode 100644
index 00000000..ba859643
--- /dev/null
+++ b/kernels/matmul/8x8xf64/data.c
@@ -0,0 +1,207 @@
+#define M 8
+#define K 8
+#define N 8
+
+const double X[M * K] = {
+97.62700785,
+ 430.37873274,
+ 205.52675214,
+ 89.76636599,
+-152.69040132,
+ 291.78822613,
+-124.82557747,
+ 783.54600156,
+ 927.325521 ,
+-233.11696235,
+ 583.45007617,
+ 57.78983951,
+ 136.08912219,
+ 851.19327659,
+-857.9278836 ,
+ -825.7414006 ,
+-959.56320512,
+ 665.2396911 ,
+ 556.3135019 ,
+ 740.02429649,
+ 957.23668447,
+ 598.31712843,
+ -77.04127549,
+ 561.05835257,
+-763.45114826,
+ 279.84204266,
+-713.29342518,
+ 889.3378341 ,
+ 43.6966435 ,
+-170.67612002,
+ -470.88877579,
+ 548.46737887,
+ 
-87.69933557, + 136.86789774, +-962.42039913, + 235.27099415, + 224.19144544, + 233.86799375, + 887.49615703, + 363.64059821, + -280.98419885, +-125.9360924 , + 395.26239185, +-879.54905674, + 333.53343089, + 341.27573924, +-579.23487785, +-742.14740469, +-369.14329815, +-272.57845811, + 140.39354084, +-122.79697308, + 976.74767612, +-795.9103785 , +-582.24648781, + -677.38096423, + 306.21665093, +-493.41679492, + -67.37845429, +-511.148816 , + -682.06083271, +-779.24971767, + 312.65917893, +-723.6340973 +}; + + +const double Y[K * N] = { +-606.83527664, +-262.54965868, + 641.9864597 , +-805.79744841, + 675.889815 , + -807.80318421, + 952.91893003, + -62.6975967 , + 953.52217638, + 209.69103949, + 478.5271588 , +-921.62441549, +-434.38607485, +-759.60687757, +-407.71960496, + -762.54456209, +-364.03364121, +-171.47401097, +-871.7050073 , + 384.94423874, + 133.20290841, +-469.22101812, + 46.49610693, +-812.11897848, + 151.89299111, + 858.59239515, +-362.8620951 , + 334.82075993, +-736.40427519, + 432.65440824, + -421.18781411, +-633.61727599, + 173.02586962, +-959.78490763, + 657.88005843, + -990.60904761, + 355.63307359, +-459.98405362, + 470.38804425, + 924.37709023, + -502.49371296, + 152.31466884, + 184.08386254, + 144.50381158, +-553.83673472, + 905.49802303, +-105.74924276, + 692.81734494, + 398.95855064, +-405.12609829, + 627.5956394 , +-206.98851831, + 762.20639422, + 162.54574527, + 763.47072371, + 385.06318016, + 450.50855964, + 2.64876385, + 912.16726945, + 287.98039846, + -152.28990288, + 212.78642826, +-961.61360338, +-396.85036665 +}; + + +const double G[M * N] = { +4.20101194e+05, + 3.50083946e+05, + 6.46538181e+05, + 7.87611951e+04, + -5.90066796e+05, + 1.75051891e+04, +-1.06214482e+06, +-8.56097454e+05, + -2.10708718e+06, + 1.63457337e+03, +-1.09121328e+06, +-3.60471830e+05, + -1.87997154e+05, +-4.27787358e+05, + 1.09454317e+06, + 3.22031941e+05, + 1.21350670e+06, + 1.36502415e+05, + 1.52156393e+05, +-6.22301317e+04, + -1.54349419e+06, + 
5.37282592e+05,
+-1.68277900e+06,
+-3.20747360e+05,
+ 1.27742014e+06,
+ 1.26930043e+06,
+ 1.44955858e+05,
+ 5.67935087e+05,
+ -1.71986709e+06,
+ 9.89135329e+05,
+-2.09766800e+06,
+-6.26582774e+05,
+ 1.00898615e+06,
+-1.19381322e+05,
+ 1.84200115e+06,
+-6.14448878e+05,
+ 1.51101302e+05,
+ 8.50535943e+05,
+ 1.25405040e+05,
+ 1.10035520e+06,
+ -9.06271345e+05,
+-8.11028383e+05,
+-1.02428859e+06,
+-1.74769700e+05,
+ 1.66273819e+05,
+-3.39830959e+05,
+ 5.64655615e+05,
+ 9.66178163e+05,
+ -7.41759606e+04,
+-9.14352682e+05,
+-9.32476120e+05,
+-5.95542872e+05,
+ 4.25567604e+05,
+-1.02251944e+06,
+ 5.68088238e+05,
+ 5.90867503e+05,
+ -6.37130208e+05,
+-2.03819589e+05,
+-8.51329047e+05,
+ 3.00857968e+05,
+ 1.32626599e+06,
+-5.57125464e+05,
+ 1.40126649e+06,
+-2.71458462e+04
+};
+
+
diff --git a/kernels/matmul/8x8xf64/data.h b/kernels/matmul/8x8xf64/data.h
new file mode 100644
index 00000000..f3da7f22
--- /dev/null
+++ b/kernels/matmul/8x8xf64/data.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#define M 8
+#define K 8
+#define N 8
+
+extern const double X[M * K];
+extern const double Y[K * N];
+extern const double G[M * N];
diff --git a/kernels/matmul/8x8xf64/main.c b/kernels/matmul/8x8xf64/main.c
new file mode 100644
index 00000000..945ea11a
--- /dev/null
+++ b/kernels/matmul/8x8xf64/main.c
@@ -0,0 +1,53 @@
+#include "data.h"
+
+#include <snrt.h>
+
+#include <math.h>
+
+// Kernel provided via external definition.
+// It accumulates into g (g += x * y) on row-major buffers.
+void matmul(const double *x, const double *y, double *g);
+
+int main() {
+    // Allocate shared local memory
+    // By avoiding allocators and bumping by a known offset a base pointer
+    // (snrt_l1_next()) that is the same for all the cores in the cluster, we are
+    // essentially providing the same memory regions to all the cores in this cluster.
+    // Layout: x is M x K, y is K x N, z is M x N, packed back to back.
+    double *local_x = (double *)snrt_l1_next();
+    double *local_y = local_x + M * K;
+    double *local_z = local_y + K * N;
+
+    // Copy data in shared local memory
+    if (snrt_is_dm_core()) {
+        snrt_dma_start_1d(local_x, X, M * K * sizeof(double));
+        snrt_dma_start_1d(local_y, Y, K * N * sizeof(double));
+        // The transfers are only started above: wait for them to complete
+        // before the barrier below releases the other cores.
+        snrt_dma_wait_all();
+    }
+
+    snrt_cluster_hw_barrier();
+
+    // Launch kernel: from this point on only core 0 is required to be alive.
+    int thiscore = snrt_cluster_core_idx();
+    if (thiscore != 0) return 0;
+
+    // The kernel accumulates into its output, so start from a defined value
+    // rather than whatever the local memory happens to contain. Done before
+    // the first snrt_mcycle() call so it stays outside the bracketed region.
+    for (int i = 0; i < M * N; i++) local_z[i] = 0.0;
+
+    (void)snrt_mcycle();
+    matmul(local_x, local_y, local_z);
+    (void)snrt_mcycle();
+
+    // Correctness check
+    int nerr = 0;
+    for (int i = 0; i < M * N; i++) {
+        double d = fabs(local_z[i] - G[i]);
+        nerr += !(d <= 1E-2f);  // Make sure to take into account NaNs (e.g.: happy path
+                                // on the taken branch)
+    }
+    return nerr;
+}
diff --git a/kernels/matmul/gendata.py b/kernels/matmul/gendata.py
new file mode 100644
index 00000000..87f7ae4d
--- /dev/null
+++ b/kernels/matmul/gendata.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+
+import numpy as np
+import argparse
+import sys
+
+
+# Type names for each supported precision, keyed by bit width (as a string).
+C_TYPES = {
+    '32': 'float',
+    '64': 'double',
+}
+
+NUMPY_TYPES = {
+    '32': np.single,
+    '64': np.double,
+}
+
+MLIR_TYPES = {
+    '32': 'f32',
+    '64': 'f64',
+}
+
+# Output templates: an MLIR memref.global and a C const array definition.
+MEMREF_GLOBAL = """
+memref.global constant @{symbol} : memref<{shape}x{type}> = dense<[
+{initializer}
+]>
+"""
+
+
+ARRAY_GLOBAL = """
+const {type} {symbol}[{shape}] = {{
+{initializer}
+}};
+"""
+
+
+def array_to_memref_initializer(array: np.ndarray):
+    """Render each row of ``array`` as one bracketed list, joined by commas."""
+    return ",\n".join(f" {np.array2string(row, separator=', ')}" for row in array)
+
+
+def array_to_memref(array: np.ndarray, precision: int, shape=None, symbol=None):
+    """Render ``array`` as a ``memref.global`` constant.
+
+    ``shape`` replaces the ``AxB`` dimension list derived from ``array.shape``.
+    """
+    return MEMREF_GLOBAL.format(
+        symbol=symbol or "array",
+        type=MLIR_TYPES[str(precision)],
+        shape=shape or "x".join(str(dim) for dim in array.shape),
+        initializer=array_to_memref_initializer(array),
+    )
+
+
+def array_to_c_initializer(array: np.ndarray):
+    """Render the flattened ``array`` one element per line, brackets removed."""
+    return np.array2string(array.flatten(), separator=",\n").strip(" []")
+
+
+def array_to_c(array: np.ndarray, *, precision: int, shape=None, symbol=None):
+    """Render ``array`` as a ``const`` C array definition.
+
+    ``shape`` replaces the ``A*B`` size expression derived from ``array.shape``.
+    """
+    return ARRAY_GLOBAL.format(
+        symbol=symbol or "array",
+        type=C_TYPES[str(precision)],
+        shape=shape or "*".join(str(dim) for dim in array.shape),
+        initializer=array_to_c_initializer(array),
+    )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        prog="gendata.py",
+        description="Generate literal initializers for a fictional BLAS matmul "
+        "(matrix-matrix multiplication) on 2d memrefs",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "-r",
+        "--range",
+        type=float,
+        nargs=2,
+        default=(-1000.0, 1000.0),
+        help="uniform distribution range",
+    )
+    parser.add_argument("-m", "--rows", type=int, default=16, help="number of rows")
+    parser.add_argument(
+        "-n", "--columns", type=int, default=16, help="number of columns"
+    )
+    parser.add_argument(
+        "-k",
+        "--inner-dimension",
+        type=int,
+        default=16,
+        help="size of inner dimension",
+    )
+    parser.add_argument(
+        "--format", default="c", choices=["mlir", "c"], help="output format"
+    )
+    parser.add_argument(
+        "--precision",
+        type=int,
+        default=64,
+        choices=[32, 64],
+        help="floating-point precision to use",
+    )
+    args = parser.parse_args()
+
+    rmin, rmax = args.range
+    m = args.rows
+    n = args.columns
+    k = args.inner_dimension
+    # Honour --precision for the generated values, not just for the type names.
+    dtype = NUMPY_TYPES[str(args.precision)]
+    # Fixed seed: the same arguments always yield the same data.
+    np.random.seed(0)
+    x = np.random.uniform(rmin, rmax, m * k).astype(dtype).reshape((m, k))
+    y = np.random.uniform(rmin, rmax, k * n).astype(dtype).reshape((k, n))
+
+    g = x @ y
+
+    printopts = {"linewidth": None, "threshold": sys.maxsize}
+    if args.format == "c":
+        fmt = array_to_c
+        # C sizes are spelled with the macros emitted right below.
+        shapes = {"X": "M * K", "Y": "K * N", "G": "M * N"}
+        print(f"#define M {m}")
+        print(f"#define K {k}")
+        print(f"#define N {n}")
+        if args.precision == 32:
+            # numpy only honours known formatter keys such as "float_kind";
+            # unknown keys are silently ignored. Only float literals take the
+            # "f" suffix: on a double it would reduce the constant to float.
+            printopts["formatter"] = {"float_kind": lambda v: f"{v:+}f"}
+    else:
+        assert args.format == "mlir"
+        fmt = array_to_memref
+        # memref types need literal extents: derive them from the array shape.
+        shapes = {"X": None, "Y": None, "G": None}
+        printopts["sign"] = "+"
+    np.set_printoptions(**printopts)
+    print(fmt(x, shape=shapes["X"], precision=args.precision, symbol="X"))
+    print(fmt(y, shape=shapes["Y"], precision=args.precision, symbol="Y"))
+    print(fmt(g, shape=shapes["G"], precision=args.precision, symbol="G"))
diff --git a/kernels/relu/16x16xf64/Makefile b/kernels/relu/16x16xf64/Makefile
new file mode 100644
index 00000000..0d36d7f6
--- /dev/null
+++ b/kernels/relu/16x16xf64/Makefile
@@ -0,0 +1,8 @@
+.DEFAULT_GOAL := all
+
+include ../../../snitch/Makefile.rules
+
+TESTS =
+TESTS += baseline.x
+
+include ../../Makefile.kernels
diff --git a/kernels/relu/16x16xf64/baseline.c b/kernels/relu/16x16xf64/baseline.c
new file mode 100644
index 00000000..785d7e39
--- /dev/null
+++ b/kernels/relu/16x16xf64/baseline.c
@@ -0,0 +1,17 @@
+#include "data.h"
+
+#include
+
+#include
+
+// Copied from math.h
+double fmax(double __x, double __y) { return __builtin_fmax(__x, __y); }
+
+void relu(const double* x, double* y) {
+    for (uint32_t i = 0; i < M; ++i) {
+        for (uint32_t j = 0; j < N; ++j) {
+            // row-major accesses
+            y[i * N + j] = fmax(0.0, x[i * N + j]);
+        }
+    }
+}
diff --git a/kernels/relu/16x16xf64/data.c b/kernels/relu/16x16xf64/data.c
new file mode 100644
index 00000000..4e71f67c
--- /dev/null
+++ b/kernels/relu/16x16xf64/data.c
@@ -0,0 +1,522 @@
+#define M 16
+#define N 16
+
+const double X[M * N] = {
+97.62700785,
+ 430.37873274,
+ 205.52675214,
+ 89.76636599,
+-152.69040132,
+ 291.78822613,
+-124.82557747,
+ 783.54600156,
+ 927.325521 ,
+-233.11696235,
+ 583.45007617,
+ 57.78983951,
+ 136.08912219,
+ 851.19327659,
+-857.9278836 ,
+ -825.7414006 ,
+-959.56320512,
+ 665.2396911 ,
+ 556.3135019 ,
+ 740.02429649,
+ 957.23668447,
+ 598.31712843,
+ -77.04127549,
+ 561.05835257,
+-763.45114826,
+ 279.84204266,
+-713.29342518,
+ 889.3378341 ,
+ 43.6966435 ,
+-170.67612002,
+ -470.88877579,
+ 548.46737887,
+ -87.69933557,
+ 136.86789774,
+-962.42039913,
+ 235.27099415,
+ 224.19144544,
+ 233.86799375,
+ 887.49615703,
+ 363.64059821,
+ -280.98419885,
+-125.9360924 ,
+ 395.26239185,
+-879.54905674,
+ 333.53343089,
+ 341.27573924,
+-579.23487785,
+-742.14740469,
+-369.14329815,
+-272.57845811, + 140.39354084, +-122.79697308, + 976.74767612, +-795.9103785 , +-582.24648781, + -677.38096423, + 306.21665093, +-493.41679492, + -67.37845429, +-511.148816 , + -682.06083271, +-779.24971767, + 312.65917893, +-723.6340973 , +-606.83527664, + -262.54965868, + 641.9864597 , +-805.79744841, + 675.889815 , +-807.80318421, + 952.91893003, + -62.6975967 , + 953.52217638, + 209.69103949, + 478.5271588 , + -921.62441549, +-434.38607485, +-759.60687757, +-407.71960496, +-762.54456209, + -364.03364121, +-171.47401097, +-871.7050073 , + 384.94423874, + 133.20290841, + -469.22101812, + 46.49610693, +-812.11897848, + 151.89299111, + 858.59239515, + -362.8620951 , + 334.82075993, +-736.40427519, + 432.65440824, +-421.18781411, + -633.61727599, + 173.02586962, +-959.78490763, + 657.88005843, +-990.60904761, + 355.63307359, +-459.98405362, + 470.38804425, + 924.37709023, +-502.49371296, + 152.31466884, + 184.08386254, + 144.50381158, +-553.83673472, + 905.49802303, + -105.74924276, + 692.81734494, + 398.95855064, +-405.12609829, + 627.5956394 , + -206.98851831, + 762.20639422, + 162.54574527, + 763.47072371, + 385.06318016, + 450.50855964, + 2.64876385, + 912.16726945, + 287.98039846, +-152.28990288, + 212.78642826, +-961.61360338, +-396.85036665, + 320.34707499, +-419.84478558, + 236.030858 , +-142.46259811, +-729.05187156, +-403.43534809, + 139.9298214 , + 181.7455225 , + 148.6504977 , + 306.40163971, + 304.20654 , +-137.16312913, + 793.0931917 , +-264.8762599 , +-128.27014947, + 783.84671003, + 612.38797809, + 407.77716708, +-799.54622538, + 838.96522749, + 428.4825991 , + 997.69401314, + -701.10339068, + 736.25211474, +-675.01413065, + 231.11912857, +-752.3600343 , + 696.01645864, + 614.63791745, + 138.20147723, +-185.63340555, +-861.66600909, + 394.85754629, + -92.91463464, + 444.11119894, + 732.76465186, + 951.04301001, + 711.60668479, +-976.57183163, +-280.04387104, + 459.98112485, +-656.74064548, + 42.07321241, +-891.32402332, +-600.00695021, 
+-962.95641108, + 587.39540671, + -552.15062388, +-309.29663861, + 856.16258693, + 408.82880385, +-936.32214094, + -670.611687 , + 242.956803 , + 154.45717721, +-524.21435725, + 868.42799585, + 227.93191193, + 71.26560605, + 179.81995271, + 460.24405903, +-376.11000904, + -203.55787557, +-580.31250205, +-627.61398824, + 888.74477997, + 479.1015901 , + -19.08238276, +-545.17074405, +-491.28703646, +-883.94167935, +-131.16674888, + -376.40823601, + 392.68697763, +-244.49632142, +-640.79264488, +-950.64254322, + -865.50073707, + 358.785547 , + -92.60631089, + 73.15842222, + 793.34258608, + 980.67789479, +-566.2060312 , + 326.1564062 , +-473.35524653, +-958.69800107, + 516.75730767, +-359.96569836, +-233.07221166, + 176.63422711, + 662.09691047, + 257.96368718, + 745.30131089, +-452.91593037, + 596.09366783, +-628.72811139, + 905.58331394, + 374.97655278, +-568.98464577, + 894.74118098, + 461.71161354, + -492.11671481, +-573.37604527, + 36.40142786, +-948.67456389, +-585.05984912, + -150.6290625 , +-251.66003933, + -72.84915127, +-444.74258741, + 173.56869292, + 727.71121185, +-764.93628808, + 34.75821431, +-735.86378731, + 433.71936239, + -207.88059439, + 130.84262372, +-633.44032757, +-710.30448131, + -23.8874387 , + -288.7745243 , + 880.86389051, + 530.65050761, + 497.3272397 , + 807.43947949, + -833.15512912 +}; + + +const double Y[M * N] = { +97.62700785, +430.37873274, +205.52675214, + 89.76636599, + 0. , + 291.78822613, + 0. , +783.54600156, +927.325521 , + 0. , + 583.45007617, + 57.78983951, +136.08912219, +851.19327659, + 0. , + 0. , + 0. , +665.2396911 , +556.3135019 , +740.02429649, + 957.23668447, +598.31712843, + 0. , +561.05835257, + 0. , + 279.84204266, + 0. , +889.3378341 , + 43.6966435 , + 0. , + 0. , +548.46737887, + 0. , +136.86789774, + 0. , + 235.27099415, +224.19144544, +233.86799375, +887.49615703, +363.64059821, + 0. , + 0. , +395.26239185, + 0. , +333.53343089, + 341.27573924, + 0. , + 0. , + 0. , + 0. , + 140.39354084, + 0. 
, +976.74767612, + 0. , + 0. , + 0. , +306.21665093, + 0. , + 0. , + 0. , + 0. , + 0. , +312.65917893, + 0. , + 0. , + 0. , +641.9864597 , + 0. , +675.889815 , + 0. , + 952.91893003, + 0. , +953.52217638, +209.69103949, +478.5271588 , + 0. , + 0. , + 0. , + 0. , + 0. , + 0. , + 0. , + 0. , +384.94423874, +133.20290841, + 0. , + 46.49610693, + 0. , +151.89299111, +858.59239515, + 0. , +334.82075993, + 0. , +432.65440824, + 0. , + 0. , +173.02586962, + 0. , +657.88005843, + 0. , + 355.63307359, + 0. , +470.38804425, +924.37709023, + 0. , + 152.31466884, +184.08386254, +144.50381158, + 0. , +905.49802303, + 0. , +692.81734494, +398.95855064, + 0. , +627.5956394 , + 0. , +762.20639422, +162.54574527, +763.47072371, +385.06318016, + 450.50855964, + 2.64876385, +912.16726945, +287.98039846, + 0. , + 212.78642826, + 0. , + 0. , +320.34707499, + 0. , + 236.030858 , + 0. , + 0. , + 0. , +139.9298214 , + 181.7455225 , +148.6504977 , +306.40163971, +304.20654 , + 0. , + 793.0931917 , + 0. , + 0. , +783.84671003, +612.38797809, + 407.77716708, + 0. , +838.96522749, +428.4825991 , +997.69401314, + 0. , +736.25211474, + 0. , +231.11912857, + 0. , + 696.01645864, +614.63791745, +138.20147723, + 0. , + 0. , + 394.85754629, + 0. , +444.11119894, +732.76465186, +951.04301001, + 711.60668479, + 0. , + 0. , +459.98112485, + 0. , + 42.07321241, + 0. , + 0. , + 0. , +587.39540671, + 0. , + 0. , +856.16258693, +408.82880385, + 0. , + 0. , +242.956803 , +154.45717721, + 0. , +868.42799585, + 227.93191193, + 71.26560605, +179.81995271, +460.24405903, + 0. , + 0. , + 0. , + 0. , +888.74477997, +479.1015901 , + 0. , + 0. , + 0. , + 0. , + 0. , + 0. , +392.68697763, + 0. , + 0. , + 0. , + 0. , +358.785547 , + 0. , + 73.15842222, +793.34258608, + 980.67789479, + 0. , +326.1564062 , + 0. , + 0. , + 516.75730767, + 0. , + 0. , +176.63422711, +662.09691047, + 257.96368718, +745.30131089, + 0. , +596.09366783, + 0. , + 905.58331394, +374.97655278, + 0. , +894.74118098, +461.71161354, + 0. , + 0. 
, + 36.40142786, + 0. , + 0. , + 0. , + 0. , + 0. , + 0. , +173.56869292, + 727.71121185, + 0. , + 34.75821431, + 0. , +433.71936239, + 0. , +130.84262372, + 0. , + 0. , + 0. , + 0. , +880.86389051, +530.65050761, +497.3272397 , +807.43947949, + 0. +}; + diff --git a/kernels/relu/16x16xf64/data.h b/kernels/relu/16x16xf64/data.h new file mode 100644 index 00000000..f11a14fc --- /dev/null +++ b/kernels/relu/16x16xf64/data.h @@ -0,0 +1,7 @@ +#pragma once + +#define M 16 +#define N 16 + +extern const double X[M * N]; +extern const double Y[M * N]; diff --git a/kernels/relu/16x16xf64/main.c b/kernels/relu/16x16xf64/main.c new file mode 100644 index 00000000..c5bcf30d --- /dev/null +++ b/kernels/relu/16x16xf64/main.c @@ -0,0 +1,41 @@ +#include "data.h" + +#include + +#include + +// Kernel provided via external definition +void relu(double *x, double *y); + +int main() { + // Allocate shared local memory + // By avoiding allocators and bumping by a known offset a base pointer + // (snrt_l1_next()) that is the same for all the cores in the cluster, we are + // essentially providing the same memory regions to all the cores in this cluster. + double *local_x = (double *)snrt_l1_next(); + double *local_y = local_x + M * N; + + // Copy data in shared local memory + if (snrt_is_dm_core()) { + snrt_dma_start_1d(local_x, X, M * N * sizeof(double)); + } + + snrt_cluster_hw_barrier(); + + // Launch kernel: from this point on only core 0 is required to be alive. 
+ int thiscore = snrt_cluster_core_idx(); + if (thiscore != 0) return 0; + + (void)snrt_mcycle(); + relu(local_x, local_y); + (void)snrt_mcycle(); + + // Correctness check + int nerr = 0; + for (int i = 0; i < M * N; i++) { + double d = fabs(local_y[i] - Y[i]); + nerr += !(d <= 1E-2f); // Make sure to take into account NaNs (e.g.: happy path + // on the taken branch) + } + return nerr; +} diff --git a/kernels/relu/gendata.py b/kernels/relu/gendata.py new file mode 100644 index 00000000..7ffe15a8 --- /dev/null +++ b/kernels/relu/gendata.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 + +import numpy as np +import argparse +import sys + + +C_TYPES = { + "32": "float", + "64": "double", +} + +NUMPY_TYPES = { + "32": np.single, + "64": np.double, +} + +MLIR_TYPES = { + "32": "f32", + "64": "f64", +} + +MEMREF_GLOBAL = """ +memref.global constant @{symbol} : memref<{shape}x{type}> = dense<[ +{initializer} +]> +""" + + +ARRAY_GLOBAL = """ +const {type} {symbol}[{shape}] = {{ +{initializer} +}}; +""" + + +def array_to_memref_initializer(array: np.array): + return ",\n".join(f" {np.array2string(row, separator=', ')}" for row in array) + + +def array_to_memref(array: np.array, precision: int, shape=None, symbol=None): + return MEMREF_GLOBAL.format( + symbol=symbol or "array", + type=MLIR_TYPES[str(precision)], + shape=shape or "x".join(str(dim) for dim in array.shape), + initializer=array_to_memref_initializer(array), + ) + + +def array_to_c_initializer(array: np.array): + return np.array2string(array.flatten(), separator=",\n").strip(" []") + + +def array_to_c(array: np.array, *, precision: int, shape=None, symbol=None): + return ARRAY_GLOBAL.format( + symbol=symbol or "array", + type=C_TYPES[str(precision)], + shape=shape or "*".join(str(dim) for dim in array.shape), + initializer=array_to_c_initializer(array), + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="gendata.py", + description="Generate literal initializers for a fictional BLAS matmul 
" + "(matrix-matrix single precision multiplication) on 2d memrefs", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "-r", + "--range", + type=float, + nargs=2, + default=(-1000.0, 1000.0), + help="uniform distribution range", + ) + parser.add_argument("-m", "--rows", type=int, default=16, help="number of rows") + parser.add_argument( + "-n", "--columns", type=int, default=16, help="number of columns" + ) + parser.add_argument( + "-k", + "--inner-dimension", + type=int, + default=16, + help="size of inner dimension", + ) + parser.add_argument( + "--format", default="c", choices=["mlir", "c"], help="output format" + ) + parser.add_argument( + "--precision", + type=int, + default=64, + choices=[32, 64], + help="floating-point precision to use", + ) + args = parser.parse_args() + + rmin, rmax = args.range + m = args.rows + n = args.columns + np.random.seed(0) + x = np.random.uniform(rmin, rmax, m * n).astype(np.float64).reshape((m, n)) + y = np.fmax(x, 0) + + printopts = {"linewidth": None, "threshold": sys.maxsize} + if args.format == "c": + fmt = array_to_c + print(f"#define M {m}") + print(f"#define N {n}") + printopts["formatter"] = {"double ": lambda x: f"{x:+}f"} + else: + assert args.format == "mlir" + fmt = array_to_memref + printopts["sign"] = "+" + np.set_printoptions(**printopts) + print(fmt(x, shape="M * N", precision=args.precision, symbol="X")) + print(fmt(y, shape="M * N", precision=args.precision, symbol="Y")) diff --git a/kernels/ssum/8x16xf32/Makefile b/kernels/ssum/8x16xf32/Makefile index 2343e1f9..5a659914 100644 --- a/kernels/ssum/8x16xf32/Makefile +++ b/kernels/ssum/8x16xf32/Makefile @@ -2,14 +2,7 @@ include ../../../snitch/Makefile.rules -PRES = -PRES += pres_0_llvm.x -PRES += pres_1_llvm_clean.x -PRES += pres_2_vectorized.x -PRES += pres_3_ssr_loop.x -PRES += pres_4_ssr_frep.x - -TESTS = $(PRES) +TESTS = TESTS += baseline.x TESTS += noalias.x TESTS += ssr1d.x @@ -20,10 +13,3 @@ TESTS += vector.x TESTS += 
scf.x include ../../Makefile.kernels - -# Presentation specific - -all-pres: $(PRES) - -allrun-pres: TESTS=$(PRES) -allrun-pres: $(RUN) diff --git a/kernels/ssum/8x16xf32/pres_0_llvm.S.txt b/kernels/ssum/8x16xf32/pres_0_llvm.S.txt deleted file mode 100644 index 882f763c..00000000 --- a/kernels/ssum/8x16xf32/pres_0_llvm.S.txt +++ /dev/null @@ -1,31 +0,0 @@ -# LLVM output (1189 cycles) - - .text - .attribute 4, 16 - .attribute 5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_zfh0p1" - .file "baseline.c" - .globl ssum # -- Begin function ssum - .p2align 2 - .type ssum,@function -ssum: # @ssum -# %bb.0: - mv a3, zero - addi a4, zero, 512 -.LBB0_1: # =>This Inner Loop Header: Depth=1 - add a5, a0, a3 - flw ft0, 0(a5) - add a5, a1, a3 - flw ft1, 0(a5) - fadd.s ft0, ft1, ft0 - add a5, a2, a3 - addi a3, a3, 4 - fsw ft0, 0(a5) - bne a3, a4, .LBB0_1 -# %bb.2: - ret -.Lfunc_end0: - .size ssum, .Lfunc_end0-ssum - # -- End function - .ident "clang version 12.0.1 (git@github.com:pulp-platform/llvm-project.git d2f0eff9be1f58bb186499e2055eb6888ce88dcc)" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/kernels/ssum/8x16xf32/pres_1_llvm_clean.S.txt b/kernels/ssum/8x16xf32/pres_1_llvm_clean.S.txt deleted file mode 100644 index 7e1ce005..00000000 --- a/kernels/ssum/8x16xf32/pres_1_llvm_clean.S.txt +++ /dev/null @@ -1,30 +0,0 @@ -# LLVM output cleaned (1189 cycles) - - .text - .attribute 4, 16 - .attribute 5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_zfh0p1" - .file "baseline.c" - .globl ssum # -- Begin function ssum - .p2align 2 - .type ssum,@function -ssum: - mv a3, zero # a3 <- 0 - addi a4, zero, 512 # a4 <- 512 -.loop_body: - add a5, a0, a3 # a5 <- a0 + a3 - flw ft0, 0(a5) # ft0 <- *a5 - add a5, a1, a3 # a5 <- a1 + a3 - flw ft1, 0(a5) # ft1 <- *a5 - fadd.s ft0, ft1, ft0 # ft0 <- ft1 + ft0 - add a5, a2, a3 # a5 <- a2 + a3 - addi a3, a3, 4 # a3 <- a3 + 4 - fsw ft0, 0(a5) # *a5 <- ft0 - bne a3, a4, .loop_body # if a3 < a4 repeat -# end_loop: - ret -.Lfunc_end0: - .size ssum, .Lfunc_end0-ssum - # 
-- End function - .ident "clang version 12.0.1 (git@github.com:pulp-platform/llvm-project.git d2f0eff9be1f58bb186499e2055eb6888ce88dcc)" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/kernels/ssum/8x16xf32/pres_2_vectorized.S.txt b/kernels/ssum/8x16xf32/pres_2_vectorized.S.txt deleted file mode 100644 index afcdef49..00000000 --- a/kernels/ssum/8x16xf32/pres_2_vectorized.S.txt +++ /dev/null @@ -1,30 +0,0 @@ -# Vectorized (613 cycles) - - .text - .attribute 4, 16 - .attribute 5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_zfh0p1" - .file "baseline.c" - .globl ssum # -- Begin function ssum - .p2align 2 - .type ssum,@function -ssum: - mv a3, zero # a3 <- 0 - addi a4, zero, 512 # a4 <- 512 -.loop_body: - add a5, a0, a3 # a5 <- a0 + a3 - fld ft0, 0(a5) # ft0 <- *a5 - add a5, a1, a3 # a5 <- a1 + a3 - fld ft1, 0(a5) # ft1 <- *a5 - vfadd.s ft0, ft1, ft0 # ft0 <- ft1 + ft0 - add a5, a2, a3 # a5 <- a2 + a3 - addi a3, a3, 8 # a3 <- a3 + 8 - fsd ft0, 0(a5) # *a5 <- ft0 - bne a3, a4, .loop_body # if a3 < a4 repeat -# end_loop: - ret -.Lfunc_end0: - .size ssum, .Lfunc_end0-ssum - # -- End function - .ident "clang version 12.0.1 (git@github.com:pulp-platform/llvm-project.git d2f0eff9be1f58bb186499e2055eb6888ce88dcc)" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/kernels/ssum/8x16xf32/pres_3_ssr_loop.S.txt b/kernels/ssum/8x16xf32/pres_3_ssr_loop.S.txt deleted file mode 100644 index f009fc3f..00000000 --- a/kernels/ssum/8x16xf32/pres_3_ssr_loop.S.txt +++ /dev/null @@ -1,34 +0,0 @@ -# SSR Loop (248 cycles) - - .text - .attribute 4, 16 - .attribute 5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_zfh0p1" - .file "ssr1d.c" - .globl ssum # -- Begin function ssum - .p2align 2 - .type ssum,@function -ssum: # @ssum -# Setup Snitch - addi a3, zero, 63 - scfgwi a3, 95 - addi a3, zero, 8 - scfgwi a3, 223 - scfgwi a0, 768 - scfgwi a1, 769 - scfgwi a2, 898 - csrsi 1984, 1 -# Loop - addi a0, zero, 64 # a0 <- 64 -.loop_body: - vfadd.s ft2, ft0, ft1 # stream sum - addi a0, a0, -1 # a0 <- 
a0 - 1 - bnez a0, .loop_body # if a0 > 0 repeat -# end_loop: - csrci 1984, 1 - ret -.Lfunc_end0: - .size ssum, .Lfunc_end0-ssum - # -- End function - .ident "clang version 12.0.1 (git@github.com:pulp-platform/llvm-project.git d2f0eff9be1f58bb186499e2055eb6888ce88dcc)" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/kernels/ssum/8x16xf32/pres_4_ssr_frep.S.txt b/kernels/ssum/8x16xf32/pres_4_ssr_frep.S.txt deleted file mode 100644 index 93148d67..00000000 --- a/kernels/ssum/8x16xf32/pres_4_ssr_frep.S.txt +++ /dev/null @@ -1,31 +0,0 @@ -# SSR + FREP (118 cycles) - - .text - .attribute 4, 16 - .attribute 5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_zfh0p1" - .file "ssr1d.c" - .globl ssum # -- Begin function ssum - .p2align 2 - .type ssum,@function -ssum: # @ssum -# Setup Snitch - addi a3, zero, 63 - scfgwi a3, 95 - addi a3, zero, 8 - scfgwi a3, 223 - scfgwi a0, 768 - scfgwi a1, 769 - scfgwi a2, 898 - csrsi 1984, 1 -# Loop - addi a1, zero, 63 # a1 <- 63 - frep.o a1, 1, 0, 0 # repeat next instruction 63 + 1 times - vfadd.s ft2, ft0, ft1 # stream sum - csrci 1984, 1 - ret -.Lfunc_end0: - .size ssum, .Lfunc_end0-ssum - # -- End function - .ident "clang version 12.0.1 (git@github.com:pulp-platform/llvm-project.git d2f0eff9be1f58bb186499e2055eb6888ce88dcc)" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/scripts/run.sh b/scripts/run.sh index bb9ac293..5114056e 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -1,10 +1,11 @@ #!/usr/bin/env bash -VALID_ARGS=$(getopt -o h --long abort-on-error,skip-clean,skip-build,skip-run,skip-results,help -- "$@") +VALID_ARGS=$(getopt -o h --long tag:,abort-on-error,skip-clean,skip-build,skip-run,skip-results,help -- "$@") if [[ $? 
-ne 0 ]]; then exit 1; fi +TAG=$(date +"%FT%H%M%S") ABORT_ON_ERROR=0 SKIP_CLEAN=0 SKIP_BUILD=0 @@ -14,6 +15,16 @@ SKIP_RESULTS=0 eval set -- "$VALID_ARGS" while [ : ]; do case "$1" in + --tag) + case "$2" in + '') + ;; + *) + TAG=$2 + ;; + esac + shift 2 + ;; --abort-on-error) ABORT_ON_ERROR=1 shift @@ -38,6 +49,7 @@ while [ : ]; do ;; -h | --help) echo "" + echo "--tag [NAME] Provide custom tag for results files." echo "--abort-on-error Abort upon the first error that occurs." echo "--skip-clean Skip the clean step of build directories." echo "--skip-build Skip the build step." @@ -47,8 +59,8 @@ while [ : ]; do shift exit 0 ;; - --) shift; - break + --) shift; + break ;; esac done @@ -68,9 +80,13 @@ RESULTS_DIR=${THIS_DIR}/../results/ VENV_DIR=".venv" KERNEL_DIRS=( - "saxpy/64xf32/" - "ssum/8x16xf32/" - "ssum/14x26xf32/" + # "saxpy/64xf32/" + # "ssum/8x16xf32/" + # "ssum/14x26xf32/" + "dsum/8x16xf32/" + "matmul/8x8xf64/" + #"matmul/16x16xf64/" + "relu/16x16xf64/" ) if [[ 1 -eq ${ABORT_ON_ERROR} ]]; then @@ -80,7 +96,7 @@ fi # Clean step if [[ 0 -eq ${SKIP_CLEAN} ]]; then - make VENV_DIR=${VENV_DIR} -C ${XDSL_DIR} clean + make VENV_DIR=${VENV_DIR} -C ${XDSL_DIR} clean rm -rf ${SCRIPTS_DIR}/${VENV_DIR} for krnl in ${KERNEL_DIRS[@]}; do @@ -123,7 +139,6 @@ fi if [[ 0 -eq ${SKIP_RESULTS} ]]; then . ${SCRIPTS_DIR}/${VENV_DIR}/bin/activate - TAG=$(date +"%FT%H%M%S") CYCLES_CSV="${RESULTS_DIR}/cycles_${TAG}.csv" ${SCRIPTS_DIR}/harvest_results.py -s ${KERNEL_ROOT} \ diff --git a/xdsl b/xdsl index 393fa634..0bd082b6 160000 --- a/xdsl +++ b/xdsl @@ -1 +1 @@ -Subproject commit 393fa6349893fea348311dc68eb071b01355f028 +Subproject commit 0bd082b621951e2f8e2a5795550267476ce0ae6d