diff --git a/slothy/targets/aarch64/aarch64_neon.py b/slothy/targets/aarch64/aarch64_neon.py index d84e83f3..79c24b28 100644 --- a/slothy/targets/aarch64/aarch64_neon.py +++ b/slothy/targets/aarch64/aarch64_neon.py @@ -1665,12 +1665,32 @@ class neg(AArch64BasicArithmetic): # pylint: disable=missing-docstring,invalid-n inputs = ["Xa"] outputs = ["Xd"] +class negs(AArch64BasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "negs , " + inputs = ["Xa"] + outputs = ["Xd"] + modifiesFlags=True + class ngc_zero(AArch64BasicArithmetic): # pylint: disable=missing-docstring,invalid-name pattern = "ngc , xzr" inputs = [] outputs = ["Xd"] dependsOnFlags=True +class ngcs(AArch64BasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "ngcs , " + inputs = ["Xa"] + outputs = ["Xd"] + modifiesFlags=True + dependsOnFlags=True + +class ngcs_zero(AArch64BasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "ngcs , xzr" + inputs = [] + outputs = ["Xd"] + modifiesFlags=True + dependsOnFlags=True + class adds(AArch64BasicArithmetic): # pylint: disable=missing-docstring,invalid-name pattern = "adds , , " inputs = ["Xa"] @@ -1720,6 +1740,13 @@ class sbcs_zero(AArch64BasicArithmetic): # pylint: disable=missing-docstring,inv modifiesFlags=True dependsOnFlags=True +class sbcs_to_zero(AArch64BasicArithmetic): # pylint: disable=missing-docstring,invalid-name + pattern = "sbcs xzr, , " + inputs = ["Xa", "Xb"] + outputs = [] + modifiesFlags=True + dependsOnFlags=True + class sbcs_zero_to_zero(AArch64BasicArithmetic): # pylint: disable=missing-docstring,invalid-name pattern = "sbcs xzr, , xzr" inputs = ["Xa"] @@ -1980,6 +2007,12 @@ class csel_xzr_ne(AArch64ConditionalSelect): # pylint: disable=missing-docstring outputs = ["Xd"] dependsOnFlags=True +class csel_xzr2_ne(AArch64ConditionalSelect): # pylint: disable=missing-docstring,invalid-name + pattern = "csel , xzr, , " + inputs = ["Xe"] + outputs = ["Xd"] + dependsOnFlags=True + class csel_ne(AArch64ConditionalSelect): # pylint: disable=missing-docstring,invalid-name pattern = "csel , , , " inputs = ["Xe", "Xf"] @@ -2125,6 +2158,11 @@ class tst_xform(Tst): # pylint: disable=missing-docstring,invalid-name inputs = ["Xa", "Xb"] modifiesFlags=True +class cmp(Tst): # pylint: disable=missing-docstring,invalid-name + pattern = "cmp , " + inputs = ["Xa","Xb"] + modifiesFlags=True + class cmp_xzr(Tst): # pylint: disable=missing-docstring,invalid-name pattern = "cmp , xzr" inputs = ["Xa"] diff --git a/slothy/targets/aarch64/neoverse_n1_experimental.py b/slothy/targets/aarch64/neoverse_n1_experimental.py index 76dd0b21..ef7c62a6 100644 --- a/slothy/targets/aarch64/neoverse_n1_experimental.py +++ b/slothy/targets/aarch64/neoverse_n1_experimental.py @@ -121,6 +121,7 @@ def get_min_max_objective(slothy): Tst : ExecutionUnit.I(), AArch64ShiftedArithmetic : ExecutionUnit.M(), Fmov : ExecutionUnit.M(), + umull_wform : ExecutionUnit.M(), (AArch64HighMultiply, AArch64Multiply) : ExecutionUnit.M(), vdup : ExecutionUnit.M(), @@ -158,6 +159,7 @@ def get_min_max_objective(slothy): (AArch64HighMultiply) : 4, (AArch64Multiply) : 3, (vdup) : 1, + umull_wform : 1, } default_latencies = { @@ -193,6 +195,7 @@ def get_min_max_objective(slothy): AArch64HighMultiply : 5, AArch64Multiply : 4, (vdup) : 3, + umull_wform : 2, } def get_latency(src, out_idx, dst):