diff --git a/gcc/common/config/riscv/riscv-common.c b/gcc/common/config/riscv/riscv-common.c index 0ec067769847..e3f125c741de 100644 --- a/gcc/common/config/riscv/riscv-common.c +++ b/gcc/common/config/riscv/riscv-common.c @@ -44,6 +44,7 @@ struct riscv_subset_t struct riscv_subset_t *next; bool explicit_version_p; + bool implied_p; }; /* Type for implied ISA info. */ @@ -57,20 +58,93 @@ struct riscv_implied_info_t riscv_implied_info_t riscv_implied_info[] = { {"d", "f"}, - {"k", "zkn"}, - {"k", "zkr"}, + {"f", "zicsr"}, + {"d", "zicsr"}, + {"zk", "zkn"}, + {"zk", "zkr"}, + {"zk", "zkt"}, + {"zkn", "zbkb"}, + {"zkn", "zbkc"}, + {"zkn", "zbkx"}, {"zkn", "zkne"}, {"zkn", "zknd"}, {"zkn", "zknh"}, - {"zkn", "zkg"}, - {"zkn", "zkb"}, + {"zks", "zbkb"}, + {"zks", "zbkc"}, + {"zks", "zbkx"}, {"zks", "zksed"}, {"zks", "zksh"}, - {"zks", "zkg"}, - {"zks", "zkb"}, {NULL, NULL} }; +/* This structure holds version information for specific ISA version. */ + +struct riscv_ext_version +{ + const char *name; + enum riscv_isa_spec_class isa_spec_class; + int major_version; + int minor_version; +}; + +/* All standard extensions defined in all supported ISA spec. */ +static const struct riscv_ext_version riscv_ext_version_table[] = +{ + /* name, ISA spec, major version, minor_version. 
*/ + {"e", ISA_SPEC_CLASS_20191213, 1, 9}, + {"e", ISA_SPEC_CLASS_20190608, 1, 9}, + {"e", ISA_SPEC_CLASS_2P2, 1, 9}, + + {"i", ISA_SPEC_CLASS_20191213, 2, 1}, + {"i", ISA_SPEC_CLASS_20190608, 2, 1}, + {"i", ISA_SPEC_CLASS_2P2, 2, 0}, + + {"m", ISA_SPEC_CLASS_20191213, 2, 0}, + {"m", ISA_SPEC_CLASS_20190608, 2, 0}, + {"m", ISA_SPEC_CLASS_2P2, 2, 0}, + + {"a", ISA_SPEC_CLASS_20191213, 2, 1}, + {"a", ISA_SPEC_CLASS_20190608, 2, 0}, + {"a", ISA_SPEC_CLASS_2P2, 2, 0}, + + {"f", ISA_SPEC_CLASS_20191213, 2, 2}, + {"f", ISA_SPEC_CLASS_20190608, 2, 2}, + {"f", ISA_SPEC_CLASS_2P2, 2, 0}, + + {"d", ISA_SPEC_CLASS_20191213, 2, 2}, + {"d", ISA_SPEC_CLASS_20190608, 2, 2}, + {"d", ISA_SPEC_CLASS_2P2, 2, 0}, + + {"c", ISA_SPEC_CLASS_20191213, 2, 0}, + {"c", ISA_SPEC_CLASS_20190608, 2, 0}, + {"c", ISA_SPEC_CLASS_2P2, 2, 0}, + + {"zicsr", ISA_SPEC_CLASS_20191213, 2, 0}, + {"zicsr", ISA_SPEC_CLASS_20190608, 2, 0}, + + {"zifencei", ISA_SPEC_CLASS_20191213, 2, 0}, + {"zifencei", ISA_SPEC_CLASS_20190608, 2, 0}, + + {"zba", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zbb", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zbc", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zbs", ISA_SPEC_CLASS_NONE, 1, 0}, + + {"zbkb", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zbkc", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zbkx", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zkne", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zknd", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zknh", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zkr", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zksed", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zksh", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zkt", ISA_SPEC_CLASS_NONE, 1, 0}, + + /* Terminate the list. 
*/ + {NULL, ISA_SPEC_CLASS_NONE, 0, 0} +}; + static const riscv_cpu_info riscv_cpu_tables[] = { #define RISCV_CORE(CORE_NAME, ARCH, TUNE) \ @@ -100,20 +174,22 @@ class riscv_subset_list riscv_subset_list (const char *, location_t); - const char *parsing_subset_version (const char *, unsigned *, unsigned *, - unsigned, unsigned, bool, bool *); + const char *parsing_subset_version (const char *, const char *, unsigned *, + unsigned *, bool, bool *); const char *parse_std_ext (const char *); const char *parse_multiletter_ext (const char *, const char *, const char *); - void handle_implied_ext (const char *, int, int, bool); + void handle_implied_ext (riscv_subset_t *); public: ~riscv_subset_list (); - void add (const char *, int, int, bool); + void add (const char *, int, int, bool, bool); + + void add (const char *, bool); riscv_subset_t *lookup (const char *, int major_version = RISCV_DONT_CARE_VERSION, @@ -133,7 +209,7 @@ static riscv_subset_list *current_subset_list = NULL; riscv_subset_t::riscv_subset_t () : name (), major_version (0), minor_version (0), next (NULL), - explicit_version_p (false) + explicit_version_p (false), implied_p (false) { } @@ -283,8 +359,31 @@ subset_cmp (const std::string &a, const std::string &b) void riscv_subset_list::add (const char *subset, int major_version, - int minor_version, bool explicit_version_p) + int minor_version, bool explicit_version_p, + bool implied_p) { + riscv_subset_t *ext = lookup (subset); + + if (ext) + { + if (ext->implied_p) + { + /* We won't add impiled `ext` if it already in list. 
*/ + gcc_assert (!implied_p); + ext->implied_p = implied_p; + ext->major_version = major_version; + ext->minor_version = minor_version; + } + else + error_at ( + m_loc, + "%<-march=%s%>: Extension `%s' appear more than one time.", + m_arch, + subset); + + return; + } + riscv_subset_t *s = new riscv_subset_t (); riscv_subset_t *itr; @@ -295,6 +394,7 @@ riscv_subset_list::add (const char *subset, int major_version, s->major_version = major_version; s->minor_version = minor_version; s->explicit_version_p = explicit_version_p; + s->implied_p = implied_p; s->next = NULL; if (m_tail == NULL) @@ -339,6 +439,43 @@ riscv_subset_list::add (const char *subset, int major_version, m_tail = s; } +static void +get_default_version (const char *ext, + unsigned int *major_version, + unsigned int *minor_version) +{ + const riscv_ext_version *ext_ver; + for (ext_ver = &riscv_ext_version_table[0]; + ext_ver->name != NULL; + ++ext_ver) + if (strcmp (ext, ext_ver->name) == 0) + { + if ((ext_ver->isa_spec_class == riscv_isa_spec) || + (ext_ver->isa_spec_class == ISA_SPEC_CLASS_NONE)) + { + *major_version = ext_ver->major_version; + *minor_version = ext_ver->minor_version; + return; + } + } + + /* Not found version info. */ + *major_version = 0; + *minor_version = 0; +} + +/* Add new subset to list, but using default version from ISA spec version. */ + +void +riscv_subset_list::add (const char *subset, bool implied_p) +{ + unsigned int major_version = 0, minor_version = 0; + + get_default_version (subset, &major_version, &minor_version); + + add (subset, major_version, minor_version, false, implied_p); +} + /* Convert subset info to string with explicit version info, VERSION_P to determine append version info or not. 
*/ @@ -349,10 +486,37 @@ riscv_subset_list::to_string (bool version_p) const oss << "rv" << m_xlen; bool first = true; - riscv_subset_t *subset = m_head; + riscv_subset_t *subset; + + bool skip_zifencei = false; + bool skip_zicsr = false; + + /* For RISC-V ISA version 2.2 or earlier version, zicsr and zifencei are + included in the base ISA. */ + if (riscv_isa_spec == ISA_SPEC_CLASS_2P2) + { + skip_zifencei = true; + skip_zicsr = true; + } - while (subset != NULL) +#ifndef HAVE_AS_MISA_SPEC + /* Skip since older binutils doesn't recognize zicsr. */ + skip_zicsr = true; +#endif +#ifndef HAVE_AS_MARCH_ZIFENCEI + /* Skip since older binutils doesn't recognize zifencei, we made + a mistake in that binutils 2.35 supports zicsr but not zifencei. */ + skip_zifencei = true; +#endif + + for (subset = m_head; subset != NULL; subset = subset->next) { + if (subset->implied_p && skip_zifencei && subset->name == "zifencei") + continue; + + if (subset->implied_p && skip_zicsr && subset->name == "zicsr") + continue; + /* For !version_p, we only separate extension with underline for multi-letter extension. */ if (!first && @@ -364,12 +528,12 @@ riscv_subset_list::to_string (bool version_p) const oss << subset->name; - if (version_p || subset->explicit_version_p) + /* Let binutils decide the extension version if we don't know. */ + if ((version_p || subset->explicit_version_p) && + (subset->major_version != 0 || subset->minor_version != 0)) oss << subset->major_version << 'p' << subset->minor_version; - - subset = subset->next; } return oss.str (); @@ -417,23 +581,21 @@ riscv_supported_std_ext (void) Points to the end of version Arguments: + `ext`: This extension. `p`: Current parsing position. `major_version`: Parsing result of major version, using default_major_version if version is not present in arch string. 
`minor_version`: Parsing result of minor version, set to 0 if version is not present in arch string, but set to `default_minor_version` if `major_version` using default_major_version. - `default_major_version`: Default major version. - `default_minor_version`: Default minor version. `std_ext_p`: True if parsing std extension. `explicit_version_p`: True if this subset is not using default version. */ const char * -riscv_subset_list::parsing_subset_version (const char *p, +riscv_subset_list::parsing_subset_version (const char *ext, + const char *p, unsigned *major_version, unsigned *minor_version, - unsigned default_major_version, - unsigned default_minor_version, bool std_ext_p, bool *explicit_version_p) { @@ -484,11 +646,7 @@ riscv_subset_list::parsing_subset_version (const char *p, minor = version; if (major == 0 && minor == 0) - { - /* We didn't find any version string, use default version. */ - *major_version = default_major_version; - *minor_version = default_minor_version; - } + get_default_version (ext, major_version, minor_version); else { *explicit_version_p = true; @@ -522,23 +680,17 @@ riscv_subset_list::parse_std_ext (const char *p) { case 'i': p++; - p = parsing_subset_version (p, &major_version, &minor_version, - /* default_major_version= */ 2, - /* default_minor_version= */ 0, - /* std_ext_p= */ true, - &explicit_version_p); - add ("i", major_version, minor_version, explicit_version_p); + p = parsing_subset_version ("i", p, &major_version, &minor_version, + /* std_ext_p= */ true, &explicit_version_p); + add ("i", major_version, minor_version, explicit_version_p, false); break; case 'e': p++; - p = parsing_subset_version (p, &major_version, &minor_version, - /* default_major_version= */ 1, - /* default_minor_version= */ 9, - /* std_ext_p= */ true, - &explicit_version_p); + p = parsing_subset_version ("e", p, &major_version, &minor_version, + /* std_ext_p= */ true, &explicit_version_p); - add ("e", major_version, minor_version, explicit_version_p); + 
add ("e", major_version, minor_version, explicit_version_p, false); if (m_xlen > 32) { @@ -550,18 +702,26 @@ riscv_subset_list::parse_std_ext (const char *p) case 'g': p++; - p = parsing_subset_version (p, &major_version, &minor_version, - /* default_major_version= */ 2, - /* default_minor_version= */ 0, - /* std_ext_p= */ true, - &explicit_version_p); - add ("i", major_version, minor_version, explicit_version_p); - - for (; *std_exts != 'q'; std_exts++) + p = parsing_subset_version ("g", p, &major_version, &minor_version, + /* std_ext_p= */ true, &explicit_version_p); + if (major_version != 0 || minor_version != 0) { - const char subset[] = {*std_exts, '\0'}; - add (subset, major_version, minor_version, explicit_version_p); + warning_at (m_loc, 0, "version of `g` will be omitted, please " + "specify version for individual extension."); } + + /* We have special rule for G, we disallow rv32gm2p but allow rv32g_zicsr + here, basically we treating G expand to imafd and implied zicsr and + zifencei. */ + + add ("i", false); + add ("m", false); + add ("a", false); + add ("f", false); + add ("d", false); + add ("zicsr", true); + add ("zifencei", true); + break; default: @@ -604,44 +764,47 @@ riscv_subset_list::parse_std_ext (const char *p) std_exts++; p++; - p = parsing_subset_version (p, &major_version, &minor_version, - /* default_major_version= */ 2, - /* default_minor_version= */ 0, - /* std_ext_p= */ true, - &explicit_version_p); - subset[0] = std_ext; - add (subset, major_version, minor_version, explicit_version_p); + p = parsing_subset_version (subset, p, &major_version, &minor_version, + /* std_ext_p= */ true, &explicit_version_p); + + add (subset, major_version, minor_version, explicit_version_p, false); } return p; } -/* Check any implied extensions for EXT with version - MAJOR_VERSION.MINOR_VERSION, EXPLICIT_VERSION_P indicate the version is - explicitly given by user or not. */ +/* Check any implied extensions for EXT. 
*/ void -riscv_subset_list::handle_implied_ext (const char *ext, - int major_version, - int minor_version, - bool explicit_version_p) +riscv_subset_list::handle_implied_ext (riscv_subset_t *ext) { riscv_implied_info_t *implied_info; for (implied_info = &riscv_implied_info[0]; implied_info->ext; ++implied_info) { - if (strcmp (ext, implied_info->ext) != 0) + if (strcmp (ext->name.c_str (), implied_info->ext) != 0) continue; /* Skip if implied extension already present. */ if (lookup (implied_info->implied_ext)) continue; - /* TODO: Implied extension might use different version. */ - add (implied_info->implied_ext, major_version, minor_version, - explicit_version_p); + /* Version of implied extension will get from current ISA spec + version. */ + add (implied_info->implied_ext, true); + } + + /* For RISC-V ISA version 2.2 or earlier version, zicsr and zifence is + included in the base ISA. */ + if (riscv_isa_spec == ISA_SPEC_CLASS_2P2) + { + if (lookup ("zicsr") == NULL) + add ("zicsr", true); + + if (lookup ("zifencei") == NULL) + add ("zifencei", true); } } @@ -679,16 +842,21 @@ riscv_subset_list::parse_multiletter_ext (const char *p, char *q = subset; const char *end_of_version; bool explicit_version_p = false; + char *ext; + char backup; while (*++q != '\0' && *q != '_' && !ISDIGIT (*q)) ; + backup = *q; + *q = '\0'; + ext = xstrdup (subset); + *q = backup; + end_of_version - = parsing_subset_version (q, &major_version, &minor_version, - /* default_major_version= */ 2, - /* default_minor_version= */ 0, - /* std_ext_p= */ FALSE, - &explicit_version_p); + = parsing_subset_version (ext, q, &major_version, &minor_version, + /* std_ext_p= */ false, &explicit_version_p); + free (ext); *q = '\0'; @@ -700,7 +868,7 @@ riscv_subset_list::parse_multiletter_ext (const char *p, return NULL; } - add (subset, major_version, minor_version, explicit_version_p); + add (subset, major_version, minor_version, explicit_version_p, false); free (subset); p += end_of_version - subset; @@ 
-779,11 +947,7 @@ riscv_subset_list::parse (const char *arch, location_t loc) for (itr = subset_list->m_head; itr != NULL; itr = itr->next) { - subset_list->handle_implied_ext ( - itr->name.c_str (), - itr->major_version, - itr->minor_version, - itr->explicit_version_p); + subset_list->handle_implied_ext (itr); } return subset_list; @@ -824,14 +988,24 @@ static const riscv_ext_flag_table_t riscv_ext_flag_table[] = {"d", &gcc_options::x_target_flags, MASK_DOUBLE_FLOAT}, {"c", &gcc_options::x_target_flags, MASK_RVC}, - {"zkg", &gcc_options::x_riscv_crypto_subext, MASK_ZKG}, - {"zkb", &gcc_options::x_riscv_crypto_subext, MASK_ZKB}, - {"zkr", &gcc_options::x_riscv_crypto_subext, MASK_ZKR}, - {"zkne", &gcc_options::x_riscv_crypto_subext, MASK_ZKNE}, - {"zknd", &gcc_options::x_riscv_crypto_subext, MASK_ZKND}, - {"zknh", &gcc_options::x_riscv_crypto_subext, MASK_ZKNH}, + {"zicsr", &gcc_options::x_riscv_zi_subext, MASK_ZICSR}, + {"zifencei", &gcc_options::x_riscv_zi_subext, MASK_ZIFENCEI}, + + {"zba", &gcc_options::x_riscv_bitmanip_subext, MASK_ZBA}, + {"zbb", &gcc_options::x_riscv_bitmanip_subext, MASK_ZBB}, + {"zbc", &gcc_options::x_riscv_bitmanip_subext, MASK_ZBC}, + {"zbs", &gcc_options::x_riscv_bitmanip_subext, MASK_ZBS}, + + {"zbkb", &gcc_options::x_riscv_crypto_subext, MASK_ZBKB}, + {"zbkc", &gcc_options::x_riscv_crypto_subext, MASK_ZBKC}, + {"zbkx", &gcc_options::x_riscv_crypto_subext, MASK_ZBKX}, + {"zknd", &gcc_options::x_riscv_crypto_subext, MASK_ZKND}, + {"zkne", &gcc_options::x_riscv_crypto_subext, MASK_ZKNE}, + {"zknh", &gcc_options::x_riscv_crypto_subext, MASK_ZKNH}, + {"zkr", &gcc_options::x_riscv_crypto_subext, MASK_ZKR}, {"zksed", &gcc_options::x_riscv_crypto_subext, MASK_ZKSED}, - {"zksh", &gcc_options::x_riscv_crypto_subext, MASK_ZKSH}, + {"zksh", &gcc_options::x_riscv_crypto_subext, MASK_ZKSH}, + {"zkt", &gcc_options::x_riscv_crypto_subext, MASK_ZKT}, {NULL, NULL, 0} }; diff --git a/gcc/config.gcc b/gcc/config.gcc index 8d0e4de710ff..bbcec96005cd 
100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -526,6 +526,7 @@ pru-*-*) riscv*) cpu_type=riscv extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o" + extra_headers="rvintrin.h" d_target_objs="riscv-d.o" ;; rs6000*-*-*) @@ -4496,7 +4497,7 @@ case "${target}" in ;; riscv*-*-*) - supported_defaults="abi arch tune riscv_attribute" + supported_defaults="abi arch tune riscv_attribute isa_spec" case "${target}" in riscv-* | riscv32*) xlen=32 ;; @@ -4504,6 +4505,21 @@ case "${target}" in *) echo "Unsupported RISC-V target ${target}" 1>&2; exit 1 ;; esac + case "${with_isa_spec}" in + ""|default|2.2) + tm_defines="${tm_defines} TARGET_DEFAULT_ISA_SPEC=ISA_SPEC_CLASS_2P2" + ;; + 20191213 | 201912) + tm_defines="${tm_defines} TARGET_DEFAULT_ISA_SPEC=ISA_SPEC_CLASS_20191213" + ;; + 20190608 | 201906) + tm_defines="${tm_defines} TARGET_DEFAULT_ISA_SPEC=ISA_SPEC_CLASS_20190608" + ;; + *) + echo "--with-isa-spec only accept 2.2, 20191213, 201912, 20190608 or 201906" 1>&2 + exit 1 + esac + case "${with_riscv_attribute}" in yes) tm_defines="${tm_defines} TARGET_RISCV_ATTRIBUTE=1" diff --git a/gcc/config.in b/gcc/config.in index 364eba477374..8b8a5c52a620 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -643,6 +643,18 @@ #endif +/* Define if your assembler supports -misa-spec=. */ +#ifndef USED_FOR_TARGET +#undef HAVE_AS_MISA_SPEC +#endif + + +/* Define if your assembler supports -march=rv*_zifencei. */ +#ifndef USED_FOR_TARGET +#undef HAVE_AS_MARCH_ZIFENCEI +#endif + + /* Define if your assembler supports relocs needed by -fpic. */ #ifndef USED_FOR_TARGET #undef HAVE_AS_SMALL_PIC_RELOCS diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md new file mode 100644 index 000000000000..f7ad5d312cc1 --- /dev/null +++ b/gcc/config/riscv/bitmanip.md @@ -0,0 +1,335 @@ +;; Machine description for RISC-V Bit Manipulation operations. +;; Copyright (C) 2019 Free Software Foundation, Inc. + +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_code_iterator bitmanip_bitwise [and ior]) + +(define_code_iterator any_minmax [smin smax umin umax]) + +(define_code_iterator clz_ctz_pcnt [clz ctz popcount]) + +(define_code_attr bitmanip_optab [(smin "smin") + (smax "smax") + (umin "umin") + (umax "umax") + (clz "clz") + (ctz "ctz") + (popcount "popcount")]) + +(define_code_attr bitmanip_insn [(smin "min") + (smax "max") + (umin "minu") + (umax "maxu") + (clz "clz") + (ctz "ctz") + (popcount "cpop")]) + +(define_mode_attr shiftm1 [(SI "const31_operand") (DI "const63_operand")]) + +(define_insn "si2" + [(set (match_operand:SI 0 "register_operand" "=r") + (clz_ctz_pcnt:SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_ZBB" + { return TARGET_64BIT ? 
"w\t%0,%1" : "\t%0,%1"; } + [(set_attr "type" "bitmanip")]) + +(define_insn "*disi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (clz_ctz_pcnt:SI (match_operand:SI 1 "register_operand" "r"))))] + "TARGET_64BIT && TARGET_ZBB" + "w\t%0,%1" + [(set_attr "type" "bitmanip")]) + +(define_insn "di2" + [(set (match_operand:DI 0 "register_operand" "=r") + (clz_ctz_pcnt:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_64BIT && TARGET_ZBB" + "\t%0,%1" + [(set_attr "type" "bitmanip")]) + +(define_insn "*_not" + [(set (match_operand:X 0 "register_operand" "=r") + (bitmanip_bitwise:X (not:X (match_operand:X 1 "register_operand" "r")) + (match_operand:X 2 "register_operand" "r")))] + "TARGET_ZBB || TARGET_ZBKB" + "n\t%0,%2,%1" + [(set_attr "type" "bitmanip")]) + +(define_insn "*xor_not" + [(set (match_operand:X 0 "register_operand" "=r") + (not:X (xor:X (match_operand:X 1 "register_operand" "r") + (match_operand:X 2 "register_operand" "r"))))] + "TARGET_ZBB || TARGET_ZBKB" + "xnor\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +;;; ??? 
pack + +(define_insn "*zero_extendhi2_bitmanip" + [(set (match_operand:GPR 0 "register_operand" "=r,r") + (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ZBB || TARGET_ZBKB" + "@ + zext.h\t%0,%1 + lhu\t%0,%1" + [(set_attr "type" "bitmanip,load")]) + +(define_insn "*zero_extendsidi2_bitmanip" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))] + "TARGET_64BIT && TARGET_ZBA" + "@ + zext.w\t%0,%1 + lwu\t%0,%1" + [(set_attr "type" "bitmanip,load")]) + +(define_insn "3" + [(set (match_operand:X 0 "register_operand" "=r") + (any_minmax:X (match_operand:X 1 "register_operand" "r") + (match_operand:X 2 "register_operand" "r")))] + "TARGET_ZBB" + "\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +(define_insn "*bset" + [(set (match_operand:X 0 "register_operand" "=r") + (ior:X (ashift:X (const_int 1) + (match_operand:QI 2 "register_operand" "r")) + (match_operand:X 1 "register_operand" "r")))] + "TARGET_ZBS" + "bset\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +(define_insn "*bset_mask" + [(set (match_operand:X 0 "register_operand" "=r") + (ior:X (ashift:X (const_int 1) + (subreg:QI + (and:X (match_operand:X 2 "register_operand" "r") + (match_operand 3 "" "i")) 0)) + (match_operand:X 1 "register_operand" "r")))] + "TARGET_ZBS" + "bset\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +(define_insn "*bset_1" + [(set (match_operand:X 0 "register_operand" "=r") + (ashift:X (const_int 1) + (match_operand:QI 1 "register_operand" "r")))] + "TARGET_ZBS" + "bset\t%0,x0,%1" + [(set_attr "type" "bitmanip")]) + +(define_insn "*bset_1_mask" + [(set (match_operand:X 0 "register_operand" "=r") + (ashift:X (const_int 1) + (subreg:QI + (and:X (match_operand:X 1 "register_operand" "r") + (match_operand 2 "" "i")) 0)))] + "TARGET_ZBS" + "bset\t%0,x0,%1" + [(set_attr "type" "bitmanip")]) + +(define_insn "*bseti" + [(set (match_operand:X 0 "register_operand" "=r") + (ior:X (match_operand:X 1 
"register_operand" "r") + (match_operand 2 "single_bit_mask_operand" "i")))] + "TARGET_ZBS" + "bseti\t%0,%1,%S2" + [(set_attr "type" "bitmanip")]) + +(define_insn "*bclr" + [(set (match_operand:X 0 "register_operand" "=r") + (and:X (rotate:X (const_int -2) + (match_operand:QI 2 "register_operand" "r")) + (match_operand:X 1 "register_operand" "r")))] + "TARGET_ZBS" + "bclr\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +(define_insn "*bclri" + [(set (match_operand:X 0 "register_operand" "=r") + (and:X (match_operand:X 1 "register_operand" "r") + (match_operand 2 "not_single_bit_mask_operand" "i")))] + "TARGET_ZBS" + "bclri\t%0,%1,%T2" + [(set_attr "type" "bitmanip")]) + +(define_insn "*binv" + [(set (match_operand:X 0 "register_operand" "=r") + (xor:X (ashift:X (const_int 1) + (match_operand:QI 2 "register_operand" "r")) + (match_operand:X 1 "register_operand" "r")))] + "TARGET_ZBS" + "binv\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +(define_insn "*binvi" + [(set (match_operand:X 0 "register_operand" "=r") + (xor:X (match_operand:X 1 "register_operand" "r") + (match_operand 2 "single_bit_mask_operand" "i")))] + "TARGET_ZBS" + "binvi\t%0,%1,%S2" + [(set_attr "type" "bitmanip")]) + +(define_insn "*bext" + [(set (match_operand:X 0 "register_operand" "=r") + (zero_extract:X (match_operand:X 1 "register_operand" "r") + (const_int 1) + (zero_extend:X + (match_operand:QI 2 "register_operand" "r"))))] + "TARGET_ZBS" + "bext\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +(define_insn "*bexti" + [(set (match_operand:X 0 "register_operand" "=r") + (zero_extract:X (match_operand:X 1 "register_operand" "r") + (const_int 1) + (match_operand 2 "immediate_operand" "i")))] + "TARGET_ZBS" + "bexti\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +(define_insn "rotrsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (rotatert:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "arith_operand" "rI")))] + "TARGET_ZBB || TARGET_ZBKB" + { return TARGET_64BIT ? 
"ror%i2w\t%0,%1,%2" : "ror%i2\t%0,%1,%2"; } + [(set_attr "type" "bitmanip")]) + +(define_insn "rotrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotatert:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:QI 2 "arith_operand" "rI")))] + "TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)" + "ror%i2\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +(define_expand "riscv_rolw" + [(match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] + "TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)" +{ + emit_insn (gen_rotlsi3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "rotlsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (rotate:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")))] + "TARGET_ZBB || TARGET_ZBKB" + { return TARGET_64BIT ? "rolw\t%0,%1,%2" : "rol\t%0,%1,%2"; } + [(set_attr "type" "bitmanip")]) + +(define_insn "rotldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotate:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")))] + "TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)" + "rol\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +(define_insn "rotlsi3_sext" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (rotate:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r"))))] + "TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)" + "rolw\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +;;; ??? orc_b + +(define_insn "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (bswap:SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_ZBB || TARGET_ZBKB" +{ + if (TARGET_64BIT) + return (TARGET_ZBB || TARGET_ZBKB) ? 
"rev8\t%0,%1\n\tsrai\t%0,%0,32" : "rev8.w\t%0,%1"; + else + return "rev8\t%0,%1"; +} + [(set_attr "type" "bitmanip")]) + +(define_insn "bswapdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (bswap:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)" + "rev8\t%0,%1" + [(set_attr "type" "bitmanip")]) + +;;; ??? clmul + +(define_insn "*shNadd" + [(set (match_operand:X 0 "register_operand" "=r") + (plus:X (ashift:X (match_operand:X 1 "register_operand" "r") + (match_operand:QI 2 "immediate_operand" "I")) + (match_operand:X 3 "register_operand" "r")))] + "TARGET_ZBA + && (INTVAL (operands[2]) >= 1) && (INTVAL (operands[2]) <= 3)" + "sh%2add\t%0,%1,%3" + [(set_attr "type" "bitmanip")]) + +(define_insn "*shNadduw" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:QI 2 "immediate_operand" "I")) + (match_operand 3 "immediate_operand" "")) + (match_operand:DI 4 "register_operand" "r")))] + "TARGET_64BIT && TARGET_ZBA + && (INTVAL (operands[2]) >= 1) && (INTVAL (operands[2]) <= 3) + && (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff" + "sh%2add.uw\t%0,%1,%4" + [(set_attr "type" "bitmanip")]) + +(define_insn "*add.uw" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "register_operand" "r")))] + "TARGET_64BIT && TARGET_ZBA" + "add.uw\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +(define_insn "*slliuw" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:QI 2 "immediate_operand" "I")) + (match_operand 3 "immediate_operand" "")))] + "TARGET_64BIT && TARGET_ZBA + && (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff" + "slli.uw\t%0,%1,%2" + [(set_attr "type" "bitmanip")]) + +;; sext + +(define_insn "*extend2_bitmanip" + [(set 
(match_operand:SUPERQI 0 "register_operand" "=r,r") + (sign_extend:SUPERQI + (match_operand:SHORT 1 "nonimmediate_operand" " r,m")))] + "TARGET_ZBB" + "@ + sext.\t%0,%1 + l\t%0,%1" + [(set_attr "type" "bitmanip") + (set_attr "length" "4")]) diff --git a/gcc/config/riscv/crypto.md b/gcc/config/riscv/crypto.md index 5b1a297d068f..349fbb5731fb 100644 --- a/gcc/config/riscv/crypto.md +++ b/gcc/config/riscv/crypto.md @@ -24,57 +24,62 @@ UNSPEC_AES_DSM UNSPEC_AES_ES UNSPEC_AES_ESM - UNSPEC_AES_K + UNSPEC_AES_IM + UNSPEC_AES_KS1 + UNSPEC_AES_KS2 UNSPEC_SHA_256_SIG0 UNSPEC_SHA_256_SIG1 UNSPEC_SHA_256_SUM0 UNSPEC_SHA_256_SUM1 UNSPEC_SHA_512_SIG0 + UNSPEC_SHA_512_SIG0_2 UNSPEC_SHA_512_SIG1 + UNSPEC_SHA_512_SIG1_2 UNSPEC_SHA_512_SUM0 UNSPEC_SHA_512_SUM1 UNSPEC_SM3_P0 UNSPEC_SM3_P1 UNSPEC_SM4_ED UNSPEC_SM4_KS - UNSPEC_POLLENTROPY - UNSPEC_GETNOISE ]) - ;; Zkne&Zknd - AES (RV32) (define_insn "riscv_aes32dsi" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "immediate_operand" "")] + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "immediate_operand" "")] UNSPEC_AES_DS))] "TARGET_ZKND && !TARGET_64BIT" - "aes32dsi\t%0,%1,%2") + "aes32dsi\t%0,%1,%2,%3") (define_insn "riscv_aes32dsmi" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "immediate_operand" "")] + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "immediate_operand" "")] UNSPEC_AES_DSM))] "TARGET_ZKND && !TARGET_64BIT" - "aes32dsmi\t%0,%1,%2") + "aes32dsmi\t%0,%1,%2,%3") (define_insn "riscv_aes32esi" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "immediate_operand" "")] + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "immediate_operand" "")] UNSPEC_AES_ES))] "TARGET_ZKNE && !TARGET_64BIT" - "aes32esi\t%0,%1,%2") + 
"aes32esi\t%0,%1,%2,%3") (define_insn "riscv_aes32esmi" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "immediate_operand" "")] + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "immediate_operand" "")] UNSPEC_AES_ESM))] "TARGET_ZKNE && !TARGET_64BIT" - "aes32esmi\t%0,%1,%2") + "aes32esmi\t%0,%1,%2,%3") ;; Zkne&Zknd - AES (RV64) @@ -114,7 +119,7 @@ (define_insn "riscv_aes64im" [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(match_operand:DI 1 "register_operand" "r")] - UNSPEC_AES_K))] + UNSPEC_AES_IM))] "TARGET_ZKND && TARGET_64BIT" "aes64im\t%0,%1") @@ -122,7 +127,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(match_operand:DI 1 "register_operand" "r") (match_operand:DI 2 "immediate_operand" "")] - UNSPEC_AES_K))] + UNSPEC_AES_KS1))] "TARGET_ZKNE && TARGET_64BIT" "aes64ks1i\t%0,%1,%2") @@ -130,7 +135,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(match_operand:DI 1 "register_operand" "r") (match_operand:DI 2 "register_operand" "r")] - UNSPEC_AES_K))] + UNSPEC_AES_KS2))] "TARGET_ZKNE && TARGET_64BIT" "aes64ks2\t%0,%1,%2") @@ -180,7 +185,7 @@ [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(match_operand:SI 1 "register_operand" "r") (match_operand:SI 2 "register_operand" "r")] - UNSPEC_SHA_512_SIG0))] + UNSPEC_SHA_512_SIG0_2))] "TARGET_ZKNH && !TARGET_64BIT" "sha512sig0l\t%0,%1,%2") @@ -196,7 +201,7 @@ [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(match_operand:SI 1 "register_operand" "r") (match_operand:SI 2 "register_operand" "r")] - UNSPEC_SHA_512_SIG1))] + UNSPEC_SHA_512_SIG1_2))] "TARGET_ZKNH && !TARGET_64BIT" "sha512sig1l\t%0,%1,%2") @@ -212,7 +217,7 @@ [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(match_operand:SI 1 "register_operand" "r") (match_operand:SI 2 "register_operand" "r")] - UNSPEC_SHA_512_SUM0))] + UNSPEC_SHA_512_SUM1))] "TARGET_ZKNH && 
!TARGET_64BIT" "sha512sum1r\t%0,%1,%2") @@ -283,17 +288,3 @@ "TARGET_ZKSED" "sm4ks\t%0,%1,%2") - -;; Zkr - Entropy Source - -(define_insn "riscv_pollentropy_" - [(set (match_operand:X 0 "register_operand" "=r") - (unspec:X [(const_int 0)] UNSPEC_POLLENTROPY))] - "TARGET_ZKR" - "pollentropy\t%0") - -(define_insn "riscv_getnoise_" - [(set (match_operand:X 0 "register_operand" "=r") - (unspec:X [(const_int 0)] UNSPEC_GETNOISE))] - "TARGET_ZKR" - "getnoise\t%0") diff --git a/gcc/config/riscv/multilib-generator b/gcc/config/riscv/multilib-generator index ee3d41f37326..6f3d60237e45 100755 --- a/gcc/config/riscv/multilib-generator +++ b/gcc/config/riscv/multilib-generator @@ -56,6 +56,10 @@ LONG_EXT_PREFIXES = ['z', 's', 'h', 'x'] # IMPLIED_EXT = { - "d" : ["f"], + "d" : ["f", "zicsr"], + "f" : ["zicsr"], + "zk" : ["zkn", "zkr", "zkt"], + "zkn" : ["zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"], + "zks" : ["zbkb", "zbkc", "zbkx", "zksed", "zksh"], } def arch_canonicalize(arch): diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index f764fe7ba016..b92976325370 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -73,6 +73,11 @@ the individual word-mode moves until after reload. */ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) return false; + + /* Check whether the constant can be loaded in a single + instruction with zbs extensions. */ + if (TARGET_64BIT && TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (INTVAL (op))) + return false; /* Otherwise check whether the constant can be loaded in a single instruction. */ @@ -212,3 +217,20 @@ { return riscv_gpr_save_operation_p (op); }) + +;; Predicates for the B extension. 
+(define_predicate "single_bit_mask_operand" + (and (match_code "const_int") + (match_test "pow2p_hwi (INTVAL (op))"))) + +(define_predicate "not_single_bit_mask_operand" + (and (match_code "const_int") + (match_test "pow2p_hwi (~INTVAL (op))"))) + +(define_predicate "const31_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 31"))) + +(define_predicate "const63_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 63"))) diff --git a/gcc/config/riscv/riscv-builtins-crypto.def b/gcc/config/riscv/riscv-builtins-crypto.def index 1a920157dc2e..8dbdfb92848c 100644 --- a/gcc/config/riscv/riscv-builtins-crypto.def +++ b/gcc/config/riscv/riscv-builtins-crypto.def @@ -18,10 +18,10 @@ along with GCC; see the file COPYING3. If not see . */ // Zkne&Zknd - AES (RV32) -DIRECT_BUILTIN (aes32dsi, RISCV_SI_FTYPE_SI_SI, crypto_zknd32), -DIRECT_BUILTIN (aes32dsmi, RISCV_SI_FTYPE_SI_SI, crypto_zknd32), -DIRECT_BUILTIN (aes32esi, RISCV_SI_FTYPE_SI_SI, crypto_zkne32), -DIRECT_BUILTIN (aes32esmi, RISCV_SI_FTYPE_SI_SI, crypto_zkne32), +DIRECT_BUILTIN (aes32dsi, RISCV_SI_FTYPE_SI_SI_SI, crypto_zknd32), +DIRECT_BUILTIN (aes32dsmi, RISCV_SI_FTYPE_SI_SI_SI, crypto_zknd32), +DIRECT_BUILTIN (aes32esi, RISCV_SI_FTYPE_SI_SI_SI, crypto_zkne32), +DIRECT_BUILTIN (aes32esmi, RISCV_SI_FTYPE_SI_SI_SI, crypto_zkne32), // Zkne&Zknd - AES(RV64) DIRECT_BUILTIN (aes64ds, RISCV_DI_FTYPE_DI_DI, crypto_zknd64), @@ -51,10 +51,10 @@ DIRECT_BUILTIN (sha512sum0r, RISCV_SI_FTYPE_SI_SI, crypto_zknh32), DIRECT_BUILTIN (sha512sum1r, RISCV_SI_FTYPE_SI_SI, crypto_zknh32), // Zknh - SHA512 (RV64) -DIRECT_BUILTIN (sha512sig0, RISCV_DI_FTYPE_DI_DI, crypto_zknh64), -DIRECT_BUILTIN (sha512sig1, RISCV_DI_FTYPE_DI_DI, crypto_zknh64), -DIRECT_BUILTIN (sha512sum0, RISCV_DI_FTYPE_DI_DI, crypto_zknh64), -DIRECT_BUILTIN (sha512sum1, RISCV_DI_FTYPE_DI_DI, crypto_zknh64), +DIRECT_BUILTIN (sha512sig0, RISCV_DI_FTYPE_DI, crypto_zknh64), +DIRECT_BUILTIN (sha512sig1, RISCV_DI_FTYPE_DI, 
crypto_zknh64), +DIRECT_BUILTIN (sha512sum0, RISCV_DI_FTYPE_DI, crypto_zknh64), +DIRECT_BUILTIN (sha512sum1, RISCV_DI_FTYPE_DI, crypto_zknh64), // Zksh - SM3 RISCV_BUILTIN (sm3p0_si, "sm3p0", RISCV_BUILTIN_DIRECT, RISCV_SI_FTYPE_SI, crypto_zksh32), @@ -68,8 +68,3 @@ RISCV_BUILTIN (sm4ed_di, "sm4ed", RISCV_BUILTIN_DIRECT, RISCV_DI_FTYPE_DI_SI, cr RISCV_BUILTIN (sm4ks_si, "sm4ks", RISCV_BUILTIN_DIRECT, RISCV_SI_FTYPE_SI_SI, crypto_zksed32), RISCV_BUILTIN (sm4ks_di, "sm4ks", RISCV_BUILTIN_DIRECT, RISCV_DI_FTYPE_DI_SI, crypto_zksed64), -// Zkr - Entropy Source -RISCV_BUILTIN (pollentropy_si, "pollentropy", RISCV_BUILTIN_DIRECT, RISCV_SI_FTYPE, crypto_zkr32), -RISCV_BUILTIN (pollentropy_di, "pollentropy", RISCV_BUILTIN_DIRECT, RISCV_DI_FTYPE, crypto_zkr64), -RISCV_BUILTIN (getnoise_si, "getnoise", RISCV_BUILTIN_DIRECT, RISCV_SI_FTYPE, crypto_zkr32), -RISCV_BUILTIN (getnoise_di, "getnoise", RISCV_BUILTIN_DIRECT, RISCV_DI_FTYPE, crypto_zkr64), diff --git a/gcc/config/riscv/riscv-builtins.c b/gcc/config/riscv/riscv-builtins.c index d187d20e4653..dc7c9ba135fc 100644 --- a/gcc/config/riscv/riscv-builtins.c +++ b/gcc/config/riscv/riscv-builtins.c @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see #define RISCV_FTYPE_NAME0(A) RISCV_##A##_FTYPE #define RISCV_FTYPE_NAME1(A, B) RISCV_##A##_FTYPE_##B #define RISCV_FTYPE_NAME2(A, B, C) RISCV_##A##_FTYPE_##B##_##C +#define RISCV_FTYPE_NAME3(A, B, C, D) RISCV_##A##_FTYPE_##B##_##C##_##D /* Classifies the prototype of a built-in function. 
*/ enum riscv_function_type { @@ -88,6 +89,8 @@ struct riscv_builtin_description { AVAIL (hard_float, TARGET_HARD_FLOAT) +AVAIL (bitmanip64, TARGET_64BIT && TARGET_BITMANIP) + AVAIL (crypto_zknd32, TARGET_ZKND && !TARGET_64BIT) AVAIL (crypto_zknd64, TARGET_ZKND && TARGET_64BIT) AVAIL (crypto_zkne32, TARGET_ZKNE && !TARGET_64BIT) @@ -146,10 +149,13 @@ AVAIL (crypto_zkr64, TARGET_ZKR && TARGET_64BIT) RISCV_ATYPE_##A, RISCV_ATYPE_##B #define RISCV_FTYPE_ATYPES2(A, B, C) \ RISCV_ATYPE_##A, RISCV_ATYPE_##B, RISCV_ATYPE_##C +#define RISCV_FTYPE_ATYPES3(A, B, C, D) \ + RISCV_ATYPE_##A, RISCV_ATYPE_##B, RISCV_ATYPE_##C, RISCV_ATYPE_##D static const struct riscv_builtin_description riscv_builtins[] = { #include "riscv-builtins-crypto.def" - + DIRECT_BUILTIN (pcntw, RISCV_SI_FTYPE_SI, bitmanip64), + DIRECT_BUILTIN (rolw, RISCV_SI_FTYPE_SI_SI, bitmanip64), DIRECT_BUILTIN (frflags, RISCV_USI_FTYPE, hard_float), DIRECT_NO_TARGET_BUILTIN (fsflags, RISCV_VOID_FTYPE_USI, hard_float) }; diff --git a/gcc/config/riscv/riscv-ftypes.def b/gcc/config/riscv/riscv-ftypes.def index 7d27530d2464..94c20ec6712b 100644 --- a/gcc/config/riscv/riscv-ftypes.def +++ b/gcc/config/riscv/riscv-ftypes.def @@ -35,3 +35,5 @@ DEF_RISCV_FTYPE (1, (DI, DI)) DEF_RISCV_FTYPE (2, (SI, SI, SI)) DEF_RISCV_FTYPE (2, (DI, DI, DI)) DEF_RISCV_FTYPE (2, (DI, DI, SI)) +DEF_RISCV_FTYPE (3, (SI, SI, SI, SI)) +DEF_RISCV_FTYPE (3, (DI, DI, DI, DI)) diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 6ee08bc206f0..c48d86a6aedc 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -39,6 +39,16 @@ enum riscv_code_model { }; extern enum riscv_code_model riscv_cmodel; +enum riscv_isa_spec_class { + ISA_SPEC_CLASS_NONE, + + ISA_SPEC_CLASS_2P2, + ISA_SPEC_CLASS_20190608, + ISA_SPEC_CLASS_20191213 +}; + +extern enum riscv_isa_spec_class riscv_isa_spec; + /* Keep this list in sync with define_attr "tune" in riscv.md. 
*/ enum riscv_microarchitecture_type { generic, @@ -51,22 +61,42 @@ enum riscv_align_data { riscv_align_data_type_natural }; -#define MASK_ZKG (1 << 0) -#define MASK_ZKB (1 << 1) -#define MASK_ZKR (1 << 2) -#define MASK_ZKNE (1 << 3) -#define MASK_ZKND (1 << 4) -#define MASK_ZKNH (1 << 5) -#define MASK_ZKSED (1 << 6) -#define MASK_ZKSH (1 << 7) - -#define TARGET_ZKG ((riscv_crypto_subext & MASK_ZKG) != 0) -#define TARGET_ZKB ((riscv_crypto_subext & MASK_ZKB) != 0) -#define TARGET_ZKR ((riscv_crypto_subext & MASK_ZKR) != 0) -#define TARGET_ZKNE ((riscv_crypto_subext & MASK_ZKNE) != 0) -#define TARGET_ZKND ((riscv_crypto_subext & MASK_ZKND) != 0) -#define TARGET_ZKNH ((riscv_crypto_subext & MASK_ZKNH) != 0) +#define MASK_ZICSR (1 << 0) +#define MASK_ZIFENCEI (1 << 1) + +#define MASK_ZBA (1 << 0) +#define MASK_ZBB (1 << 1) +#define MASK_ZBC (1 << 2) +#define MASK_ZBS (1 << 3) + +#define MASK_ZBKB (1 << 0) +#define MASK_ZBKC (1 << 1) +#define MASK_ZBKX (1 << 2) +#define MASK_ZKNE (1 << 3) +#define MASK_ZKND (1 << 4) +#define MASK_ZKNH (1 << 5) +#define MASK_ZKR (1 << 6) +#define MASK_ZKSED (1 << 7) +#define MASK_ZKSH (1 << 8) +#define MASK_ZKT (1 << 9) + +#define TARGET_ZICSR ((riscv_zi_subext & MASK_ZICSR) != 0) +#define TARGET_ZIFENCEI ((riscv_zi_subext & MASK_ZIFENCEI) != 0) + +#define TARGET_ZBA ((riscv_bitmanip_subext & MASK_ZBA) != 0) +#define TARGET_ZBB ((riscv_bitmanip_subext & MASK_ZBB) != 0) +#define TARGET_ZBC ((riscv_bitmanip_subext & MASK_ZBC) != 0) +#define TARGET_ZBS ((riscv_bitmanip_subext & MASK_ZBS) != 0) + +#define TARGET_ZBKB ((riscv_crypto_subext & MASK_ZBKB) != 0) +#define TARGET_ZBKC ((riscv_crypto_subext & MASK_ZBKC) != 0) +#define TARGET_ZBKX ((riscv_crypto_subext & MASK_ZBKX) != 0) +#define TARGET_ZKNE ((riscv_crypto_subext & MASK_ZKNE) != 0) +#define TARGET_ZKND ((riscv_crypto_subext & MASK_ZKND) != 0) +#define TARGET_ZKNH ((riscv_crypto_subext & MASK_ZKNH) != 0) +#define TARGET_ZKR ((riscv_crypto_subext & MASK_ZKR) != 0) #define TARGET_ZKSED 
((riscv_crypto_subext & MASK_ZKSED) != 0) -#define TARGET_ZKSH ((riscv_crypto_subext & MASK_ZKSH) != 0) +#define TARGET_ZKSH ((riscv_crypto_subext & MASK_ZKSH) != 0) +#define TARGET_ZKT ((riscv_crypto_subext & MASK_ZKT) != 0) #endif /* ! GCC_RISCV_OPTS_H */ diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c index d489717b2a51..705574655db5 100644 --- a/gcc/config/riscv/riscv.c +++ b/gcc/config/riscv/riscv.c @@ -388,6 +388,20 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS], return 1; } + /* ??? Maybe there are also other bitmanip instructions useful for loading + constants? */ + if (TARGET_64BIT) + { + if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value)) + { + /* Simply SBSET. */ + codes[0].code = UNKNOWN; + codes[0].value = value; + return 1; + } + /* ??? Can use slo/sro to load constants. */ + } + /* End with ADDI. When constructing HImode constants, do not generate any intermediate value that is not itself a valid HImode constant. The XORI case below will handle those remaining HImode constants. */ @@ -439,6 +453,47 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS], } } + if (cost > 2 && TARGET_64BIT && TARGET_ZBB) + { + int leading_ones = clz_hwi (~value); + int trailing_ones = ctz_hwi (~value); + + /* If all bits are one except a few that are zero, and the zero bits + are within a range of 11 bits, and at least one of the upper 32-bits + is a zero, then we can generate a constant by loading a small + negative constant and rotating. */ + if (leading_ones < 32 + && ((64 - leading_ones - trailing_ones) < 12)) + { + codes[0].code = UNKNOWN; + /* The sign-bit might be zero, so just rotate to be safe. */ + codes[0].value = (((unsigned HOST_WIDE_INT) value >> trailing_ones) + | (value << (64 - trailing_ones))); + codes[1].code = ROTATERT; + codes[1].value = 64 - trailing_ones; + cost = 2; + } + /* Handle the case where the 11 bit range of zero bits wraps around. 
*/ + else + { + int upper_trailing_ones = ctz_hwi (~value >> 32); + int lower_leading_ones = clz_hwi (~value << 32); + + if (upper_trailing_ones < 32 && lower_leading_ones < 32 + && ((64 - upper_trailing_ones - lower_leading_ones) < 12)) + { + codes[0].code = UNKNOWN; + /* The sign-bit might be zero, so just rotate to be safe. */ + codes[0].value = ((value << (32 - upper_trailing_ones)) + | ((unsigned HOST_WIDE_INT) value + >> (32 + upper_trailing_ones))); + codes[1].code = ROTATERT; + codes[1].value = 32 - upper_trailing_ones; + cost = 2; + } + } + } + gcc_assert (cost <= RISCV_MAX_INTEGER_OPS); return cost; } @@ -1735,6 +1790,14 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN { *total = COSTS_N_INSNS (SINGLE_SHIFT_COST); return true; + } + /* This is an bext. */ + if (TARGET_ZBS && outer_code == SET + && GET_CODE (XEXP (x, 1)) == CONST_INT + && INTVAL (XEXP (x, 1)) == 1) + { + *total = COSTS_N_INSNS (SINGLE_SHIFT_COST); + return true; } return false; @@ -2024,7 +2087,17 @@ riscv_output_move (rtx dest, rtx src) } if (src_code == CONST_INT) - return "li\t%0,%1"; + { + if (SMALL_OPERAND (INTVAL (src)) || LUI_OPERAND (INTVAL (src))) + return "li\t%0,%1"; + + if (TARGET_64BIT && TARGET_ZBS + && SINGLE_BIT_MASK_OPERAND (INTVAL (src))) + return "bseti\t%0,zero,%S1"; + + /* Should never reach here. */ + abort (); + } if (src_code == HIGH) return "lui\t%0,%h1"; @@ -3234,7 +3307,7 @@ riscv_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, bool riscv_expand_block_move (rtx dest, rtx src, rtx length) { - if (CONST_INT_P (length)) + if (CONST_INT_P (length) && INTVAL (length) >= 0) { HOST_WIDE_INT factor, align; @@ -3366,7 +3439,9 @@ riscv_memmodel_needs_release_fence (enum memmodel model) 'A' Print the atomic operation suffix for memory model OP. 'F' Print a FENCE if the memory model requires a release. 'z' Print x0 if OP is zero, otherwise print OP normally. - 'i' Print i if the operand is not a register. 
*/ + 'i' Print i if the operand is not a register. + 's' Sign-extend a 32-bit constant value to 64-bits then print. + 'S' Print shift-index of single-bit mask OP. */ static void riscv_print_operand (FILE *file, rtx op, int letter) @@ -3406,6 +3481,27 @@ riscv_print_operand (FILE *file, rtx op, int letter) fputs ("i", file); break; + case 's': + { + rtx newop = GEN_INT (INTVAL (op) | ((unsigned HOST_WIDE_INT) 0xffffffff << 32)); + output_addr_const (file, newop); + break; + } + + case 'S': + { + rtx newop = GEN_INT (ctz_hwi (INTVAL (op))); + output_addr_const (file, newop); + break; + } + + case 'T': + { + rtx newop = GEN_INT (ctz_hwi (~INTVAL (op))); + output_addr_const (file, newop); + break; + } + default: switch (code) { diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 172c7ca7c98b..ef50c1cef1b5 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -70,13 +70,20 @@ extern const char *riscv_default_mtune (int argc, const char **argv); #define TARGET_64BIT (__riscv_xlen == 64) #endif /* IN_LIBGCC2 */ +#ifdef HAVE_AS_MISA_SPEC +#define ASM_MISA_SPEC "%{misa-spec=*}" +#else +#define ASM_MISA_SPEC "" +#endif + #undef ASM_SPEC #define ASM_SPEC "\ %(subtarget_asm_debugging_spec) \ %{" FPIE_OR_FPIC_SPEC ":-fpic} \ %{march=*} \ %{mabi=*} \ -%(subtarget_asm_spec)" +%(subtarget_asm_spec)" \ +ASM_MISA_SPEC #undef DRIVER_SELF_SPECS #define DRIVER_SELF_SPECS \ @@ -492,6 +499,19 @@ enum reg_class (((VALUE) | ((1UL<<31) - IMM_REACH)) == ((1UL<<31) - IMM_REACH) \ || ((VALUE) | ((1UL<<31) - IMM_REACH)) + IMM_REACH == 0) +/* The following macros use B extension instructions to load constants. */ + +/* If this is a single bit mask, then we can load it with bseti. But this + is not useful for any of the low 31 bits because we can use addi or lui + to load them. It is wrong for loading SImode 0x80000000 on rv64 because it + needs to be sign-extended. So we restrict this to the upper 32-bits + only. */ +/* ??? 
It is OK for DImode 0x80000000 on rv64, but we don't know the target + mode in riscv_build_integer_1 so can't handle this case separate from the + bad SImode case. */ +#define SINGLE_BIT_MASK_OPERAND(VALUE) \ + (pow2p_hwi (VALUE) && (ctz_hwi (VALUE) >= 32)) + /* Stack layout; function entry, exit and calling. */ #define STACK_GROWS_DOWNWARD 1 @@ -711,6 +731,13 @@ typedef struct { #define LOGICAL_OP_NON_SHORT_CIRCUIT 0 +/* Configure CLZ/CTZ behavior. */ + +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) + /* Control the assembler format that we output. */ /* Output to assembler file text saying following lines diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 7d2edb63195c..e0d8bca6d2b3 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -43,6 +43,9 @@ UNSPEC_LRINT UNSPEC_LROUND + ;; Bitmanip + UNSPEC_PCNTW + ;; Stack tie UNSPEC_TIE ]) @@ -162,10 +165,11 @@ ;; multi multiword sequence (or user asm statements) ;; nop no operation ;; ghost an instruction that produces no real code +;; bitmanip bitmanip instructions (define_attr "type" "unknown,branch,jump,call,load,fpload,store,fpstore, mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul, - fmadd,fdiv,fcmp,fcvt,fsqrt,multi,auipc,sfb_alu,nop,ghost" + fmadd,fdiv,fcmp,fcvt,fsqrt,multi,auipc,sfb_alu,nop,ghost,bitmanip" (cond [(eq_attr "got" "load") (const_string "load") ;; If a doubleword move uses these expensive instructions, @@ -1052,15 +1056,21 @@ ;; Extension insns. 
-(define_insn_and_split "zero_extendsidi2" +(define_expand "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))] + "TARGET_64BIT") + +(define_insn_and_split "*zero_extendsidi2_internal" [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" " r,m")))] - "TARGET_64BIT" + "TARGET_64BIT && !TARGET_ZBA" "@ # lwu\t%0,%1" - "&& reload_completed + "&& !TARGET_ZBA + && reload_completed && REG_P (operands[1]) && !paradoxical_subreg_p (operands[0])" [(set (match_dup 0) @@ -1071,15 +1081,20 @@ [(set_attr "move_type" "shift_shift,load") (set_attr "mode" "DI")]) -(define_insn_and_split "zero_extendhi2" +(define_expand "zero_extendhi2" + [(set (match_operand:GPR 0 "register_operand") + (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand")))]) + +(define_insn_and_split "*zero_extendhi2_internal" [(set (match_operand:GPR 0 "register_operand" "=r,r") (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand" " r,m")))] - "" + "!TARGET_ZBB" "@ # lhu\t%0,%1" "&& reload_completed + && !TARGET_ZBB && REG_P (operands[1]) && !paradoxical_subreg_p (operands[0])" [(set (match_dup 0) @@ -1126,11 +1141,12 @@ [(set (match_operand:SUPERQI 0 "register_operand" "=r,r") (sign_extend:SUPERQI (match_operand:SHORT 1 "nonimmediate_operand" " r,m")))] - "" + "!TARGET_ZBB" "@ # l\t%0,%1" "&& reload_completed + && !TARGET_ZBB && REG_P (operands[1]) && !paradoxical_subreg_p (operands[0])" [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2))) @@ -1538,14 +1554,15 @@ LCT_NORMAL, VOIDmode, operands[0], Pmode, operands[1], Pmode, const0_rtx, Pmode); #else - emit_insn (gen_fence_i ()); + if (TARGET_ZIFENCEI) + emit_insn (gen_fence_i ()); #endif DONE; }) (define_insn "fence" [(unspec_volatile [(const_int 0)] UNSPECV_FENCE)] "" "%|fence%-") (define_insn "fence_i" @@ -1808,7 +1825,7 @@ (and:DI (match_operand:DI 1 "register_operand") 
(match_operand:DI 2 "high_mask_shift_operand"))) (clobber (match_operand:DI 3 "register_operand"))] - "TARGET_64BIT" + "TARGET_64BIT && !TARGET_ZBA" [(set (match_dup 3) (lshiftrt:DI (match_dup 1) (match_dup 2))) (set (match_dup 0) @@ -1828,6 +1845,7 @@ (match_operand 3 "immediate_operand" ""))) (clobber (match_scratch:DI 4 "=&r"))] "TARGET_64BIT + && !TARGET_ZBA && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)" "#" "&& reload_completed" @@ -2459,6 +2477,14 @@ "" "") +(define_insn "riscv_pcntw" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec + [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_PCNTW))] + "" + "pcntw\t%0,%1") + (define_insn "riscv_frflags" [(set (match_operand:SI 0 "register_operand" "=r") (unspec_volatile [(const_int 0)] UNSPECV_FRFLAGS))] @@ -2498,6 +2524,7 @@ [(set_attr "length" "0")] ) +(include "bitmanip.md") (include "crypto.md") ;; This fixes a failure with gcc.c-torture/execute/pr64242.c at -O2 for a diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index cd4807c1d27d..f0da9819c35a 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -138,6 +138,8 @@ Mask(RVC) Mask(RVE) +Mask(BITMANIP) + mriscv-attribute Target Report Var(riscv_emit_attribute_p) Init(-1) Emit RISC-V ELF attribute. 
@@ -158,3 +160,26 @@ Enum(riscv_align_data) String(natural) Value(riscv_align_data_type_natural) TargetVariable int riscv_crypto_subext + +TargetVariable +int riscv_bitmanip_subext + +TargetVariable +int riscv_zi_subext + +Enum +Name(isa_spec_class) Type(enum riscv_isa_spec_class) +Supported ISA specs (for use with the -misa-spec= option): + +EnumValue +Enum(isa_spec_class) String(2.2) Value(ISA_SPEC_CLASS_2P2) + +EnumValue +Enum(isa_spec_class) String(20190608) Value(ISA_SPEC_CLASS_20190608) + +EnumValue +Enum(isa_spec_class) String(20191213) Value(ISA_SPEC_CLASS_20191213) + +misa-spec= +Target Report RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) Init(TARGET_DEFAULT_ISA_SPEC) +Set the version of RISC-V ISA spec. diff --git a/gcc/config/riscv/rvintrin.h b/gcc/config/riscv/rvintrin.h new file mode 100644 index 000000000000..0f6bfef3ff42 --- /dev/null +++ b/gcc/config/riscv/rvintrin.h @@ -0,0 +1,1033 @@ +/* + * RISC-V "B" extension proposal intrinsics and emulation + * + * Copyright (C) 2019 Clifford Wolf + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * ---------------------------------------------------------------------- + * + * Define RVINTRIN_EMULATE to enable emulation mode. 
+ * + * This header defines C inline functions with "mockup intrinsics" for + * RISC-V "B" extension proposal instructions. + * + * _rv_*(...) + * RV32/64 intrinsics that operate on the "long" data type + * + * _rv32_*(...) + * RV32/64 intrinsics that operate on the "int32_t" data type + * + * _rv64_*(...) + * RV64-only intrinsics that operate on the "int64_t" data type + * + */ + +#ifndef RVINTRIN_H +#define RVINTRIN_H + +#include <limits.h> +#include <stdint.h> + +#if !defined(__riscv_xlen) && !defined(RVINTRIN_EMULATE) +# warning "Target is not RISC-V. Enabling emulation mode." +# define RVINTRIN_EMULATE 1 +#endif + +#ifndef RVINTRIN_EMULATE + +#if __riscv_xlen == 32 +# define RVINTRIN_RV32 +#endif + +#if __riscv_xlen == 64 +# define RVINTRIN_RV64 +#endif + +#ifdef RVINTRIN_RV32 +static inline int32_t _rv32_clz (int32_t rs1) { int32_t rd; __asm__ ("clz %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int32_t _rv32_ctz (int32_t rs1) { int32_t rd; __asm__ ("ctz %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int32_t _rv32_pcnt (int32_t rs1) { int32_t rd; __asm__ ("pcnt %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int32_t _rv32_sext_b(int32_t rs1) { int32_t rd; __asm__ ("sext.b %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int32_t _rv32_sext_h(int32_t rs1) { int32_t rd; __asm__ ("sext.h %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +#endif + +#ifdef RVINTRIN_RV64 +static inline int32_t _rv32_clz (int32_t rs1) { int32_t rd; __asm__ ("clzw %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int32_t _rv32_ctz (int32_t rs1) { int32_t rd; __asm__ ("ctzw %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int32_t _rv32_pcnt (int32_t rs1) { int32_t rd; __asm__ ("pcntw %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int32_t _rv32_sext_b(int32_t rs1) { int32_t rd; __asm__ ("sext.b %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int32_t _rv32_sext_h(int32_t rs1) { int32_t rd; __asm__ ("sext.h %0, %1" : 
"=r"(rd) : "r"(rs1)); return rd; } + +static inline int64_t _rv64_clz (int64_t rs1) { int64_t rd; __asm__ ("clz %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int64_t _rv64_ctz (int64_t rs1) { int64_t rd; __asm__ ("ctz %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int64_t _rv64_pcnt (int64_t rs1) { int64_t rd; __asm__ ("pcnt %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int64_t _rv64_sext_b(int64_t rs1) { int64_t rd; __asm__ ("sext.b %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int64_t _rv64_sext_h(int64_t rs1) { int64_t rd; __asm__ ("sext.h %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +#endif + +#ifdef RVINTRIN_RV32 +static inline int32_t _rv32_pack (int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("pack %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_packu(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("packu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_packh(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("packh %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_bfp (int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("bfp %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +#endif + +#ifdef RVINTRIN_RV64 +static inline int32_t _rv32_pack (int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("packw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_packu(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("packuw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_packh(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("packh %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_bfp (int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("bfpw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } + +static inline int64_t _rv64_pack (int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("pack 
%0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_packu(int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("packu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_packh(int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("packh %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_bfp (int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("bfp %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +#endif + +static inline int32_t _rv32_min (int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("min %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_minu(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("minu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_max (int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("max %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_maxu(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("maxu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } + +#ifdef RVINTRIN_RV64 +static inline int64_t _rv64_min (int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("min %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_minu(int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("minu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_max (int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("max %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_maxu(int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("maxu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +#endif + +#ifdef RVINTRIN_RV32 +static inline int32_t _rv32_bset (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("bseti %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("bset %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return 
rd; } +static inline int32_t _rv32_bclr (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("bclri %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("bclr %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_binv (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("binvi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("binv %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_bext (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("bexti %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("bext %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +#endif + +#ifdef RVINTRIN_RV64 +static inline int32_t _rv32_bset (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("bsetiw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("bsetw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_bclr (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("bclriw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("bclrw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_binv (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("binviw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("binvw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_bext (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("bexti %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("bextw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } + +static inline int64_t _rv64_bset (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("bseti %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & rs2)); else __asm__ 
("bset %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_bclr (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("bclri %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & rs2)); else __asm__ ("bclr %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_binv (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("binvi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & rs2)); else __asm__ ("binv %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_bext (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("bexti %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & rs2)); else __asm__ ("bext %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +#endif + +#ifdef RVINTRIN_RV32 +static inline int32_t _rv32_sll (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("slli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("sll %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_srl (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("srli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("srl %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_sra (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("srai %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("sra %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_slo (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("sloi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("slo %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_sro (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("sroi %0, %1, %2" : "=r"(rd) : 
"r"(rs1), "i"(31 & rs2)); else __asm__ ("sro %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_rol (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("rori %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & -rs2)); else __asm__ ("rol %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_ror (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("rori %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("ror %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_grev (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("grevi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("grev %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_gorc (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("gorci %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("gorc %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_shfl (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("shfli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(15 & rs2)); else __asm__ ("shfl %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_unshfl (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("unshfli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(15 & rs2)); else __asm__ ("unshfl %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +#endif + +#ifdef RVINTRIN_RV64 +static inline int32_t _rv32_sll (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("slliw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("sllw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_srl (int32_t rs1, int32_t rs2) { int32_t rd; if 
(__builtin_constant_p(rs2)) __asm__ ("srliw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("srlw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_sra (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("sraiw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("sraw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_slo (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("sloiw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("slow %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_sro (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("sroiw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("srow %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_rol (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("roriw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & -rs2)); else __asm__ ("rolw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_ror (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("roriw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("rorw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_grev (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("greviw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("grevw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_gorc (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("gorciw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("gorcw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_shfl (int32_t rs1, int32_t rs2) 
{ int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("shfli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(15 & rs2)); else __asm__ ("shflw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_unshfl (int32_t rs1, int32_t rs2) { int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("unshfli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(15 & rs2)); else __asm__ ("unshflw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } + +static inline int64_t _rv64_sll (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("slli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & rs2)); else __asm__ ("sll %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_srl (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("srli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & rs2)); else __asm__ ("srl %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_sra (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("srai %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & rs2)); else __asm__ ("sra %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_slo (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("sloi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & rs2)); else __asm__ ("slo %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_sro (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("sroi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & rs2)); else __asm__ ("sro %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_rol (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("rori %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & -rs2)); else __asm__ ("rol %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_ror (int64_t rs1, 
int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("rori %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & rs2)); else __asm__ ("ror %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_grev (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("grevi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & rs2)); else __asm__ ("grev %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_gorc (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("gorci %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & rs2)); else __asm__ ("gorc %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_shfl (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("shfli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("shfl %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_unshfl (int64_t rs1, int64_t rs2) { int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("unshfli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & rs2)); else __asm__ ("unshfl %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +#endif + +#ifdef RVINTRIN_RV32 +static inline int32_t _rv32_bext(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("bext %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_bdep(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("bdep %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +#endif + +#ifdef RVINTRIN_RV64 +static inline int32_t _rv32_bext(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("bextw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_bdep(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("bdepw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } + +static inline int64_t _rv64_bext(int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("bext %0, %1, %2" : "=r"(rd) : "r"(rs1), 
"r"(rs2)); return rd; } +static inline int64_t _rv64_bdep(int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("bdep %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +#endif + +#ifdef RVINTRIN_RV32 +static inline int32_t _rv32_clmul (int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("clmul %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_clmulh(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("clmulh %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_clmulr(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("clmulr %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +#endif + +#ifdef RVINTRIN_RV64 +static inline int32_t _rv32_clmul (int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("clmulw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_clmulh(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("clmulhw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int32_t _rv32_clmulr(int32_t rs1, int32_t rs2) { int32_t rd; __asm__ ("clmulrw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } + +static inline int64_t _rv64_clmul (int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("clmul %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_clmulh(int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("clmulh %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_clmulr(int64_t rs1, int64_t rs2) { int64_t rd; __asm__ ("clmulr %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +#endif + +static inline long _rv_crc32_b (long rs1) { long rd; __asm__ ("crc32.b %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline long _rv_crc32_h (long rs1) { long rd; __asm__ ("crc32.h %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline long _rv_crc32_w (long rs1) { long rd; __asm__ ("crc32.w %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } + +static inline long 
_rv_crc32c_b(long rs1) { long rd; __asm__ ("crc32c.b %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline long _rv_crc32c_h(long rs1) { long rd; __asm__ ("crc32c.h %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline long _rv_crc32c_w(long rs1) { long rd; __asm__ ("crc32c.w %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } + +#ifdef RVINTRIN_RV64 +static inline long _rv_crc32_d (long rs1) { long rd; __asm__ ("crc32.d %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline long _rv_crc32c_d(long rs1) { long rd; __asm__ ("crc32c.d %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +#endif + +#ifdef RVINTRIN_RV64 +static inline int64_t _rv64_bmatflip(int64_t rs1) { long rd; __asm__ ("bmatflip %0, %1" : "=r"(rd) : "r"(rs1)); return rd; } +static inline int64_t _rv64_bmator (int64_t rs1, int64_t rs2) { long rd; __asm__ ("bmator %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline int64_t _rv64_bmatxor (int64_t rs1, int64_t rs2) { long rd; __asm__ ("bmatxor %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +#endif + +static inline long _rv_cmix(long rs2, long rs1, long rs3) { long rd; __asm__ ("cmix %0, %1, %2, %3" : "=r"(rd) : "r"(rs2), "r"(rs1), "r"(rs3)); return rd; } +static inline long _rv_cmov(long rs2, long rs1, long rs3) { long rd; __asm__ ("cmov %0, %1, %2, %3" : "=r"(rd) : "r"(rs2), "r"(rs1), "r"(rs3)); return rd; } + +#ifdef RVINTRIN_RV32 +static inline int32_t _rv32_fsl(int32_t rs1, int32_t rs3, int32_t rs2) +{ + int32_t rd; + if (__builtin_constant_p(rs2)) { + rs2 &= 63; + if (rs2 < 32) + __asm__ ("fsli %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs3), "i"(rs2)); + else + __asm__ ("fsli %0, %1, %2, %3" : "=r"(rd) : "r"(rs3), "r"(rs1), "i"(rs2 & 31)); + } else { + __asm__ ("fsl %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs3), "r"(rs2)); + } + return rd; +} + +static inline int32_t _rv32_fsr(int32_t rs1, int32_t rs3, int32_t rs2) +{ + int32_t rd; + if (__builtin_constant_p(rs2)) { + rs2 &= 63; + if (rs2 < 32) + __asm__ 
("fsri %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs3), "i"(rs2)); + else + __asm__ ("fsri %0, %1, %2, %3" : "=r"(rd) : "r"(rs3), "r"(rs1), "i"(rs2 & 31)); + } else { + __asm__ ("fsr %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs3), "r"(rs2)); + } + return rd; +} +#endif + +#ifdef RVINTRIN_RV64 +static inline int32_t _rv32_fsl(int32_t rs1, int32_t rs3, int32_t rs2) +{ + int32_t rd; + if (__builtin_constant_p(rs2)) { + rs2 &= 63; + if (rs2 < 32) + __asm__ ("fsliw %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs3), "i"(rs2)); + else + __asm__ ("fsliw %0, %1, %2, %3" : "=r"(rd) : "r"(rs3), "r"(rs1), "i"(rs2 & 31)); + } else { + __asm__ ("fslw %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs3), "r"(rs2)); + } + return rd; +} + +static inline int32_t _rv32_fsr(int32_t rs1, int32_t rs3, int32_t rs2) +{ + int32_t rd; + if (__builtin_constant_p(rs2)) { + rs2 &= 63; + if (rs2 < 32) + __asm__ ("fsriw %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs3), "i"(rs2)); + else + __asm__ ("fsriw %0, %1, %2, %3" : "=r"(rd) : "r"(rs3), "r"(rs1), "i"(rs2 & 31)); + } else { + __asm__ ("fsrw %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs3), "r"(rs2)); + } + return rd; +} + +static inline int64_t _rv64_fsl(int64_t rs1, int64_t rs3, int64_t rs2) +{ + int64_t rd; + if (__builtin_constant_p(rs2)) { + rs2 &= 127; + if (rs2 < 64) + __asm__ ("fsli %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs3), "i"(rs2)); + else + __asm__ ("fsli %0, %1, %2, %3" : "=r"(rd) : "r"(rs3), "r"(rs1), "i"(rs2 & 63)); + } else { + __asm__ ("fsl %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs3), "r"(rs2)); + } + return rd; +} + +static inline int64_t _rv64_fsr(int64_t rs1, int64_t rs3, int64_t rs2) +{ + int64_t rd; + if (__builtin_constant_p(rs2)) { + rs2 &= 127; + if (rs2 < 64) + __asm__ ("fsri %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs3), "i"(rs2)); + else + __asm__ ("fsri %0, %1, %2, %3" : "=r"(rd) : "r"(rs3), "r"(rs1), "i"(rs2 & 63)); + } else { + __asm__ ("fsr %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs3), "r"(rs2)); + } + 
return rd; +} +#endif + +static inline long _rv_andn(long rs1, long rs2) { long rd; __asm__ ("andn %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline long _rv_orn (long rs1, long rs2) { long rd; __asm__ ("orn %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } +static inline long _rv_xnor(long rs1, long rs2) { long rd; __asm__ ("xnor %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; } + +#else // RVINTRIN_EMULATE + +#if UINT_MAX != 0xffffffffU +# error " emulation mode only supports systems with sizeof(int) = 4." +#endif + +#if (ULLONG_MAX == 0xffffffffLLU) || (ULLONG_MAX != 0xffffffffffffffffLLU) +# error " emulation mode only supports systems with sizeof(long long) = 8." +#endif + +#if UINT_MAX == ULONG_MAX +# define RVINTRIN_RV32 +#else +# define RVINTRIN_RV64 +#endif + +#ifdef RVINTRIN_NOBUILTINS +static inline int32_t _rv32_clz(int32_t rs1) { for (int i=0; i < 32; i++) { if (1 & (rs1 >> (31-i))) return i; } return 32; } +static inline int64_t _rv64_clz(int64_t rs1) { for (int i=0; i < 64; i++) { if (1 & (rs1 >> (63-i))) return i; } return 64; } + +static inline int32_t _rv32_ctz(int32_t rs1) { for (int i=0; i < 32; i++) { if (1 & (rs1 >> i)) return i; } return 32; } +static inline int64_t _rv64_ctz(int64_t rs1) { for (int i=0; i < 64; i++) { if (1 & (rs1 >> i)) return i; } return 64; } + +static inline int32_t _rv32_pcnt(int32_t rs1) { int k=0; for (int i=0; i < 32; i++) { if (1 & (rs1 >> i)) k++; } return k; } +static inline int64_t _rv64_pcnt(int64_t rs1) { int k=0; for (int i=0; i < 64; i++) { if (1 & (rs1 >> i)) k++; } return k; } +#else +static inline int32_t _rv32_clz(int32_t rs1) { return rs1 ? __builtin_clz(rs1) : 32; } +static inline int64_t _rv64_clz(int64_t rs1) { return rs1 ? __builtin_clzll(rs1) : 64; } + +static inline int32_t _rv32_ctz(int32_t rs1) { return rs1 ? __builtin_ctz(rs1) : 32; } +static inline int64_t _rv64_ctz(int64_t rs1) { return rs1 ? 
__builtin_ctzll(rs1) : 64; } + +static inline int32_t _rv32_pcnt(int32_t rs1) { return __builtin_popcount(rs1); } +static inline int64_t _rv64_pcnt(int64_t rs1) { return __builtin_popcountll(rs1); } +#endif + +static inline int32_t _rv32_sext_b(int32_t rs1) { return rs1 << (32-8) >> (32-8); } +static inline int64_t _rv64_sext_b(int64_t rs1) { return rs1 << (64-8) >> (64-8); } + +static inline int32_t _rv32_sext_h(int32_t rs1) { return rs1 << (32-16) >> (32-16); } +static inline int64_t _rv64_sext_h(int64_t rs1) { return rs1 << (64-16) >> (64-16); } + +static inline int32_t _rv32_pack(int32_t rs1, int32_t rs2) { return (rs1 & 0x0000ffff) | (rs2 << 16); } +static inline int64_t _rv64_pack(int64_t rs1, int64_t rs2) { return (rs1 & 0xffffffffLL) | (rs2 << 32); } + +static inline int32_t _rv32_packu(int32_t rs1, int32_t rs2) { return ((rs1 >> 16) & 0x0000ffff) | (rs2 >> 16 << 16); } +static inline int64_t _rv64_packu(int64_t rs1, int64_t rs2) { return ((rs1 >> 32) & 0xffffffffLL) | (rs2 >> 32 << 32); } + +static inline int32_t _rv32_packh(int32_t rs1, int32_t rs2) { return (rs1 & 0xff) | ((rs2 & 0xff) << 8); } +static inline int64_t _rv64_packh(int64_t rs1, int64_t rs2) { return (rs1 & 0xff) | ((rs2 & 0xff) << 8); } + +static inline int32_t _rv32_min (int32_t rs1, int32_t rs2) { return rs1 < rs2 ? rs1 : rs2; } +static inline int32_t _rv32_minu(int32_t rs1, int32_t rs2) { return (uint32_t)rs1 < (uint32_t)rs2 ? rs1 : rs2; } +static inline int32_t _rv32_max (int32_t rs1, int32_t rs2) { return rs1 > rs2 ? rs1 : rs2; } +static inline int32_t _rv32_maxu(int32_t rs1, int32_t rs2) { return (uint32_t)rs1 > (uint32_t)rs2 ? rs1 : rs2; } + +static inline int64_t _rv64_min (int64_t rs1, int64_t rs2) { return rs1 < rs2 ? rs1 : rs2; } +static inline int64_t _rv64_minu(int64_t rs1, int64_t rs2) { return (uint64_t)rs1 < (uint64_t)rs2 ? rs1 : rs2; } +static inline int64_t _rv64_max (int64_t rs1, int64_t rs2) { return rs1 > rs2 ? 
rs1 : rs2; } +static inline int64_t _rv64_maxu(int64_t rs1, int64_t rs2) { return (uint64_t)rs1 > (uint64_t)rs2 ? rs1 : rs2; } + +static inline int32_t _rv32_bset (int32_t rs1, int32_t rs2) { return rs1 | (1 << (rs2 & 31)); } +static inline int32_t _rv32_bclr (int32_t rs1, int32_t rs2) { return rs1 & ~(1 << (rs2 & 31)); } +static inline int32_t _rv32_binv (int32_t rs1, int32_t rs2) { return rs1 ^ (1 << (rs2 & 31)); } +static inline int32_t _rv32_bext (int32_t rs1, int32_t rs2) { return 1 & (rs1 >> (rs2 & 31)); } + +static inline int64_t _rv64_bset (int64_t rs1, int64_t rs2) { return rs1 | (1LL << (rs2 & 63)); } +static inline int64_t _rv64_bclr (int64_t rs1, int64_t rs2) { return rs1 & ~(1LL << (rs2 & 63)); } +static inline int64_t _rv64_binv (int64_t rs1, int64_t rs2) { return rs1 ^ (1LL << (rs2 & 63)); } +static inline int64_t _rv64_bext (int64_t rs1, int64_t rs2) { return 1LL & (rs1 >> (rs2 & 63)); } + +static inline int32_t _rv32_sll (int32_t rs1, int32_t rs2) { return rs1 << (rs2 & 31); } +static inline int32_t _rv32_srl (int32_t rs1, int32_t rs2) { return (uint32_t)rs1 >> (rs2 & 31); } +static inline int32_t _rv32_sra (int32_t rs1, int32_t rs2) { return rs1 >> (rs2 & 31); } +static inline int32_t _rv32_slo (int32_t rs1, int32_t rs2) { return ~(~rs1 << (rs2 & 31)); } +static inline int32_t _rv32_sro (int32_t rs1, int32_t rs2) { return ~(~(uint32_t)rs1 >> (rs2 & 31)); } +static inline int32_t _rv32_rol (int32_t rs1, int32_t rs2) { return _rv32_sll(rs1, rs2) | _rv32_srl(rs1, -rs2); } +static inline int32_t _rv32_ror (int32_t rs1, int32_t rs2) { return _rv32_srl(rs1, rs2) | _rv32_sll(rs1, -rs2); } + +static inline int32_t _rv32_bfp(int32_t rs1, int32_t rs2) +{ + uint32_t cfg = rs2 >> 16; + int len = (cfg >> 8) & 15; + int off = cfg & 31; + len = len ? 
len : 16; + uint32_t mask = _rv32_slo(0, len) << off; + uint32_t data = rs2 << off; + return (data & mask) | (rs1 & ~mask); +} + +static inline int32_t _rv32_grev(int32_t rs1, int32_t rs2) +{ + uint32_t x = rs1; + int shamt = rs2 & 31; + if (shamt & 1) x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); + if (shamt & 2) x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); + if (shamt & 4) x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); + if (shamt & 8) x = ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); + if (shamt & 16) x = ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); + return x; +} + +static inline int32_t _rv32_gorc(int32_t rs1, int32_t rs2) +{ + uint32_t x = rs1; + int shamt = rs2 & 31; + if (shamt & 1) x |= ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); + if (shamt & 2) x |= ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); + if (shamt & 4) x |= ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); + if (shamt & 8) x |= ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); + if (shamt & 16) x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); + return x; +} + +static inline uint32_t _rvintrin_shuffle32_stage(uint32_t src, uint32_t maskL, uint32_t maskR, int N) +{ + uint32_t x = src & ~(maskL | maskR); + x |= ((src << N) & maskL) | ((src >> N) & maskR); + return x; +} + +static inline int32_t _rv32_shfl(int32_t rs1, int32_t rs2) +{ + uint32_t x = rs1; + int shamt = rs2 & 15; + + if (shamt & 8) x = _rvintrin_shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8); + if (shamt & 4) x = _rvintrin_shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4); + if (shamt & 2) x = _rvintrin_shuffle32_stage(x, 0x30303030, 0x0c0c0c0c, 2); + if (shamt & 1) x = _rvintrin_shuffle32_stage(x, 0x44444444, 0x22222222, 1); + + return x; +} + +static inline int32_t _rv32_unshfl(int32_t rs1, int32_t rs2) +{ + uint32_t x = rs1; + int shamt = rs2 & 15; + + if (shamt & 1) x = _rvintrin_shuffle32_stage(x, 0x44444444, 0x22222222, 1); + if (shamt & 2) x = 
_rvintrin_shuffle32_stage(x, 0x30303030, 0x0c0c0c0c, 2); + if (shamt & 4) x = _rvintrin_shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4); + if (shamt & 8) x = _rvintrin_shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8); + + return x; +} + +static inline int64_t _rv64_sll (int64_t rs1, int64_t rs2) { return rs1 << (rs2 & 63); } +static inline int64_t _rv64_srl (int64_t rs1, int64_t rs2) { return (uint64_t)rs1 >> (rs2 & 63); } +static inline int64_t _rv64_sra (int64_t rs1, int64_t rs2) { return rs1 >> (rs2 & 63); } +static inline int64_t _rv64_slo (int64_t rs1, int64_t rs2) { return ~(~rs1 << (rs2 & 63)); } +static inline int64_t _rv64_sro (int64_t rs1, int64_t rs2) { return ~(~(uint64_t)rs1 >> (rs2 & 63)); } +static inline int64_t _rv64_rol (int64_t rs1, int64_t rs2) { return _rv64_sll(rs1, rs2) | _rv64_srl(rs1, -rs2); } +static inline int64_t _rv64_ror (int64_t rs1, int64_t rs2) { return _rv64_srl(rs1, rs2) | _rv64_sll(rs1, -rs2); } + +static inline int64_t _rv64_bfp(int64_t rs1, int64_t rs2) +{ + uint64_t cfg = (uint64_t)rs2 >> 32; + if ((cfg >> 30) == 2) + cfg = cfg >> 16; + int len = (cfg >> 8) & 31; + int off = cfg & 63; + len = len ? 
len : 32; + uint64_t mask = _rv64_slo(0, len) << off; + uint64_t data = rs2 << off; + return (data & mask) | (rs1 & ~mask); +} + +static inline int64_t _rv64_grev(int64_t rs1, int64_t rs2) +{ + uint64_t x = rs1; + int shamt = rs2 & 63; + if (shamt & 1) x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); + if (shamt & 2) x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); + if (shamt & 4) x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); + if (shamt & 8) x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); + if (shamt & 16) x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); + if (shamt & 32) x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32); + return x; +} + +static inline int64_t _rv64_gorc(int64_t rs1, int64_t rs2) +{ + uint64_t x = rs1; + int shamt = rs2 & 63; + if (shamt & 1) x |= ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); + if (shamt & 2) x |= ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); + if (shamt & 4) x |= ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); + if (shamt & 8) x |= ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); + if (shamt & 16) x |= ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); + if (shamt & 32) x |= ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32); + return x; +} + +static inline uint64_t _rvintrin_shuffle64_stage(uint64_t src, uint64_t maskL, uint64_t maskR, int N) +{ + uint64_t x = src & ~(maskL | maskR); + x |= ((src << N) & maskL) | ((src >> N) & maskR); + return x; +} + +static inline int64_t _rv64_shfl(int64_t rs1, int64_t rs2) +{ + uint64_t x = rs1; + int shamt = rs2 & 31; + if (shamt & 16) x = _rvintrin_shuffle64_stage(x, 0x0000ffff00000000LL, 0x00000000ffff0000LL, 16); + if (shamt & 8) x = _rvintrin_shuffle64_stage(x, 0x00ff000000ff0000LL, 
0x0000ff000000ff00LL, 8); + if (shamt & 4) x = _rvintrin_shuffle64_stage(x, 0x0f000f000f000f00LL, 0x00f000f000f000f0LL, 4); + if (shamt & 2) x = _rvintrin_shuffle64_stage(x, 0x3030303030303030LL, 0x0c0c0c0c0c0c0c0cLL, 2); + if (shamt & 1) x = _rvintrin_shuffle64_stage(x, 0x4444444444444444LL, 0x2222222222222222LL, 1); + return x; +} + +static inline int64_t _rv64_unshfl(int64_t rs1, int64_t rs2) +{ + uint64_t x = rs1; + int shamt = rs2 & 31; + if (shamt & 1) x = _rvintrin_shuffle64_stage(x, 0x4444444444444444LL, 0x2222222222222222LL, 1); + if (shamt & 2) x = _rvintrin_shuffle64_stage(x, 0x3030303030303030LL, 0x0c0c0c0c0c0c0c0cLL, 2); + if (shamt & 4) x = _rvintrin_shuffle64_stage(x, 0x0f000f000f000f00LL, 0x00f000f000f000f0LL, 4); + if (shamt & 8) x = _rvintrin_shuffle64_stage(x, 0x00ff000000ff0000LL, 0x0000ff000000ff00LL, 8); + if (shamt & 16) x = _rvintrin_shuffle64_stage(x, 0x0000ffff00000000LL, 0x00000000ffff0000LL, 16); + return x; +} + +static inline int32_t _rv32_bext(int32_t rs1, int32_t rs2) +{ + uint32_t c = 0, i = 0, data = rs1, mask = rs2; + while (mask) { + uint32_t b = mask & ~((mask | (mask-1)) + 1); + c |= (data & b) >> (_rv32_ctz(b) - i); + i += _rv32_pcnt(b); + mask -= b; + } + return c; +} + +static inline int32_t _rv32_bdep(int32_t rs1, int32_t rs2) +{ + uint32_t c = 0, i = 0, data = rs1, mask = rs2; + while (mask) { + uint32_t b = mask & ~((mask | (mask-1)) + 1); + c |= (data << (_rv32_ctz(b) - i)) & b; + i += _rv32_pcnt(b); + mask -= b; + } + return c; +} + +static inline int64_t _rv64_bext(int64_t rs1, int64_t rs2) +{ + uint64_t c = 0, i = 0, data = rs1, mask = rs2; + while (mask) { + uint64_t b = mask & ~((mask | (mask-1)) + 1); + c |= (data & b) >> (_rv64_ctz(b) - i); + i += _rv64_pcnt(b); + mask -= b; + } + return c; +} + +static inline int64_t _rv64_bdep(int64_t rs1, int64_t rs2) +{ + uint64_t c = 0, i = 0, data = rs1, mask = rs2; + while (mask) { + uint64_t b = mask & ~((mask | (mask-1)) + 1); + c |= (data << (_rv64_ctz(b) - i)) & b; + i 
+= _rv64_pcnt(b); + mask -= b; + } + return c; +} + +static inline int32_t _rv32_clmul(int32_t rs1, int32_t rs2) +{ + uint32_t a = rs1, b = rs2, x = 0; + for (int i = 0; i < 32; i++) + if ((b >> i) & 1) + x ^= a << i; + return x; +} + +static inline int32_t _rv32_clmulh(int32_t rs1, int32_t rs2) +{ + uint32_t a = rs1, b = rs2, x = 0; + for (int i = 1; i < 32; i++) + if ((b >> i) & 1) + x ^= a >> (32-i); + return x; +} + +static inline int32_t _rv32_clmulr(int32_t rs1, int32_t rs2) +{ + uint32_t a = rs1, b = rs2, x = 0; + for (int i = 0; i < 32; i++) + if ((b >> i) & 1) + x ^= a >> (31-i); + return x; +} + +static inline int64_t _rv64_clmul(int64_t rs1, int64_t rs2) +{ + uint64_t a = rs1, b = rs2, x = 0; + for (int i = 0; i < 64; i++) + if ((b >> i) & 1) + x ^= a << i; + return x; +} + +static inline int64_t _rv64_clmulh(int64_t rs1, int64_t rs2) +{ + uint64_t a = rs1, b = rs2, x = 0; + for (int i = 1; i < 64; i++) + if ((b >> i) & 1) + x ^= a >> (64-i); + return x; +} + +static inline int64_t _rv64_clmulr(int64_t rs1, int64_t rs2) +{ + uint64_t a = rs1, b = rs2, x = 0; + for (int i = 0; i < 64; i++) + if ((b >> i) & 1) + x ^= a >> (63-i); + return x; +} + +static inline long _rvintrin_crc32(unsigned long x, int nbits) +{ + for (int i = 0; i < nbits; i++) + x = (x >> 1) ^ (0xEDB88320 & ~((x&1)-1)); + return x; +} + +static inline long _rvintrin_crc32c(unsigned long x, int nbits) +{ + for (int i = 0; i < nbits; i++) + x = (x >> 1) ^ (0x82F63B78 & ~((x&1)-1)); + return x; +} + +static inline long _rv_crc32_b(long rs1) { return _rvintrin_crc32(rs1, 8); } +static inline long _rv_crc32_h(long rs1) { return _rvintrin_crc32(rs1, 16); } +static inline long _rv_crc32_w(long rs1) { return _rvintrin_crc32(rs1, 32); } + +static inline long _rv_crc32c_b(long rs1) { return _rvintrin_crc32c(rs1, 8); } +static inline long _rv_crc32c_h(long rs1) { return _rvintrin_crc32c(rs1, 16); } +static inline long _rv_crc32c_w(long rs1) { return _rvintrin_crc32c(rs1, 32); } + +#ifdef 
RVINTRIN_RV64 +static inline long _rv_crc32_d (long rs1) { return _rvintrin_crc32 (rs1, 64); } +static inline long _rv_crc32c_d(long rs1) { return _rvintrin_crc32c(rs1, 64); } +#endif + +static inline int64_t _rv64_bmatflip(int64_t rs1) +{ + uint64_t x = rs1; + x = _rv64_shfl(x, 31); + x = _rv64_shfl(x, 31); + x = _rv64_shfl(x, 31); + return x; +} + +static inline int64_t _rv64_bmatxor(int64_t rs1, int64_t rs2) +{ + // transpose of rs2 + int64_t rs2t = _rv64_bmatflip(rs2); + + uint8_t u[8]; // rows of rs1 + uint8_t v[8]; // cols of rs2 + + for (int i = 0; i < 8; i++) { + u[i] = rs1 >> (i*8); + v[i] = rs2t >> (i*8); + } + + uint64_t x = 0; + for (int i = 0; i < 64; i++) { + if (_rv64_pcnt(u[i / 8] & v[i % 8]) & 1) + x |= 1LL << i; + } + + return x; +} + +static inline int64_t _rv64_bmator(int64_t rs1, int64_t rs2) +{ + // transpose of rs2 + int64_t rs2t = _rv64_bmatflip(rs2); + + uint8_t u[8]; // rows of rs1 + uint8_t v[8]; // cols of rs2 + + for (int i = 0; i < 8; i++) { + u[i] = rs1 >> (i*8); + v[i] = rs2t >> (i*8); + } + + uint64_t x = 0; + for (int i = 0; i < 64; i++) { + if ((u[i / 8] & v[i % 8]) != 0) + x |= 1LL << i; + } + + return x; +} + +static inline long _rv_cmix(long rs2, long rs1, long rs3) +{ + return (rs1 & rs2) | (rs3 & ~rs2); +} + +static inline long _rv_cmov(long rs2, long rs1, long rs3) +{ + return rs2 ? rs1 : rs3; +} + +static inline int32_t _rv32_fsl(int32_t rs1, int32_t rs3, int32_t rs2) +{ + int shamt = rs2 & 63; + uint32_t A = rs1, B = rs3; + if (shamt >= 32) { + shamt -= 32; + A = rs3; + B = rs1; + } + return shamt ? (A << shamt) | (B >> (32-shamt)) : A; +} + +static inline int32_t _rv32_fsr(int32_t rs1, int32_t rs3, int32_t rs2) +{ + int shamt = rs2 & 63; + uint32_t A = rs1, B = rs3; + if (shamt >= 32) { + shamt -= 32; + A = rs3; + B = rs1; + } + return shamt ? 
(A >> shamt) | (B << (32-shamt)) : A; +} + +static inline int64_t _rv64_fsl(int64_t rs1, int64_t rs3, int64_t rs2) +{ + int shamt = rs2 & 127; + uint64_t A = rs1, B = rs3; + if (shamt >= 64) { + shamt -= 64; + A = rs3; + B = rs1; + } + return shamt ? (A << shamt) | (B >> (64-shamt)) : A; +} + +static inline int64_t _rv64_fsr(int64_t rs1, int64_t rs3, int64_t rs2) +{ + int shamt = rs2 & 127; + uint64_t A = rs1, B = rs3; + if (shamt >= 64) { + shamt -= 64; + A = rs3; + B = rs1; + } + return shamt ? (A >> shamt) | (B << (64-shamt)) : A; +} + +static inline long _rv_andn(long rs1, long rs2) { return rs1 & ~rs2; } +static inline long _rv_orn (long rs1, long rs2) { return rs1 | ~rs2; } +static inline long _rv_xnor(long rs1, long rs2) { return rs1 ^ ~rs2; } + +#endif // RVINTRIN_EMULATE + +#ifdef RVINTRIN_RV32 +static inline long _rv_clz (long rs1) { return _rv32_clz (rs1); } +static inline long _rv_ctz (long rs1) { return _rv32_ctz (rs1); } +static inline long _rv_pcnt (long rs1) { return _rv32_pcnt (rs1); } +static inline long _rv_sext_b (long rs1) { return _rv32_sext_b(rs1); } +static inline long _rv_sext_h (long rs1) { return _rv32_sext_h(rs1); } + +static inline long _rv_pack (long rs1, long rs2) { return _rv32_pack (rs1, rs2); } +static inline long _rv_packu (long rs1, long rs2) { return _rv32_packu (rs1, rs2); } +static inline long _rv_packh (long rs1, long rs2) { return _rv32_packh (rs1, rs2); } +static inline long _rv_bfp (long rs1, long rs2) { return _rv32_bfp (rs1, rs2); } +static inline long _rv_min (long rs1, long rs2) { return _rv32_min (rs1, rs2); } +static inline long _rv_minu (long rs1, long rs2) { return _rv32_minu (rs1, rs2); } +static inline long _rv_max (long rs1, long rs2) { return _rv32_max (rs1, rs2); } +static inline long _rv_maxu (long rs1, long rs2) { return _rv32_maxu (rs1, rs2); } +static inline long _rv_bset (long rs1, long rs2) { return _rv32_bset (rs1, rs2); } +static inline long _rv_bclr (long rs1, long rs2) { return _rv32_bclr (rs1, rs2); 
} +static inline long _rv_binv (long rs1, long rs2) { return _rv32_binv (rs1, rs2); } +static inline long _rv_bext (long rs1, long rs2) { return _rv32_bext (rs1, rs2); } +static inline long _rv_sll (long rs1, long rs2) { return _rv32_sll (rs1, rs2); } +static inline long _rv_srl (long rs1, long rs2) { return _rv32_srl (rs1, rs2); } +static inline long _rv_sra (long rs1, long rs2) { return _rv32_sra (rs1, rs2); } +static inline long _rv_slo (long rs1, long rs2) { return _rv32_slo (rs1, rs2); } +static inline long _rv_sro (long rs1, long rs2) { return _rv32_sro (rs1, rs2); } +static inline long _rv_rol (long rs1, long rs2) { return _rv32_rol (rs1, rs2); } +static inline long _rv_ror (long rs1, long rs2) { return _rv32_ror (rs1, rs2); } +static inline long _rv_grev (long rs1, long rs2) { return _rv32_grev (rs1, rs2); } +static inline long _rv_gorc (long rs1, long rs2) { return _rv32_gorc (rs1, rs2); } +static inline long _rv_shfl (long rs1, long rs2) { return _rv32_shfl (rs1, rs2); } +static inline long _rv_unshfl (long rs1, long rs2) { return _rv32_unshfl (rs1, rs2); } +static inline long _rv_bext (long rs1, long rs2) { return _rv32_bext (rs1, rs2); } +static inline long _rv_bdep (long rs1, long rs2) { return _rv32_bdep (rs1, rs2); } +static inline long _rv_clmul (long rs1, long rs2) { return _rv32_clmul (rs1, rs2); } +static inline long _rv_clmulh (long rs1, long rs2) { return _rv32_clmulh (rs1, rs2); } +static inline long _rv_clmulr (long rs1, long rs2) { return _rv32_clmulr (rs1, rs2); } + +static inline long _rv_fsl(long rs1, long rs3, long rs2) { return _rv32_fsl(rs1, rs3, rs2); } +static inline long _rv_fsr(long rs1, long rs3, long rs2) { return _rv32_fsr(rs1, rs3, rs2); } +#endif + +#ifdef RVINTRIN_RV64 +static inline long _rv_clz (long rs1) { return _rv64_clz (rs1); } +static inline long _rv_ctz (long rs1) { return _rv64_ctz (rs1); } +static inline long _rv_pcnt (long rs1) { return _rv64_pcnt (rs1); } +static inline long _rv_sext_b (long rs1) { return 
_rv64_sext_b (rs1); } +static inline long _rv_sext_h (long rs1) { return _rv64_sext_h (rs1); } +static inline long _rv_bmatflip(long rs1) { return _rv64_bmatflip(rs1); } + +static inline long _rv_pack (long rs1, long rs2) { return _rv64_pack (rs1, rs2); } +static inline long _rv_packu (long rs1, long rs2) { return _rv64_packu (rs1, rs2); } +static inline long _rv_packh (long rs1, long rs2) { return _rv64_packh (rs1, rs2); } +static inline long _rv_bfp (long rs1, long rs2) { return _rv64_bfp (rs1, rs2); } +static inline long _rv_min (long rs1, long rs2) { return _rv64_min (rs1, rs2); } +static inline long _rv_minu (long rs1, long rs2) { return _rv64_minu (rs1, rs2); } +static inline long _rv_max (long rs1, long rs2) { return _rv64_max (rs1, rs2); } +static inline long _rv_maxu (long rs1, long rs2) { return _rv64_maxu (rs1, rs2); } +static inline long _rv_bset (long rs1, long rs2) { return _rv64_bset (rs1, rs2); } +static inline long _rv_bclr (long rs1, long rs2) { return _rv64_bclr (rs1, rs2); } +static inline long _rv_binv (long rs1, long rs2) { return _rv64_binv (rs1, rs2); } +static inline long _rv_bext (long rs1, long rs2) { return _rv64_bext (rs1, rs2); } +static inline long _rv_sll (long rs1, long rs2) { return _rv64_sll (rs1, rs2); } +static inline long _rv_srl (long rs1, long rs2) { return _rv64_srl (rs1, rs2); } +static inline long _rv_sra (long rs1, long rs2) { return _rv64_sra (rs1, rs2); } +static inline long _rv_slo (long rs1, long rs2) { return _rv64_slo (rs1, rs2); } +static inline long _rv_sro (long rs1, long rs2) { return _rv64_sro (rs1, rs2); } +static inline long _rv_rol (long rs1, long rs2) { return _rv64_rol (rs1, rs2); } +static inline long _rv_ror (long rs1, long rs2) { return _rv64_ror (rs1, rs2); } +static inline long _rv_grev (long rs1, long rs2) { return _rv64_grev (rs1, rs2); } +static inline long _rv_gorc (long rs1, long rs2) { return _rv64_gorc (rs1, rs2); } +static inline long _rv_shfl (long rs1, long rs2) { return _rv64_shfl (rs1, 
rs2); } +static inline long _rv_unshfl (long rs1, long rs2) { return _rv64_unshfl (rs1, rs2); } +static inline long _rv_bext (long rs1, long rs2) { return _rv64_bext (rs1, rs2); } +static inline long _rv_bdep (long rs1, long rs2) { return _rv64_bdep (rs1, rs2); } +static inline long _rv_clmul (long rs1, long rs2) { return _rv64_clmul (rs1, rs2); } +static inline long _rv_clmulh (long rs1, long rs2) { return _rv64_clmulh (rs1, rs2); } +static inline long _rv_clmulr (long rs1, long rs2) { return _rv64_clmulr (rs1, rs2); } +static inline long _rv_bmator (long rs1, long rs2) { return _rv64_bmator (rs1, rs2); } +static inline long _rv_bmatxor(long rs1, long rs2) { return _rv64_bmatxor(rs1, rs2); } + +static inline long _rv_fsl(long rs1, long rs3, long rs2) { return _rv64_fsl(rs1, rs3, rs2); } +static inline long _rv_fsr(long rs1, long rs3, long rs2) { return _rv64_fsr(rs1, rs3, rs2); } +#endif + +#ifdef RVINTRIN_RV32 + +#define RVINTRIN_GREV_PSEUDO_OP32(_arg, _name) \ + static inline long _rv_ ## _name(long rs1) { return _rv_grev (rs1, _arg); } \ + static inline int32_t _rv32_ ## _name(int32_t rs1) { return _rv32_grev(rs1, _arg); } + +#define RVINTRIN_GREV_PSEUDO_OP64(_arg, _name) + +#else + +#define RVINTRIN_GREV_PSEUDO_OP32(_arg, _name) \ + static inline int32_t _rv32_ ## _name(int32_t rs1) { return _rv32_grev(rs1, _arg); } + +#define RVINTRIN_GREV_PSEUDO_OP64(_arg, _name) \ + static inline long _rv_ ## _name(long rs1) { return _rv_grev (rs1, _arg); } \ + static inline int64_t _rv64_ ## _name(int64_t rs1) { return _rv64_grev(rs1, _arg); } +#endif + +RVINTRIN_GREV_PSEUDO_OP32( 1, rev_p) +RVINTRIN_GREV_PSEUDO_OP32( 2, rev2_n) +RVINTRIN_GREV_PSEUDO_OP32( 3, rev_n) +RVINTRIN_GREV_PSEUDO_OP32( 4, rev4_b) +RVINTRIN_GREV_PSEUDO_OP32( 6, rev2_b) +RVINTRIN_GREV_PSEUDO_OP32( 7, rev_b) +RVINTRIN_GREV_PSEUDO_OP32( 8, rev8_h) +RVINTRIN_GREV_PSEUDO_OP32(12, rev4_h) +RVINTRIN_GREV_PSEUDO_OP32(14, rev2_h) +RVINTRIN_GREV_PSEUDO_OP32(15, rev_h) +RVINTRIN_GREV_PSEUDO_OP32(16, rev16) 
+RVINTRIN_GREV_PSEUDO_OP32(24, rev8) +RVINTRIN_GREV_PSEUDO_OP32(28, rev4) +RVINTRIN_GREV_PSEUDO_OP32(30, rev2) +RVINTRIN_GREV_PSEUDO_OP32(31, rev) + +RVINTRIN_GREV_PSEUDO_OP64( 1, rev_p) +RVINTRIN_GREV_PSEUDO_OP64( 2, rev2_n) +RVINTRIN_GREV_PSEUDO_OP64( 3, rev_n) +RVINTRIN_GREV_PSEUDO_OP64( 4, rev4_b) +RVINTRIN_GREV_PSEUDO_OP64( 6, rev2_b) +RVINTRIN_GREV_PSEUDO_OP64( 7, rev_b) +RVINTRIN_GREV_PSEUDO_OP64( 8, rev8_h) +RVINTRIN_GREV_PSEUDO_OP64(12, rev4_h) +RVINTRIN_GREV_PSEUDO_OP64(14, rev2_h) +RVINTRIN_GREV_PSEUDO_OP64(15, rev_h) +RVINTRIN_GREV_PSEUDO_OP64(16, rev16_w) +RVINTRIN_GREV_PSEUDO_OP64(24, rev8_w) +RVINTRIN_GREV_PSEUDO_OP64(28, rev4_w) +RVINTRIN_GREV_PSEUDO_OP64(30, rev2_w) +RVINTRIN_GREV_PSEUDO_OP64(31, rev_w) +RVINTRIN_GREV_PSEUDO_OP64(32, rev32) +RVINTRIN_GREV_PSEUDO_OP64(48, rev16) +RVINTRIN_GREV_PSEUDO_OP64(56, rev8) +RVINTRIN_GREV_PSEUDO_OP64(60, rev4) +RVINTRIN_GREV_PSEUDO_OP64(62, rev2) +RVINTRIN_GREV_PSEUDO_OP64(63, rev) + +#ifdef RVINTRIN_RV32 + +#define RVINTRIN_GORC_PSEUDO_OP32(_arg, _name) \ + static inline long _rv_ ## _name(long rs1) { return _rv_gorc (rs1, _arg); } \ + static inline int32_t _rv32_ ## _name(int32_t rs1) { return _rv32_gorc(rs1, _arg); } + +#define RVINTRIN_GORC_PSEUDO_OP64(_arg, _name) + +#else + +#define RVINTRIN_GORC_PSEUDO_OP32(_arg, _name) \ + static inline int32_t _rv32_ ## _name(int32_t rs1) { return _rv32_gorc(rs1, _arg); } + +#define RVINTRIN_GORC_PSEUDO_OP64(_arg, _name) \ + static inline long _rv_ ## _name(long rs1) { return _rv_gorc (rs1, _arg); } \ + static inline int64_t _rv64_ ## _name(int64_t rs1) { return _rv64_gorc(rs1, _arg); } +#endif + +RVINTRIN_GORC_PSEUDO_OP32( 1, orc_p) +RVINTRIN_GORC_PSEUDO_OP32( 2, orc2_n) +RVINTRIN_GORC_PSEUDO_OP32( 3, orc_n) +RVINTRIN_GORC_PSEUDO_OP32( 4, orc4_b) +RVINTRIN_GORC_PSEUDO_OP32( 6, orc2_b) +RVINTRIN_GORC_PSEUDO_OP32( 7, orc_b) +RVINTRIN_GORC_PSEUDO_OP32( 8, orc8_h) +RVINTRIN_GORC_PSEUDO_OP32(12, orc4_h) +RVINTRIN_GORC_PSEUDO_OP32(14, orc2_h) 
+RVINTRIN_GORC_PSEUDO_OP32(15, orc_h) +RVINTRIN_GORC_PSEUDO_OP32(16, orc16) +RVINTRIN_GORC_PSEUDO_OP32(24, orc8) +RVINTRIN_GORC_PSEUDO_OP32(28, orc4) +RVINTRIN_GORC_PSEUDO_OP32(30, orc2) +RVINTRIN_GORC_PSEUDO_OP32(31, orc) + +RVINTRIN_GORC_PSEUDO_OP64( 1, orc_p) +RVINTRIN_GORC_PSEUDO_OP64( 2, orc2_n) +RVINTRIN_GORC_PSEUDO_OP64( 3, orc_n) +RVINTRIN_GORC_PSEUDO_OP64( 4, orc4_b) +RVINTRIN_GORC_PSEUDO_OP64( 6, orc2_b) +RVINTRIN_GORC_PSEUDO_OP64( 7, orc_b) +RVINTRIN_GORC_PSEUDO_OP64( 8, orc8_h) +RVINTRIN_GORC_PSEUDO_OP64(12, orc4_h) +RVINTRIN_GORC_PSEUDO_OP64(14, orc2_h) +RVINTRIN_GORC_PSEUDO_OP64(15, orc_h) +RVINTRIN_GORC_PSEUDO_OP64(16, orc16_w) +RVINTRIN_GORC_PSEUDO_OP64(24, orc8_w) +RVINTRIN_GORC_PSEUDO_OP64(28, orc4_w) +RVINTRIN_GORC_PSEUDO_OP64(30, orc2_w) +RVINTRIN_GORC_PSEUDO_OP64(31, orc_w) +RVINTRIN_GORC_PSEUDO_OP64(32, orc32) +RVINTRIN_GORC_PSEUDO_OP64(48, orc16) +RVINTRIN_GORC_PSEUDO_OP64(56, orc8) +RVINTRIN_GORC_PSEUDO_OP64(60, orc4) +RVINTRIN_GORC_PSEUDO_OP64(62, orc2) +RVINTRIN_GORC_PSEUDO_OP64(63, orc) + +#ifdef RVINTRIN_RV32 /* zip/unzip pseudo-ops: _name wraps shfl and un##_name wraps unshfl, both with the fixed control value _arg */ + +#define RVINTRIN_SHFL_PSEUDO_OP32(_arg, _name) \ + static inline long _rv_ ## _name(long rs1) { return _rv_shfl (rs1, _arg); } \ + static inline long _rv_un ## _name(long rs1) { return _rv_unshfl (rs1, _arg); } \ + static inline int32_t _rv32_ ## _name(int32_t rs1) { return _rv32_shfl (rs1, _arg); } \ + static inline int32_t _rv32_un ## _name(int32_t rs1) { return _rv32_unshfl(rs1, _arg); } + +#define RVINTRIN_SHFL_PSEUDO_OP64(_arg, _name) + +#else /* not RV32: only the long and int64_t zip/unzip wrappers are generated */ + +#define RVINTRIN_SHFL_PSEUDO_OP32(_arg, _name) + +#define RVINTRIN_SHFL_PSEUDO_OP64(_arg, _name) \ + static inline long _rv_ ## _name(long rs1) { return _rv_shfl (rs1, _arg); } \ + static inline long _rv_un ## _name(long rs1) { return _rv_unshfl (rs1, _arg); } \ + static inline int64_t _rv64_ ## _name(int64_t rs1) { return _rv64_shfl (rs1, _arg); } \ + static inline int64_t _rv64_un ## _name(int64_t rs1) { return _rv64_unshfl(rs1, _arg); } + +#endif + +RVINTRIN_SHFL_PSEUDO_OP32( 1, 
zip_n) +RVINTRIN_SHFL_PSEUDO_OP32( 2, zip2_b) +RVINTRIN_SHFL_PSEUDO_OP32( 3, zip_b) +RVINTRIN_SHFL_PSEUDO_OP32( 4, zip4_h) +RVINTRIN_SHFL_PSEUDO_OP32( 6, zip2_h) +RVINTRIN_SHFL_PSEUDO_OP32( 7, zip_h) +RVINTRIN_SHFL_PSEUDO_OP32( 8, zip8) +RVINTRIN_SHFL_PSEUDO_OP32(12, zip4) +RVINTRIN_SHFL_PSEUDO_OP32(14, zip2) +RVINTRIN_SHFL_PSEUDO_OP32(15, zip) + +RVINTRIN_SHFL_PSEUDO_OP64( 1, zip_n) +RVINTRIN_SHFL_PSEUDO_OP64( 2, zip2_b) +RVINTRIN_SHFL_PSEUDO_OP64( 3, zip_b) +RVINTRIN_SHFL_PSEUDO_OP64( 4, zip4_h) +RVINTRIN_SHFL_PSEUDO_OP64( 6, zip2_h) +RVINTRIN_SHFL_PSEUDO_OP64( 7, zip_h) +RVINTRIN_SHFL_PSEUDO_OP64( 8, zip8_w) +RVINTRIN_SHFL_PSEUDO_OP64(12, zip4_w) +RVINTRIN_SHFL_PSEUDO_OP64(14, zip2_w) +RVINTRIN_SHFL_PSEUDO_OP64(15, zip_w) +RVINTRIN_SHFL_PSEUDO_OP64(16, zip16) +RVINTRIN_SHFL_PSEUDO_OP64(24, zip8) +RVINTRIN_SHFL_PSEUDO_OP64(28, zip4) +RVINTRIN_SHFL_PSEUDO_OP64(30, zip2) +RVINTRIN_SHFL_PSEUDO_OP64(31, zip) + +#endif // RVINTRIN_H diff --git a/gcc/configure b/gcc/configure index 5624b8a2fc11..d850e53ae34d 100755 --- a/gcc/configure +++ b/gcc/configure @@ -28107,6 +28107,68 @@ if test $gcc_cv_as_riscv_attribute = yes; then $as_echo "#define HAVE_AS_RISCV_ATTRIBUTE 1" >>confdefs.h +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -misa-spec= support" >&5 +$as_echo_n "checking assembler for -misa-spec= support... " >&6; } +if ${gcc_cv_as_riscv_isa_spec+:} false; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_riscv_isa_spec=no + if test x$gcc_cv_as != x; then + $as_echo '' > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -misa-spec=2.2 -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_riscv_isa_spec=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_riscv_isa_spec" >&5 +$as_echo "$gcc_cv_as_riscv_isa_spec" >&6; } +if test $gcc_cv_as_riscv_isa_spec = yes; then + +$as_echo "#define HAVE_AS_MISA_SPEC 1" >>confdefs.h + +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -march=rv32i_zifencei support" >&5 +$as_echo_n "checking assembler for -march=rv32i_zifencei support... " >&6; } +if ${gcc_cv_as_riscv_march_zifencei+:} false; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_riscv_march_zifencei=no + if test x$gcc_cv_as != x; then + $as_echo '' > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -march=rv32i_zifencei -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_riscv_march_zifencei=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_riscv_march_zifencei" >&5 +$as_echo "$gcc_cv_as_riscv_march_zifencei" >&6; } +if test $gcc_cv_as_riscv_march_zifencei = yes; then + +$as_echo "#define HAVE_AS_MARCH_ZIFENCEI 1" >>confdefs.h + fi ;; diff --git a/gcc/configure.ac b/gcc/configure.ac index e70c19ea9712..e39d81c55957 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -5088,6 +5088,16 @@ configured with --enable-newlib-nano-formatted-io.]) [.attribute stack_align,4],, [AC_DEFINE(HAVE_AS_RISCV_ATTRIBUTE, 1, [Define if your assembler supports .attribute.])]) + gcc_GAS_CHECK_FEATURE([-misa-spec= support], + gcc_cv_as_riscv_isa_spec,, + [-misa-spec=2.2],,, + [AC_DEFINE(HAVE_AS_MISA_SPEC, 1, + [Define if the assembler understands -misa-spec=.])]) + gcc_GAS_CHECK_FEATURE([-march=rv32i_zifencei support], + gcc_cv_as_riscv_march_zifencei,, + [-march=rv32i_zifencei],,, + [AC_DEFINE(HAVE_AS_MARCH_ZIFENCEI, 1, + [Define if the assembler understands -march=rv*_zifencei.])]) ;; s390*-*-*) gcc_GAS_CHECK_FEATURE([.gnu_attribute support], diff --git a/gcc/testsuite/gcc.target/riscv/Zknd-aes-01.c b/gcc/testsuite/gcc.target/riscv/Zknd-aes-01.c new file mode 100644 index 000000000000..87d0b490476d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/Zknd-aes-01.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zknd -mabi=ilp32 -O2" } */ + +int foo1(int rs1, int rs2) +{ + return __builtin_riscv_aes32dsi(rs1, rs2, 1); +} + +int foo2(int rs1, int rs2) +{ + return __builtin_riscv_aes32dsmi(rs1, rs2, 0); +} + +/* { dg-final { scan-assembler-times "aes32dsi" 1 } } */ +/* { dg-final { scan-assembler-times "aes32dsmi" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/Zknd-aes-02.c b/gcc/testsuite/gcc.target/riscv/Zknd-aes-02.c new file 
mode 100644 index 000000000000..3abe8342f9ff --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/Zknd-aes-02.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zknd -mabi=lp64 -O2" } */ + +long foo1(long rs1, long rs2) +{ + return __builtin_riscv_aes64ds(rs1, rs2); +} + +long foo2(long rs1, long rs2) +{ + return __builtin_riscv_aes64dsm(rs1, rs2); +} + +long foo3(long rs1) +{ + return __builtin_riscv_aes64im(rs1); +} + +/* { dg-final { scan-assembler-times "aes64ds" 2 } } */ +/* { dg-final { scan-assembler-times "aes64dsm" 1 } } */ +/* { dg-final { scan-assembler-times "aes64im" 1 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/Zkne-aes-01.c b/gcc/testsuite/gcc.target/riscv/Zkne-aes-01.c new file mode 100644 index 000000000000..06848166f079 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/Zkne-aes-01.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zkne -mabi=ilp32 -O2" } */ + +int foo1(int rs1, int rs2) +{ + return __builtin_riscv_aes32esi(rs1, rs2, 1); +} + +int foo2(int rs1, int rs2) +{ + return __builtin_riscv_aes32esmi(rs1, rs2, 1); +} + +/* { dg-final { scan-assembler-times "aes32esi" 1 } } */ +/* { dg-final { scan-assembler-times "aes32esmi" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/Zkne-aes-02.c b/gcc/testsuite/gcc.target/riscv/Zkne-aes-02.c new file mode 100644 index 000000000000..8c8bf43b6804 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/Zkne-aes-02.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zkne -mabi=lp64 -O2" } */ + +long foo1(long rs1, long rs2) +{ + return __builtin_riscv_aes64es(rs1, rs2); +} + +long foo2(long rs1, long rs2) +{ + return __builtin_riscv_aes64esm(rs1, rs2); +} + +long foo3(long rs1) +{ + return __builtin_riscv_aes64ks1i(rs1, 1); +} + +long foo4(long rs1, long rs2) +{ + return __builtin_riscv_aes64ks2(rs1, rs2); +} + +/* { dg-final { scan-assembler-times "aes64es" 2 } } */ +/* { dg-final { 
scan-assembler-times "aes64esm" 1 } } */ +/* { dg-final { scan-assembler-times "aes64ks1i" 1 } } */ +/* { dg-final { scan-assembler-times "aes64ks2" 1 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/Zknh-sha256.c b/gcc/testsuite/gcc.target/riscv/Zknh-sha256.c new file mode 100644 index 000000000000..1c1cb7be5d0d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/Zknh-sha256.c @@ -0,0 +1,27 @@ +/* { dg-do compile { target { riscv64*-*-* } } } */ +/* { dg-options "-march=rv64gc_zknh -mabi=lp64 -O2" } */ + +long foo1(long rs1) +{ + return __builtin_riscv_sha256sig0(rs1); +} + +long foo2(long rs1) +{ + return __builtin_riscv_sha256sig1(rs1); +} + +long foo3(long rs1) +{ + return __builtin_riscv_sha256sum0(rs1); +} + +long foo4(long rs1) +{ + return __builtin_riscv_sha256sum1(rs1); +} + +/* { dg-final { scan-assembler-times "sha256sig0" 1 } } */ +/* { dg-final { scan-assembler-times "sha256sig1" 1 } } */ +/* { dg-final { scan-assembler-times "sha256sum0" 1 } } */ +/* { dg-final { scan-assembler-times "sha256sum1" 1 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/Zknh-sha512-01.c b/gcc/testsuite/gcc.target/riscv/Zknh-sha512-01.c new file mode 100644 index 000000000000..ef1f6dafe60b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/Zknh-sha512-01.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zknh -mabi=ilp32 -O2" } */ + +int foo1(int rs1, int rs2) +{ + return __builtin_riscv_sha512sig0h(rs1, rs2); +} + +int foo2(int rs1, int rs2) +{ + return __builtin_riscv_sha512sig0l(rs1, rs2); +} + +int foo3(int rs1, int rs2) +{ + return __builtin_riscv_sha512sig1h(rs1, rs2); +} + +int foo4(int rs1, int rs2) +{ + return __builtin_riscv_sha512sig1l(rs1, rs2); +} + +int foo5(int rs1, int rs2) +{ + return __builtin_riscv_sha512sum0r(rs1, rs2); +} + +int foo6(int rs1, int rs2) +{ + return __builtin_riscv_sha512sum1r(rs1, rs2); +} + +/* { dg-final { scan-assembler-times "sha512sig0h" 1 } } */ +/* { 
dg-final { scan-assembler-times "sha512sig0l" 1 } } */ +/* { dg-final { scan-assembler-times "sha512sig1h" 1 } } */ +/* { dg-final { scan-assembler-times "sha512sig1l" 1 } } */ +/* { dg-final { scan-assembler-times "sha512sum0r" 1 } } */ +/* { dg-final { scan-assembler-times "sha512sum1r" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/riscv/Zknh-sha512-02.c b/gcc/testsuite/gcc.target/riscv/Zknh-sha512-02.c new file mode 100644 index 000000000000..f25cbcfb75b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/Zknh-sha512-02.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64i_zknh -mabi=lp64 -O2" } */ + +long foo1(long rs1) +{ + return __builtin_riscv_sha512sig0(rs1); +} + +long foo2(long rs1) +{ + return __builtin_riscv_sha512sig1(rs1); +} + + +long foo3(long rs1) +{ + return __builtin_riscv_sha512sum0(rs1); +} + +long foo4(long rs1) +{ + return __builtin_riscv_sha512sum1(rs1); +} + +/* { dg-final { scan-assembler-times "sha512sig0" 1 } } */ +/* { dg-final { scan-assembler-times "sha512sig1" 1 } } */ +/* { dg-final { scan-assembler-times "sha512sum0" 1 } } */ +/* { dg-final { scan-assembler-times "sha512sum1" 1 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/Zksed-sm4.c b/gcc/testsuite/gcc.target/riscv/Zksed-sm4.c new file mode 100644 index 000000000000..f7bb3e8dc0b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/Zksed-sm4.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64i_zksed -mabi=lp64 -O2" } */ + +long foo1(long rs1) +{ + return __builtin_riscv_sm4ed(rs1, 1); +} + +long foo2(long rs1) +{ + return __builtin_riscv_sm4ks(rs1, 2); +} + + + +/* { dg-final { scan-assembler-times "sm4ed" 1 } } */ +/* { dg-final { scan-assembler-times "sm4ks" 1 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/Zksh-sm3.c b/gcc/testsuite/gcc.target/riscv/Zksh-sm3.c new file mode 100644 index 000000000000..88ef5a558164 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/riscv/Zksh-sm3.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zksh -mabi=lp64 -O2" } */ + +long foo1(long rs1) +{ + return __builtin_riscv_sm3p0(rs1); +} + +long foo2(long rs1) +{ + return __builtin_riscv_sm3p1(rs1); +} + +/* { dg-final { scan-assembler-times "sm3p0" 1 } } */ +/* { dg-final { scan-assembler-times "sm3p1" 1 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/arch-10.c b/gcc/testsuite/gcc.target/riscv/arch-10.c new file mode 100644 index 000000000000..47dbda333c9a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-10.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32gf2 -mabi=ilp32" } */ +int foo() +{ +} +/* { dg-error "Extension `f' appear more than one time." "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/riscv/arch-11.c b/gcc/testsuite/gcc.target/riscv/arch-11.c new file mode 100644 index 000000000000..129d8f72804f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-11.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32g_zicsr2 -mabi=ilp32" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-9.c b/gcc/testsuite/gcc.target/riscv/arch-9.c new file mode 100644 index 000000000000..74e64103563d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-9.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32g2 -mabi=ilp32" } */ +int foo() +{ +} +/* { dg-warning "version of `g` will be omitted, please specify version for individual extension." 
"" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-11.c b/gcc/testsuite/gcc.target/riscv/attribute-11.c index a8649508b2fe..98bd8d4da42b 100644 --- a/gcc/testsuite/gcc.target/riscv/attribute-11.c +++ b/gcc/testsuite/gcc.target/riscv/attribute-11.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -mriscv-attribute -march=rv32id -mabi=ilp32" } */ +/* { dg-options "-O -mriscv-attribute -march=rv32id -mabi=ilp32 -misa-spec=2.2" } */ int foo() { } diff --git a/gcc/testsuite/gcc.target/riscv/attribute-12.c b/gcc/testsuite/gcc.target/riscv/attribute-12.c index df27fc3234d4..44fccad3b29e 100644 --- a/gcc/testsuite/gcc.target/riscv/attribute-12.c +++ b/gcc/testsuite/gcc.target/riscv/attribute-12.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -mriscv-attribute -march=rv32ifd -mabi=ilp32" } */ +/* { dg-options "-O -mriscv-attribute -march=rv32ifd -mabi=ilp32 -misa-spec=2.2" } */ int foo() { } diff --git a/gcc/testsuite/gcc.target/riscv/attribute-13.c b/gcc/testsuite/gcc.target/riscv/attribute-13.c index 1e8600132935..1b8f93ceaaf7 100644 --- a/gcc/testsuite/gcc.target/riscv/attribute-13.c +++ b/gcc/testsuite/gcc.target/riscv/attribute-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -mriscv-attribute -march=rv32if3d -mabi=ilp32" } */ +/* { dg-options "-O -mriscv-attribute -march=rv32if3d -mabi=ilp32 -misa-spec=2.2" } */ int foo() { } diff --git a/gcc/testsuite/gcc.target/riscv/attribute-14.c b/gcc/testsuite/gcc.target/riscv/attribute-14.c new file mode 100644 index 000000000000..2591c1f92f6e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-14.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32if -mabi=ilp32 -misa-spec=20190608" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p1_f2p2_zicsr2p0\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-15.c b/gcc/testsuite/gcc.target/riscv/attribute-15.c new file 
mode 100644 index 000000000000..9cae1a27a6fe --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-15.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32gc -mabi=ilp32 -misa-spec=2.2" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p0_m2p0_a2p0_f2p0_d2p0_c2p0\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-16.c b/gcc/testsuite/gcc.target/riscv/attribute-16.c new file mode 100644 index 000000000000..f090363b9793 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-16.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32gc -mabi=ilp32 -misa-spec=20190608" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p1_m2p0_a2p0_f2p2_d2p2_c2p0_zicsr2p0" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-17.c b/gcc/testsuite/gcc.target/riscv/attribute-17.c new file mode 100644 index 000000000000..19ef540b5b98 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/attribute-17.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mriscv-attribute -march=rv32gc -mabi=ilp32 -misa-spec=20191213" } */ +int foo() +{ +} +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-6.c b/gcc/testsuite/gcc.target/riscv/attribute-6.c deleted file mode 100644 index c75b0d622005..000000000000 --- a/gcc/testsuite/gcc.target/riscv/attribute-6.c +++ /dev/null @@ -1,6 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O -mriscv-attribute -march=rv32g2p0 -mabi=ilp32" } */ -int foo() -{ -} -/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p0_m2p0_a2p0_f2p0_d2p0\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-8.c b/gcc/testsuite/gcc.target/riscv/attribute-8.c index 1d1617638050..90f5a4022a0c 100644 --- a/gcc/testsuite/gcc.target/riscv/attribute-8.c +++ 
b/gcc/testsuite/gcc.target/riscv/attribute-8.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ -/* { dg-options "-O -mriscv-attribute -march=rv32i2p0xv5_xabc -mabi=ilp32" } */ +/* { dg-options "-O -mriscv-attribute -march=rv32i2p0xabc_xv5 -mabi=ilp32" } */ int foo() { } -/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p0_xv5p0_xabc2p0\"" } } */ +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p0_xabc_xv5p0\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/attribute-9.c b/gcc/testsuite/gcc.target/riscv/attribute-9.c index bc4db0eb6472..4598872f0a68 100644 --- a/gcc/testsuite/gcc.target/riscv/attribute-9.c +++ b/gcc/testsuite/gcc.target/riscv/attribute-9.c @@ -3,4 +3,4 @@ int foo() { } -/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p0_sabc2p0_xbar2p0\"" } } */ +/* { dg-final { scan-assembler ".attribute arch, \"rv32i2p0_sabc_xbar\"" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-adduw.c b/gcc/testsuite/gcc.target/riscv/rvb-adduw.c new file mode 100644 index 000000000000..75ca35f35fd7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-adduw.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zba -mabi=lp64 -O2" } */ + +long +foo (int i, unsigned int j) +{ + return i + (long)j; +} + +/* { dg-final { scan-assembler "add.uw" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-li-rori.c b/gcc/testsuite/gcc.target/riscv/rvb-li-rori.c new file mode 100644 index 000000000000..a6e03faedb34 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-li-rori.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -O2" } */ + +/* Expect li -126 and rori. 
*/ +long +foo (void) +{ +return 0xffff77ffffffffffL; +} + +long +foo_2 (void) +{ +return 0x77ffffffffffffffL; +} + +long +foo_3 (void) +{ +return 0xfffffffeefffffffL; +} + +long +foo_4 (void) +{ +return 0x5ffffffffffffff5L; +} + +long +foo_5 (void) +{ +return 0xaffffffffffffffaL; +} + +/* { dg-final { scan-assembler-times "rori\t" 5 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-shNadd-01.c b/gcc/testsuite/gcc.target/riscv/rvb-shNadd-01.c new file mode 100644 index 000000000000..aaabaf5e4e47 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-shNadd-01.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zba -mabi=lp64 -O2" } */ + +long test_1(long a, long b) +{ + return a + (b << 1); +} +long test_2(long a, long b) +{ + return a + (b << 2); +} +long test_3(long a, long b) +{ + return a + (b << 3); +} + +/* { dg-final { scan-assembler-times "sh1add" 1 } } */ +/* { dg-final { scan-assembler-times "sh2add" 1 } } */ +/* { dg-final { scan-assembler-times "sh3add" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-shNadd-02.c b/gcc/testsuite/gcc.target/riscv/rvb-shNadd-02.c new file mode 100644 index 000000000000..8dfea4a1a855 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-shNadd-02.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zba -mabi=ilp32 -O2" } */ + +long test_1(long a, long b) +{ + return a + (b << 1); +} +long test_2(long a, long b) +{ + return a + (b << 2); +} +long test_3(long a, long b) +{ + return a + (b << 3); +} + +/* { dg-final { scan-assembler-times "sh1add" 1 } } */ +/* { dg-final { scan-assembler-times "sh2add" 1 } } */ +/* { dg-final { scan-assembler-times "sh3add" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-shNadd-03.c b/gcc/testsuite/gcc.target/riscv/rvb-shNadd-03.c new file mode 100644 index 000000000000..b2ea231a255c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-shNadd-03.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options 
"-march=rv64gc_zba -mabi=lp64 -O2" } */ + +/* Signed int index: expect plain sh1add/sh2add/sh3add. */ +int foos(short *x, int n){ + return x[n]; +} +int fooi(int *x, int n){ + return x[n]; +} +int fooll(long long *x, int n){ + return x[n]; +} + +/* Unsigned int index (RV64 only): expect sh1add.uw/sh2add.uw/sh3add.uw. */ +int ufoos(short *x, unsigned int n){ + return x[n]; +} +int ufooi(int *x, unsigned int n){ + return x[n]; +} +int ufooll(long long *x, unsigned int n){ + return x[n]; +} + +/* { dg-final { scan-assembler-times "sh1add\t" 1 } } */ +/* { dg-final { scan-assembler-times "sh2add\t" 1 } } */ +/* { dg-final { scan-assembler-times "sh3add\t" 1 } } */ +/* { dg-final { scan-assembler-times "sh1add.uw" 1 } } */ +/* { dg-final { scan-assembler-times "sh2add.uw" 1 } } */ +/* { dg-final { scan-assembler-times "sh3add.uw" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-slliuw.c b/gcc/testsuite/gcc.target/riscv/rvb-slliuw.c new file mode 100644 index 000000000000..4d00e02599c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-slliuw.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64 -O2" } */ + +long +foo (long i) +{ + return (long)(unsigned int)i << 10; +} + +/* { dg-final { scan-assembler "slli.uw" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbb-andn-orn-xnor-01.c b/gcc/testsuite/gcc.target/riscv/rvb-zbb-andn-orn-xnor-01.c new file mode 100644 index 000000000000..0037dea5647a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbb-andn-orn-xnor-01.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -O2" } */ + +unsigned long long foo1(unsigned long long rs1, unsigned long long rs2) +{ +return rs1 & ~rs2; +} + +unsigned long long foo2(unsigned long long rs1, unsigned long long rs2) +{ +return rs1 | ~rs2; +} + +unsigned long long foo3(unsigned long long rs1, unsigned long long rs2) +{ +return rs1 ^ ~rs2; +} + +/* { dg-final { scan-assembler-times "andn" 2 } } */ +/* { dg-final { scan-assembler-times "orn" 2 } } */ +/* { dg-final { scan-assembler-times 
"xnor" 2 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbb-andn-orn-xnor-02.c b/gcc/testsuite/gcc.target/riscv/rvb-zbb-andn-orn-xnor-02.c new file mode 100644 index 000000000000..b0c1e40c5546 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbb-andn-orn-xnor-02.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32 -O2" } */ + +unsigned int foo1(unsigned int rs1, unsigned int rs2) +{ +return rs1 & ~rs2; +} + +unsigned int foo2(unsigned int rs1, unsigned int rs2) +{ +return rs1 | ~rs2; +} + +unsigned int foo3(unsigned int rs1, unsigned int rs2) +{ +return rs1 ^ ~rs2; +} + +/* { dg-final { scan-assembler-times "andn" 2 } } */ +/* { dg-final { scan-assembler-times "orn" 2 } } */ +/* { dg-final { scan-assembler-times "xnor" 2 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbb-min-max.c b/gcc/testsuite/gcc.target/riscv/rvb-zbb-min-max.c new file mode 100644 index 000000000000..f44c398ea080 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbb-min-max.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -O2" } */ + +long +foo1 (long i, long j) +{ + return i < j ? i : j; +} + +long +foo2 (long i, long j) +{ + return i > j ? i : j; +} + +unsigned long +foo3 (unsigned long i, unsigned long j) +{ + return i < j ? i : j; +} + +unsigned long +foo4 (unsigned long i, unsigned long j) +{ + return i > j ? 
i : j; +} + +/* { dg-final { scan-assembler-times "min" 3 } } */ +/* { dg-final { scan-assembler-times "max" 3 } } */ +/* { dg-final { scan-assembler-times "minu" 1 } } */ +/* { dg-final { scan-assembler-times "maxu" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbb-rol-ror-01.c b/gcc/testsuite/gcc.target/riscv/rvb-zbb-rol-ror-01.c new file mode 100644 index 000000000000..958966289df9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbb-rol-ror-01.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -O2" } */ + +unsigned long foo1(unsigned long rs1, unsigned long rs2) +{ + long shamt = rs2 & (64 - 1); + return (rs1 << shamt) | (rs1 >> ((64 - shamt) & (64 - 1))); +} +unsigned long foo2(unsigned long rs1, unsigned long rs2) +{ + unsigned long shamt = rs2 & (64 - 1); + return (rs1 >> shamt) | (rs1 << ((64 - shamt) & (64 - 1))); +} + +/* { dg-final { scan-assembler-times "rol" 2 } } */ +/* { dg-final { scan-assembler-times "ror" 2 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbb-rol-ror-02.c b/gcc/testsuite/gcc.target/riscv/rvb-zbb-rol-ror-02.c new file mode 100644 index 000000000000..24b482f21453 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbb-rol-ror-02.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32 -O2" } */ + +unsigned int foo1(unsigned int rs1, unsigned int rs2) +{ + unsigned int shamt = rs2 & (32 - 1); + return (rs1 << shamt) | (rs1 >> ((32 - shamt) & (32 - 1))); +} +unsigned int foo2(unsigned int rs1, unsigned int rs2) +{ + unsigned int shamt = rs2 & (32 - 1); + return (rs1 >> shamt) | (rs1 << ((32 - shamt) & (32 - 1))); +} + +/* { dg-final { scan-assembler-times "rol" 2 } } */ +/* { dg-final { scan-assembler-times "ror" 2 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbb-rol-ror-03.c b/gcc/testsuite/gcc.target/riscv/rvb-zbb-rol-ror-03.c new file mode 100644 index 
000000000000..ffde7c9cd589 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbb-rol-ror-03.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -O2" } */ + +/* RV64 only: rotate a 32-bit value, so every shift count is reduced modulo 32.  */ +unsigned int rol(unsigned int rs1, unsigned int rs2) +{ + int shamt = rs2 & (32 - 1); + return (rs1 << shamt) | (rs1 >> ((32 - shamt) & (32 - 1))); +} +unsigned int ror(unsigned int rs1, unsigned int rs2) +{ + int shamt = rs2 & (32 - 1); /* A count of 32..63 would be an undefined shift of a 32-bit operand.  */ + return (rs1 >> shamt) | (rs1 << ((32 - shamt) & (32 - 1))); +} + +/* { dg-final { scan-assembler-times "rolw" 1 } } */ +/* { dg-final { scan-assembler-times "rorw" 1 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbb-sext_b.c b/gcc/testsuite/gcc.target/riscv/rvb-zbb-sext_b.c new file mode 100644 index 000000000000..c1646058f58f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbb-sext_b.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zbb -O2" { target { riscv32*-*-* } } } */ +/* { dg-options "-march=rv64gc_zbb -O2" { target { riscv64*-*-* } } } */ + +long sext_b(long a) { /* Sign-extend the low byte; the scan below expects a single sext.b.  */ + return (signed char)a; +} + +/* { dg-final { scan-assembler-times "sext.b\t" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbb-sext_h.c b/gcc/testsuite/gcc.target/riscv/rvb-zbb-sext_h.c new file mode 100644 index 000000000000..785f9da9cb94 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbb-sext_h.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zbb -O2" { target { riscv32*-*-* } } } */ +/* { dg-options "-march=rv64gc_zbb -O2" { target { riscv64*-*-* } } } */ + +long sext_h(long a) { + return (short)a; +} + +/* { dg-final { scan-assembler-times "sext.h\t" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbbw.c b/gcc/testsuite/gcc.target/riscv/rvb-zbbw.c new file mode 100644 index 000000000000..236ddf7b5834 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbbw.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options
"-march=rv64gc_zbb -mabi=lp64 -O2" } */ + +int +clz (int i) +{ + return __builtin_clz (i); +} + +int +ctz (int i) +{ + return __builtin_ctz (i); +} + +int +popcount (int i) +{ + return __builtin_popcount (i); +} + + +/* { dg-final { scan-assembler-times "clzw" 1 } } */ +/* { dg-final { scan-assembler-times "ctzw" 1 } } */ +/* { dg-final { scan-assembler-times "cpopw" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbs-bclr.c b/gcc/testsuite/gcc.target/riscv/rvb-zbs-bclr.c new file mode 100644 index 000000000000..082fc72aad0a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbs-bclr.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb_zbs -mabi=lp64 -O2" } */ + +/* bclr */ +long +foo0 (long i, long j) +{ + return i & ~(1L << j); +} + +/* bclri */ +long +foo1 (long i) +{ + return i & ~(1L << 20); +} + +/* { dg-final { scan-assembler-times "bclr" 3 } } */ +/* { dg-final { scan-assembler-times "bclri" 1 } } */ +/* { dg-final { scan-assembler-not "andi" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbs-bext.c b/gcc/testsuite/gcc.target/riscv/rvb-zbs-bext.c new file mode 100644 index 000000000000..1b2ecac5750d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbs-bext.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbs -mabi=lp64 -O2" } */ + +/* bext */ +long +foo0 (long i, long j) +{ + return 1L & (i >> j); +} + +/* bexti */ +long +foo1 (long i) +{ + return 1L & (i >> 20); +} + +/* { dg-final { scan-assembler-times "bext" 3 } } */ +/* { dg-final { scan-assembler-times "bexti" 1 } } */ +/* { dg-final { scan-assembler-not "andi" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbs-binv.c b/gcc/testsuite/gcc.target/riscv/rvb-zbs-binv.c new file mode 100644 index 000000000000..b7a0117961cf --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbs-binv.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbs -mabi=lp64 -O2" } */ + +/* binv */ +long +foo0 (long i, 
long j) +{ + return i ^ (1L << j); +} + +/* binvi */ +long +foo1 (long i) +{ + return i ^ (1L << 20); +} + +/* { dg-final { scan-assembler-times "binv" 3 } } */ +/* { dg-final { scan-assembler-times "binvi" 1 } } */ +/* { dg-final { scan-assembler-not "andi" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvb-zbs-bset.c b/gcc/testsuite/gcc.target/riscv/rvb-zbs-bset.c new file mode 100644 index 000000000000..fa2ca8154f64 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvb-zbs-bset.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbs -mabi=lp64 -O2" } */ + +/* bset */ +long +sub0 (long i, long j) +{ + return i | (1L << j); +} + +/* bset_mask */ +long +sub1 (long i, long j) +{ + return i | (1L << (j & 0x3f)); +} + +/* bset_1 */ +long +sub2 (long i) +{ + return 1L << i; +} + +/* bset_1_mask */ +long +sub3 (long i) +{ + return 1L << (i & 0x3f); +} + +/* bseti */ +long +sub4 (long i) +{ + return i | (1L << 20); +} + +/* { dg-final { scan-assembler-times "bset" 6 } } */ +/* { dg-final { scan-assembler-times "bseti" 1 } } */ +/* { dg-final { scan-assembler-not "andi" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvk-zbkx-xperm-01.c b/gcc/testsuite/gcc.target/riscv/rvk-zbkx-xperm-01.c new file mode 100644 index 000000000000..fccbbd16a349 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvk-zbkx-xperm-01.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zbkx -mabi=ilp32 -O2" } */ +unsigned int xperm(unsigned int rs1, unsigned int rs2, int sz_log2) +{ + unsigned int r = 0; + unsigned int sz = 1LL << sz_log2; + unsigned int mask = (1LL << sz) - 1; + for (int i = 0; i < 32; i += sz) + { + unsigned int pos = ((rs2 >> i) & mask) << sz_log2; + if (pos < 32) + r |= ((rs1 >> pos) & mask) << i; + } + return r; +} + +unsigned int xperm_n (unsigned int rs1, unsigned int rs2) { return xperm(rs1, rs2, 2); } +unsigned int xperm_b (unsigned int rs1, unsigned int rs2) { return xperm(rs1, rs2, 3); } + +/* { dg-final { 
scan-assembler-times "xperm.n" 5 } } */ +/* { dg-final { scan-assembler-times "xperm.b" 5 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvk-zbkx-xperm-02.c b/gcc/testsuite/gcc.target/riscv/rvk-zbkx-xperm-02.c new file mode 100644 index 000000000000..bdd4da95ea40 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvk-zbkx-xperm-02.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbkx -mabi=lp64 -O2" } */ +unsigned long xperm(unsigned long rs1, unsigned long rs2, int sz_log2) +{ + unsigned long r = 0; + unsigned long sz = 1LL << sz_log2; + unsigned long mask = (1LL << sz) - 1; + for (int i = 0; i < 64; i += sz) + { + unsigned long pos = ((rs2 >> i) & mask) << sz_log2; + if (pos < 64) + r |= ((rs1 >> pos) & mask) << i; + } + return r; +} + +unsigned long xperm_n (unsigned long rs1, unsigned long rs2) { return xperm(rs1, rs2, 2); } +unsigned long xperm_b (unsigned long rs1, unsigned long rs2) { return xperm(rs1, rs2, 3); } + +/* { dg-final { scan-assembler-times "xperm.n" 5 } } */ +/* { dg-final { scan-assembler-times "xperm.b" 5 } } */