From 8f2e151b17f571f643e901678948d6533cded38b Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 15 Oct 2024 13:58:17 +0900 Subject: [PATCH 1/2] mov opAVX10ZeroExt into private --- xbyak/xbyak.h | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index c0bd83ee..b3a4df86 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -2794,6 +2794,30 @@ class CodeGenerator : public CodeArray { } opSSE(x, op, type1, code1, isXMM_XMMorMEM, imm); } + // AVX10 zero-extending for vmovd, vmovw + void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding enc, int bit) + { + const Operand *p1 = &op1; + const Operand *p2 = &op2; + bool rev = false; + if (p1->isMEM()) { + std::swap(p1, p2); + rev = true; + } + if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) + if (p1->isXMM()) { + std::swap(p1, p2); + rev = !rev; + } + int sel = -1; + if (getEncoding(enc, 1) == AVX10v2Encoding) { + if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev); + } else { + if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev); + } + if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION) + opAVX_X_X_XM(*static_cast(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]); + } public: unsigned int getVersion() const { return VERSION; } using CodeArray::db; @@ -3203,30 +3227,6 @@ class CodeGenerator : public CodeArray { } db(0xC8 + (idx & 7)); } - // AVX10 zero-extending for vmovd, vmovw - void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding enc, int bit) - { - const Operand *p1 = &op1; - const Operand *p2 = &op2; - bool rev = false; - if (p1->isMEM()) { - std::swap(p1, p2); - rev = true; - } - if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) - if (p1->isXMM()) { - std::swap(p1, p2); - rev = !rev; - } - int sel = -1; - if (getEncoding(enc, 1) == AVX10v2Encoding) { - 
if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev); - } else { - if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev); - } - if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION) - opAVX_X_X_XM(*static_cast(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]); - } void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding) { const uint64_t typeTbl[] = { From c08b1d3cc4ed553c8ce764452b547a19a7ad52e6 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 15 Oct 2024 14:14:19 +0900 Subject: [PATCH 2/2] [doc] Revised and improved the English text. --- doc/changelog.md | 2 +- doc/usage.md | 2 ++ readme.md | 40 ++++++++++++++++++++-------------------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/doc/changelog.md b/doc/changelog.md index 1461f6ed..e7264888 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -1,6 +1,6 @@ # History -* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10./ +* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10. * 2024/Oct/15 ver 7.11 Added full support for AVX10.2 * 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended. * 2024/Oct/10 ver 7.09.1 fix the names of vpcompressb and vpcompressw diff --git a/doc/usage.md b/doc/usage.md index dcb3e101..b8073cea 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -148,6 +148,8 @@ feature|AVX-VNNI-INT8, AVX512-FP16|AVX10.2 - Target functions: vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud vpdpwsuds vpdpwusd vpdpwusds vpdpwuud, vpdpwuuds, vmovd, vmovw - Remark: vmovd and vmovw several kinds of encoding such as AVX/AVX512F/AVX512-FP16/AVX10.2. +At first, I attempted to use EvexEncoding (resp. VexEncoding) instead of AVX10v2Encoding (resp. EvexEncoding) for `setDefaultEncodingAVX10`. 
+But I abandoned this idea when I found that `vmovd` and `vmovw` have different EVEX encodings in AVX512 and AVX10.2. ### Remark * `k1`, ..., `k7` are opmask registers. diff --git a/readme.md b/readme.md index 90d29345..322d09be 100644 --- a/readme.md +++ b/readme.md @@ -1,7 +1,7 @@ # Xbyak 7.20 [![Badge Build]][Build Status] -*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)* +*A JIT assembler for x86/x64 architectures supporting advanced instruction sets up to AVX10.2* ## Menu @@ -11,15 +11,15 @@ ## Abstract -Xbyak is a C++ header library that enables dynamically to assemble x86(IA32), x64(AMD64, x86-64) mnemonic. +Xbyak is a C++ header-only library that enables dynamic assembly of x86/x64 instructions using mnemonics. -The pronunciation of Xbyak is `kəi-bja-k`. -It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate), which means the beginning of the world. +The pronunciation of Xbyak is `/kʌɪbjæk/` (kai-byak). -## Feature +The name is derived from the Japanese word [開闢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate) (kaibyaku), which means "the beginning of the world" or "creation". -- header file only -- Intel/MASM like syntax +## Features +- Header-only library +- Intel/MASM-like syntax - Full support for AVX-512, APX, and AVX10.2 **Note**: @@ -32,22 +32,22 @@ If you want to use them, then specify `-fno-operator-names` option to gcc/clang. 
### News -- support AVX10.2 -- support xresldtrk/xsusldtrk -- support RAO-INT for APX -- support AVX10 detection, AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE -- support APX except for a few instructions -- add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma -- add movdiri, movdir64b, clwb, cldemote +- Support AVX10.2 +- Support xresldtrk/xsusldtrk +- Support RAO-INT for APX +- Support AVX10 detection, AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE +- Support APX except for a few instructions +- Add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma +- Add movdiri, movdir64b, clwb, cldemote - WAITPKG instructions (tpause, umonitor, umwait) are supported. - MmapAllocator supports memfd with user-defined strings. see sample/memfd.cpp -- strictly check address offset disp32 in a signed 32-bit integer. e.g., `ptr[(void*)0xffffffff]` causes an error. - - define `XBYAK_OLD_DISP_CHECK` if you need an old check, but the option will be remoevd. -- add `jmp(mem, T_FAR)`, `call(mem, T_FAR)` `retf()` for far absolute indirect jump. -- vnni instructions such as vpdpbusd supports vex encoding. -- (break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit. +- Strictly check that the address offset disp32 fits in a signed 32-bit integer; e.g., `ptr[(void*)0xffffffff]` causes an error. + - Define `XBYAK_OLD_DISP_CHECK` if you need the old check, but the option will be removed. +- Add `jmp(mem, T_FAR)`, `call(mem, T_FAR)`, and `retf()` for far absolute indirect jump. +- VNNI instructions such as vpdpbusd support VEX encoding. +- (Break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces `imm` to be cast to 8 (resp. 16) bits. - (Windows) `#include <winsock2.h>` has been removed from xbyak.h, so add it explicitly if you need it. -- support exception-less mode see. [Exception-less mode](#exception-less-mode) +- Support exception-less mode. See 
[Exception-less mode](#exception-less-mode) - `XBYAK_USE_MMAP_ALLOCATOR` will be defined on Linux/macOS unless `XBYAK_DONT_USE_MMAP_ALLOCATOR` is defined. ### Supported OS