Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed Oct 11, 2024
2 parents c68cc53 + 5f4fe93 commit 6ac4d7a
Show file tree
Hide file tree
Showing 15 changed files with 516 additions and 27 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
name: test
on: [push]
on:
push:
branches:
- '*'

defaults:
run:
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5)

project(xbyak LANGUAGES CXX VERSION 7.09)
project(xbyak LANGUAGES CXX VERSION 7.09.1)

file(GLOB headers xbyak/*.h)

Expand Down
2 changes: 1 addition & 1 deletion doc/changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# History

* 2024/Oct/08 ver 7.09 support YMM embedded rounding of AVX10.2 and fix some nmemonics with {sae}/{er}.
* 2024/Oct/08 ver 7.09 support YMM embedded rounding of AVX10.2 and fix some mnemonics with {sae}/{er}.
* 2024/Oct/07 ver 7.08 support rdfsbase etc.
* 2024/Aug/29 ver 7.07.1 adapt to NASM 2.16.03 output of xchg (The functionality stays the same.)
* 2024/Jun/11 ver 7.07 support xresldtrk/xsusldtrk
Expand Down
4 changes: 2 additions & 2 deletions gen/gen_avx512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ void putXM_X()
{ 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
{ 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },

{ 0x63, "vcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 },
{ 0x63, "vcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 },
{ 0x63, "vpcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 },
{ 0x63, "vpcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
version: '7.09',
version: '7.09.1',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)
Expand Down
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

# Xbyak 7.09 [![Badge Build]][Build Status]
# Xbyak 7.09.1 [![Badge Build]][Build Status]

*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*

Expand Down
2 changes: 1 addition & 1 deletion readme.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.09
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.09.1

-----------------------------------------------------------------------------
◎概要
Expand Down
6 changes: 5 additions & 1 deletion test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ apx: apx.cpp $(XBYAK_INC)
avx10_test: avx10_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64

# Instruction lists under target/ that are checked by test_by_xed.sh.
TEST_FILES=avx10.txt misc.txt

# xed_test never produces a file of that name, so always run it on request.
.PHONY: xed_test

# Run test_by_xed.sh once per entry of TEST_FILES.
# `set -e` aborts the loop at the first failing file; without it the recipe's
# exit status would be that of the last iteration only, so an earlier failure
# would be reported as success.
xed_test:
	@set -e; \
	for target in $(addprefix target/, $(TEST_FILES)); do ./test_by_xed.sh $$target; done

test_nm: normalize_prefix $(TARGET)
$(MAKE) -C ../gen
ifneq ($(ONLY_64BIT),1)
Expand Down Expand Up @@ -118,7 +122,7 @@ test: detect_x32
$(MAKE) test_avx512

clean:
$(RM) a.asm *.lst *.obj *.o $(TARGET) lib_run nm.cpp nm_frame make_512 avx10_test
$(RM) a.asm *.lst *.obj *.o $(TARGET) lib_run nm.cpp nm_frame make_512 avx10_test detect_x32

lib_run: lib_test.cpp lib_run.cpp lib.h
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
Expand Down
28 changes: 14 additions & 14 deletions test/misc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,24 +285,24 @@ CYBOZU_TEST_AUTO(vpclmulqdq)
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
CYBOZU_TEST_AUTO(vcompressb_w)
CYBOZU_TEST_AUTO(vpcompressb_w)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
vcompressb(ptr[rax + 64], xmm1);
vcompressb(xmm30 | k5, xmm1);
vcompressb(ptr[rax + 64], ymm1);
vcompressb(ymm30 | k3 |T_z, ymm1);
vcompressb(ptr[rax + 64], zmm1);
vcompressb(zmm30 | k2 |T_z, zmm1);

vcompressw(ptr[rax + 64], xmm1);
vcompressw(xmm30 | k5, xmm1);
vcompressw(ptr[rax + 64], ymm1);
vcompressw(ymm30 | k3 |T_z, ymm1);
vcompressw(ptr[rax + 64], zmm1);
vcompressw(zmm30 | k2 |T_z, zmm1);
vpcompressb(ptr[rax + 64], xmm1);
vpcompressb(xmm30 | k5, xmm1);
vpcompressb(ptr[rax + 64], ymm1);
vpcompressb(ymm30 | k3 |T_z, ymm1);
vpcompressb(ptr[rax + 64], zmm1);
vpcompressb(zmm30 | k2 |T_z, zmm1);

vpcompressw(ptr[rax + 64], xmm1);
vpcompressw(xmm30 | k5, xmm1);
vpcompressw(ptr[rax + 64], ymm1);
vpcompressw(ymm30 | k3 |T_z, ymm1);
vpcompressw(ptr[rax + 64], zmm1);
vpcompressw(zmm30 | k2 |T_z, zmm1);
}
} c;
const uint8_t tbl[] = {
Expand Down
149 changes: 149 additions & 0 deletions test/target/avx10.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
vaddpd(ymm1, ymm2, ymm3 |T_rn_sae);
vaddph(ymm1, ymm2, ymm3 |T_rn_sae);
vaddps(ymm1, ymm2, ymm3 |T_rn_sae);
vcmppd(k1, ymm2, ymm3 |T_sae, 3);
vcmpph(k1, ymm2, ymm3 |T_sae, 3);
vcmpps(k1, ymm2, ymm3 |T_sae, 3);
vcvtdq2ph(xmm1, ymm2 |T_rn_sae);
vcvtdq2ps(ymm1, ymm2 |T_rn_sae);
vcvtpd2dq(xmm1, ymm2 |T_rn_sae);
vcvtpd2ph(xmm1, ymm2 |T_rn_sae);
vcvtpd2ps(xmm1, ymm2 |T_rn_sae);
vcvtpd2qq(ymm1, ymm2 |T_rn_sae);
vcvtpd2udq(xmm1, ymm2 |T_rn_sae);
vcvtpd2uqq(ymm1, ymm2 |T_rn_sae);
vcvtph2dq(ymm1, xmm2 |T_rn_sae);
vcvtph2pd(ymm1, xmm2 |T_sae);
vcvtph2ps(ymm1, xmm2 |T_sae);
vcvtph2psx(ymm1, xmm2 |T_sae);
vcvtph2qq(ymm1, xmm2 |T_rn_sae);
vcvtph2udq(ymm1, xmm2 |T_rn_sae);
vcvtph2uqq(ymm1, xmm2 |T_rn_sae);
vcvtph2uw(ymm1, ymm2 |T_rn_sae);
vcvtph2w(ymm1, ymm2 |T_rn_sae);
vcvtps2dq(ymm1, ymm2 |T_rn_sae);
vcvtps2pd(ymm1, xmm2 |T_sae);
vcvtps2ph(xmm1, ymm2 |T_sae, 3);
vcvtps2phx(xmm1, ymm2 |T_rn_sae);
vcvtps2qq(ymm1, xmm2 |T_rn_sae);
vcvtps2udq(ymm1, ymm2 |T_rn_sae);
vcvtps2uqq(ymm1, xmm2 |T_rn_sae);
vcvtqq2pd(ymm1, ymm2 |T_rn_sae);
vcvtqq2ph(xmm1, ymm2 |T_rn_sae);
vcvtqq2ps(xmm1, ymm2 |T_rn_sae);
vcvttpd2dq(xmm1, ymm2 |T_sae);
vcvttpd2qq(ymm1, ymm2 |T_sae);
vcvttpd2udq(xmm1, ymm2 |T_sae);
vcvttpd2uqq(ymm1, ymm2 |T_sae);
vcvttph2dq(ymm1, xmm2 |T_sae);
vcvttph2qq(ymm1, xmm2 |T_sae);
vcvttph2udq(ymm1, xmm2 |T_sae);
vcvttph2uqq(ymm1, xmm2 |T_sae);
vcvttph2uw(ymm1, ymm2 |T_sae);
vcvttph2w(ymm1, ymm2 |T_sae);
vcvttps2dq(ymm1, ymm2 |T_sae);
vcvttps2qq(ymm1, xmm2 |T_sae);
vcvttps2udq(ymm1, ymm2 |T_sae);
vcvttps2uqq(ymm1, xmm2 |T_sae);
vcvtudq2ph(xmm1, ymm2 |T_rn_sae);
vcvtudq2ps(ymm1, ymm2 |T_rn_sae);
vcvtuqq2pd(ymm1, ymm2 |T_rn_sae);
vcvtuqq2ph(xmm1, ymm2 |T_rn_sae);
vcvtuqq2ps(xmm1, ymm2 |T_rn_sae);
vcvtuw2ph(ymm1, ymm2 |T_rn_sae);
vcvtw2ph(ymm1, ymm2 |T_rn_sae);
vdivpd(ymm1, ymm2, ymm3 |T_rn_sae);
vdivph(ymm1, ymm2, ymm3 |T_rn_sae);
vdivps(ymm1, ymm2, ymm3 |T_rn_sae);
vfcmaddcph(ymm1, ymm2, ymm3 |T_rn_sae);
vfcmulcph(ymm1, ymm2, ymm3 |T_rn_sae);
vfixupimmpd(ymm1, ymm2, ymm3 |T_sae, 3);
vfixupimmps(ymm1, ymm2, ymm3 |T_sae, 3);
vfmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfmaddcph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmaddsub132pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfmaddsub132ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmaddsub132ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfmaddsub213pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfmaddsub213ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmaddsub213ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfmaddsub231pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfmaddsub231ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmaddsub231ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsubadd132pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsubadd132ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsubadd132ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsubadd213pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsubadd213ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsubadd213ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsubadd231pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsubadd231ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfmsubadd231ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfmulcph(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmadd132pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmadd132ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmadd132ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmadd213pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmadd213ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmadd213ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmadd231pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmadd231ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmadd231ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmsub132pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmsub132ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmsub132ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmsub213pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmsub213ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmsub213ps(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmsub231pd(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmsub231ph(ymm1, ymm2, ymm3 |T_rn_sae);
vfnmsub231ps(ymm1, ymm2, ymm3 |T_rn_sae);
vgetexppd(ymm1, ymm2 |T_sae);
vgetexpph(ymm1, ymm2 |T_sae);
vgetexpps(ymm1, ymm2 |T_sae);
vgetmantpd(ymm1, ymm2 |T_sae, 3);
vgetmantph(ymm1, ymm2 |T_sae, 3);
vgetmantps(ymm1, ymm2 |T_sae, 3);
vmaxpd(ymm1, ymm2, ymm3 |T_sae);
vmaxph(ymm1, ymm2, ymm3 |T_sae);
vmaxps(ymm1, ymm2, ymm3 |T_sae);
vminpd(ymm1, ymm2, ymm3 |T_sae);
vminph(ymm1, ymm2, ymm3 |T_sae);
vminps(ymm1, ymm2, ymm3 |T_sae);
vmulpd(ymm1, ymm2, ymm3 |T_rn_sae);
vmulph(ymm1, ymm2, ymm3 |T_rn_sae);
vmulps(ymm1, ymm2, ymm3 |T_rn_sae);
vrangepd(ymm1, ymm2, ymm3 |T_sae, 3);
vrangeps(ymm1, ymm2, ymm3 |T_sae, 3);
vreducepd(ymm1, ymm2 |T_sae, 3);
vreduceph(ymm1, ymm2 |T_sae, 3);
vreduceps(ymm1, ymm2 |T_sae, 3);
vrndscalepd(ymm1, ymm2 |T_sae, 3);
vrndscaleph(ymm1, ymm2 |T_sae, 3);
vrndscaleps(ymm1, ymm2 |T_sae, 3);
vscalefpd(ymm1, ymm2, ymm3 |T_rn_sae);
vscalefph(ymm1, ymm2, ymm3 |T_rn_sae);
vscalefps(ymm1, ymm2, ymm3 |T_rn_sae);
vsqrtpd(ymm1, ymm2 |T_rn_sae);
vsqrtph(ymm1, ymm2 |T_rn_sae);
vsqrtps(ymm1, ymm2 |T_rn_sae);
vsubpd(ymm1, ymm2, ymm3 |T_rn_sae);
vsubph(ymm1, ymm2, ymm3 |T_rn_sae);
vsubps(ymm1, ymm2, ymm3 |T_rn_sae);
23 changes: 23 additions & 0 deletions test/test_by_xed.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#include <stdio.h>
#include <xbyak/xbyak.h>

/*
	Code generator whose constructor body is not written here: it is pulled in
	textually from "cpp.txt" by the preprocessor, so every statement in that
	file is compiled as a member call on this generator.
	NOTE(review): cpp.txt is presumably produced by test_by_xed.sh from one of
	the files under test/target/ -- confirm against the script.
*/
class Code : public Xbyak::CodeGenerator {
public:
	Code()
	{
// The directive must stay on its own line; the included text forms the body.
#include "cpp.txt"
	}
};

/*
	Build the code described by Code and dump the raw encoded bytes to the
	file "bin" in the current directory.

	Returns 0 on success. Returns 1 and prints a line starting with "ERR" when
	- the generator throws (e.g. an instruction is rejected),
	- "bin" cannot be opened for writing, or
	- the bytes cannot be written out completely.
	Reporting the I/O failures prevents a later step from silently reading a
	stale or truncated "bin".
*/
int main()
try
{
	Code c;
	const char *const outName = "bin";
	FILE *fp = fopen(outName, "wb");
	if (fp == NULL) {
		printf("ERR can't open %s\n", outName);
		return 1;
	}
	const size_t size = c.getSize();
	const size_t written = fwrite(c.getCode(), 1, size, fp);
	// fclose flushes buffered data, so its result is part of the write check.
	const bool closed = fclose(fp) == 0;
	if (written != size || !closed) {
		printf("ERR can't write %s\n", outName);
		return 1;
	}
	return 0;
} catch (const std::exception& e) {
	printf("ERR %s\n", e.what());
	return 1;
}
Loading

0 comments on commit 6ac4d7a

Please sign in to comment.