Skip to content

Commit 8c036d6

Browse files
xiangzhaiLeslie Zhai
authored and
Leslie Zhai
committed
Initial loongarch port
Co-authored-by: yangwenqing <[email protected]> Signed-off-by: Leslie Zhai <[email protected]> Signed-off-by: yangwenqing <[email protected]>
1 parent 0ec7b4e commit 8c036d6

30 files changed

+1912
-8
lines changed

CMakeLists.txt

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,9 @@ elseif (CMAKE_COMPILER_IS_CLANG AND NOT CROSS_COMPILE)
224224
elseif(ARCH_ARM32)
225225
set(GNUCC_ARCH armv7a)
226226
set(TUNE_FLAG generic)
227+
elseif(ARCH_LOONGARCH64)
228+
set(GNUCC_ARCH la464)
229+
set(TUNE_FLAG generic)
227230
else()
228231
set(GNUCC_ARCH native)
229232
set(TUNE_FLAG generic)
@@ -267,6 +270,11 @@ if (ARCH_AARCH64)
267270
endif()
268271
endif(ARCH_AARCH64)
269272

273+
if (ARCH_LOONGARCH64)
274+
set(ARCH_C_FLAGS "-mlsx")
275+
set(ARCH_CXX_FLAGS "-mlsx")
276+
endif(ARCH_LOONGARCH64)
277+
270278
message(STATUS "ARCH_C_FLAGS : ${ARCH_C_FLAGS}")
271279
message(STATUS "ARCH_CXX_FLAGS : ${ARCH_CXX_FLAGS}")
272280

@@ -275,8 +283,13 @@ if (NOT FAT_RUNTIME)
275283
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}")
276284
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -${TUNE_FLAG}=${GNUCC_TUNE}")
277285
else()
278-
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}")
279-
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}")
286+
if (ARCH_LOONGARCH64)
287+
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} ${ARCH_C_FLAGS}")
288+
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} ${ARCH_CXX_FLAGS}")
289+
else()
290+
set(ARCH_C_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_C_FLAGS}")
291+
set(ARCH_CXX_FLAGS "-${ARCH_FLAG}=${GNUCC_ARCH} -mtune=${TUNE_FLAG} ${ARCH_CXX_FLAGS}")
292+
endif()
280293
endif()
281294
endif()
282295

@@ -364,6 +377,8 @@ elseif (ARCH_ARM32 OR ARCH_AARCH64)
364377
endif()
365378
elseif (ARCH_PPC64EL)
366379
CHECK_INCLUDE_FILE_CXX(altivec.h HAVE_C_PPC64EL_ALTIVEC_H)
380+
elseif (ARCH_LOONGARCH64)
381+
CHECK_INCLUDE_FILE_CXX(lsxintrin.h HAVE_C_LOONGARCH64_LSXINTRIN_H)
367382
endif()
368383

369384
CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN)
@@ -541,7 +556,7 @@ add_subdirectory(doc/dev-reference)
541556
# PCRE check, we have a fixed requirement for PCRE to use Chimera
542557
# and hscollider
543558
set(PCRE_REQUIRED_MAJOR_VERSION 8)
544-
set(PCRE_REQUIRED_MINOR_VERSION 41)
559+
set(PCRE_REQUIRED_MINOR_VERSION 39)
545560
set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION})
546561
include (${CMAKE_MODULE_PATH}/pcre.cmake)
547562
if (NOT CORRECT_PCRE_VERSION)
@@ -622,6 +637,11 @@ set (hs_exec_common_SRCS
622637
${hs_exec_common_SRCS}
623638
src/util/arch/arm/cpuid_flags.c
624639
)
640+
elseif(ARCH_LOONGARCH64)
641+
set (hs_exec_common_SRCS
642+
${hs_exec_common_SRCS}
643+
src/util/arch/loongarch64/cpuid_flags.c
644+
)
625645
endif ()
626646

627647
set (hs_exec_SRCS
@@ -779,6 +799,10 @@ elseif (ARCH_PPC64EL)
779799
set (hs_exec_SRCS
780800
${hs_exec_SRCS}
781801
src/util/supervector/arch/ppc64el/impl.cpp)
802+
elseif (ARCH_LOONGARCH64)
803+
set (hs_exec_SRCS
804+
${hs_exec_SRCS}
805+
src/util/supervector/arch/loongarch64/impl.cpp)
782806
endif ()
783807
endif()
784808

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# Vectorscan?
22

33
A fork of Intel's Hyperscan, modified to run on more platforms. Currently ARM NEON/ASIMD
4-
is 100% functional, and Power VSX are in development. ARM SVE2 will be implemented when
5-
harwdare becomes accessible to the developers. More platforms will follow in the future,
6-
on demand/request.
4+
is 100% functional, LoongArch LSX is 100% functional, and Power VSX are in development.
5+
ARM SVE2 will be implemented when harwdare becomes accessible to the developers.
6+
More platforms will follow in the future, on demand/request.
77

88
Vectorscan will follow Intel's API and internal algorithms where possible, but will not
99
hesitate to make code changes where it is thought of giving better performance or better

cmake/arch.cmake

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ elseif (HAVE_C_ARM_NEON_H)
1111
elseif (HAVE_C_PPC64EL_ALTIVEC_H)
1212
set (INTRIN_INC_H "altivec.h")
1313
set (FAT_RUNTIME OFF)
14+
elseif (HAVE_C_LOONGARCH64_LSXINTRIN_H)
15+
set (INTRIN_INC_H "lsxintrin.h")
16+
set (FAT_RUNTIME OFF)
1417
else()
1518
message (FATAL_ERROR "No intrinsics header found")
1619
endif ()
@@ -160,6 +163,12 @@ int main() {
160163
vector int a = vec_splat_s32(1);
161164
(void)a;
162165
}" HAVE_VSX)
166+
elseif (ARCH_LOONGARCH64)
167+
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
168+
int main() {
169+
__m128i a = __lsx_vreplgr2vr_w(1);
170+
(void)a;
171+
}" HAVE_LSX)
163172
else ()
164173
message (FATAL_ERROR "Unsupported architecture")
165174
endif ()

cmake/config.h.in

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
/* "Define if building for PPC64EL" */
2525
#cmakedefine ARCH_PPC64EL
2626

27+
/* "Define if building for LOONGARCH64" */
28+
#cmakedefine ARCH_LOONGARCH64
29+
2730
/* "Define if cross compiling for AARCH64" */
2831
#cmakedefine CROSS_COMPILE_AARCH64
2932

@@ -81,6 +84,9 @@
8184
/* C compiler has arm_neon.h */
8285
#cmakedefine HAVE_C_PPC64EL_ALTIVEC_H
8386

87+
/* C compiler has lsxintrin.h */
88+
#cmakedefine HAVE_C_LOONGARCH64_LSXINTRIN_H
89+
8490
/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
8591
0 if you don't. */
8692
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP

cmake/platform.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ else()
1616
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_A64)\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_AARCH64)
1717
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_ARM)\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_ARM32)
1818
CHECK_C_SOURCE_COMPILES("#if !defined(__PPC64__) && !(defined(__LITTLE_ENDIAN__) && defined(__VSX__))\n#error not ppc64el\n#endif\nint main(void) { return 0; }" ARCH_PPC64EL)
19-
if (ARCH_X86_64 OR ARCH_AARCH64 OR ARCH_PPC64EL)
19+
CHECK_C_SOURCE_COMPILES("#if !(defined(__loongarch_lp64) || defined( __loongarch64))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_LOONGARCH64)
20+
if (ARCH_X86_64 OR ARCH_AARCH64 OR ARCH_PPC64EL OR ARCH_LOONGARCH64)
2021
set(ARCH_64_BIT TRUE)
2122
else()
2223
set(ARCH_32_BIT TRUE)

src/hs.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
4949
#include "util/arch/x86/cpuid_inline.h"
5050
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
51+
#elif defined(ARCH_LOONGARCH64)
5152
#endif
5253
#include "util/depth.h"
5354
#include "util/popcount.h"

src/hs_valid_platform.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,7 @@ hs_error_t HS_CDECL hs_valid_platform(void) {
5252
}
5353
#elif defined(ARCH_PPC64EL)
5454
return HS_SUCCESS;
55+
#elif defined(ARCH_LOONGARCH64)
56+
return HS_SUCCESS;
5557
#endif
5658
}

src/nfa/loongarch64/shufti.hpp

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* Copyright (c) 2015-2017, Intel Corporation
3+
* Copyright (c) 2020-2021, VectorCamp PC
4+
* Copyright (c) 2023, Loongson Technology
5+
*
6+
* Redistribution and use in source and binary forms, with or without
7+
* modification, are permitted provided that the following conditions are met:
8+
*
9+
* * Redistributions of source code must retain the above copyright notice,
10+
* this list of conditions and the following disclaimer.
11+
* * Redistributions in binary form must reproduce the above copyright
12+
* notice, this list of conditions and the following disclaimer in the
13+
* documentation and/or other materials provided with the distribution.
14+
* * Neither the name of Intel Corporation nor the names of its contributors
15+
* may be used to endorse or promote products derived from this software
16+
* without specific prior written permission.
17+
*
18+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28+
* POSSIBILITY OF SUCH DAMAGE.
29+
*/
30+
31+
/** \file
32+
* \brief Shufti: character class acceleration.
33+
*/
34+
35+
template <uint16_t S>
36+
static really_inline
37+
const SuperVector<S> blockSingleMask(SuperVector<S> mask_lo, SuperVector<S> mask_hi, SuperVector<S> chars) {
38+
const SuperVector<S> low4bits = SuperVector<S>::dup_u8(0xf);
39+
40+
SuperVector<S> c_lo = chars & low4bits;
41+
SuperVector<S> c_hi = chars.template vshr_8_imm<4>();
42+
c_lo = mask_lo.template pshufb<false>(c_lo);
43+
c_hi = mask_hi.template pshufb<false>(c_hi);
44+
45+
return (c_lo & c_hi) > (SuperVector<S>::Zeroes());
46+
}
47+
48+
template <uint16_t S>
49+
static really_inline
50+
SuperVector<S> blockDoubleMask(SuperVector<S> mask1_lo, SuperVector<S> mask1_hi, SuperVector<S> mask2_lo, SuperVector<S> mask2_hi, SuperVector<S> chars) {
51+
52+
const SuperVector<S> low4bits = SuperVector<S>::dup_u8(0xf);
53+
SuperVector<S> chars_lo = chars & low4bits;
54+
chars_lo.print8("chars_lo");
55+
SuperVector<S> chars_hi = chars.template vshr_64_imm<4>() & low4bits;
56+
chars_hi.print8("chars_hi");
57+
SuperVector<S> c1_lo = mask1_lo.template pshufb<true>(chars_lo);
58+
c1_lo.print8("c1_lo");
59+
SuperVector<S> c1_hi = mask1_hi.template pshufb<true>(chars_hi);
60+
c1_hi.print8("c1_hi");
61+
SuperVector<S> t1 = c1_lo | c1_hi;
62+
t1.print8("t1");
63+
64+
SuperVector<S> c2_lo = mask2_lo.template pshufb<true>(chars_lo);
65+
c2_lo.print8("c2_lo");
66+
SuperVector<S> c2_hi = mask2_hi.template pshufb<true>(chars_hi);
67+
c2_hi.print8("c2_hi");
68+
SuperVector<S> t2 = c2_lo | c2_hi;
69+
t2.print8("t2");
70+
t2.template vshr_128_imm<1>().print8("t2.vshr_128(1)");
71+
SuperVector<S> t = t1 | (t2.template vshr_128_imm<1>());
72+
t.print8("t");
73+
74+
return !t.eq(SuperVector<S>::Ones());
75+
}

src/nfa/loongarch64/truffle.hpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Copyright (c) 2015-2017, Intel Corporation
3+
* Copyright (c) 2020-2021, VectorCamp PC
4+
* Copyright (c) 2023, Loongson Technology
5+
*
6+
* Redistribution and use in source and binary forms, with or without
7+
* modification, are permitted provided that the following conditions are met:
8+
*
9+
* * Redistributions of source code must retain the above copyright notice,
10+
* this list of conditions and the following disclaimer.
11+
* * Redistributions in binary form must reproduce the above copyright
12+
* notice, this list of conditions and the following disclaimer in the
13+
* documentation and/or other materials provided with the distribution.
14+
* * Neither the name of Intel Corporation nor the names of its contributors
15+
* may be used to endorse or promote products derived from this software
16+
* without specific prior written permission.
17+
*
18+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28+
* POSSIBILITY OF SUCH DAMAGE.
29+
*/
30+
31+
/** \file
32+
* \brief Truffle: character class acceleration.
33+
*
34+
*/
35+
36+
template <uint16_t S>
37+
static really_inline
38+
const SuperVector<S> blockSingleMask(SuperVector<S> shuf_mask_lo_highclear, SuperVector<S> shuf_mask_lo_highset, SuperVector<S> chars) {
39+
40+
chars.print8("chars");
41+
shuf_mask_lo_highclear.print8("shuf_mask_lo_highclear");
42+
shuf_mask_lo_highset.print8("shuf_mask_lo_highset");
43+
44+
SuperVector<S> highconst = SuperVector<S>::dup_u8(0x80);
45+
highconst.print8("highconst");
46+
SuperVector<S> shuf_mask_hi = SuperVector<S>::dup_u64(0x8040201008040201);
47+
shuf_mask_hi.print8("shuf_mask_hi");
48+
49+
SuperVector<S> shuf1 = shuf_mask_lo_highclear.pshufb(chars);
50+
shuf1.print8("shuf1");
51+
SuperVector<S> t1 = chars ^ highconst;
52+
t1.print8("t1");
53+
SuperVector<S> shuf2 = shuf_mask_lo_highset.pshufb(t1);
54+
shuf2.print8("shuf2");
55+
SuperVector<S> t2 = highconst.opandnot(chars.template vshr_64_imm<4>());
56+
t2.print8("t2");
57+
SuperVector<S> shuf3 = shuf_mask_hi.pshufb(t2);
58+
shuf3.print8("shuf3");
59+
SuperVector<S> res = (shuf1 | shuf2) & shuf3;
60+
res.print8("(shuf1 | shuf2) & shuf3");
61+
62+
return !res.eq(SuperVector<S>::Zeroes());
63+
}

0 commit comments

Comments
 (0)