Skip to content

Commit

Permalink
Merge pull request #3501 from bylaws/memcpy
Browse files (browse the repository at this point in the history)
FEXCore: Fallback to the memcpy slow path for overlaps within 32 bytes
  • Loading branch information
Sonicadvance1 authored Mar 21, 2024
2 parents 7dcacfe + 12fb26f commit 167896d
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 160 deletions.
15 changes: 7 additions & 8 deletions FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -1814,10 +1814,6 @@ DEF_OP(MemSet) {
ARMEmitter::ForwardLabel AgainInternal128Exit{};
ARMEmitter::BackwardLabel AgainInternal128{};

// Fallback to byte by byte loop if not 4 byte aligned
and_(ARMEmitter::Size::i64Bit, TMP4, TMP2, 0x3);
cbnz(ARMEmitter::Size::i64Bit, TMP4, &AgainInternal);

if (Direction == -1) {
sub(ARMEmitter::Size::i64Bit, TMP2, TMP2, 32 - Size);
}
Expand Down Expand Up @@ -2127,15 +2123,18 @@ DEF_OP(MemCpy) {
cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal);

if (!Op->IsAtomic) {
ARMEmitter::ForwardLabel AbsPos{};
ARMEmitter::ForwardLabel AgainInternal256Exit{};
ARMEmitter::ForwardLabel AgainInternal128Exit{};
ARMEmitter::BackwardLabel AgainInternal128{};
ARMEmitter::BackwardLabel AgainInternal256{};

// Fallback to byte by byte loop if either of start/end are not 4 byte aligned
orr(ARMEmitter::Size::i64Bit, TMP4, TMP2, TMP3);
and_(ARMEmitter::Size::i64Bit, TMP4, TMP4, 0x3);
cbnz(ARMEmitter::Size::i64Bit, TMP4, &AgainInternal);
sub(ARMEmitter::Size::i64Bit, TMP4, TMP2, TMP3);
tbz(TMP4, 63, &AbsPos);
neg(ARMEmitter::Size::i64Bit, TMP4, TMP4);
Bind(&AbsPos);
sub(ARMEmitter::Size::i64Bit, TMP4, TMP4, 32);
tbnz(TMP4, 63, &AgainInternal);

if (Direction == -1) {
sub(ARMEmitter::Size::i64Bit, TMP2, TMP2, 32 - Size);
Expand Down
Loading

0 comments on commit 167896d

Please sign in to comment.