From 70e2f3b928b5e8d7898c4d612efca112839ee0de Mon Sep 17 00:00:00 2001
From: Koichiro Iwao
Date: Tue, 2 Apr 2024 10:35:28 +0900
Subject: [PATCH 1/4] Add wyhash.h with modifications by @trishume
---
module/Makefile.am | 1 +
module/wyhash.h | 113 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 114 insertions(+)
create mode 100644 module/wyhash.h
diff --git a/module/Makefile.am b/module/Makefile.am
index e3e9ba5a..a0b1dd4c 100644
--- a/module/Makefile.am
+++ b/module/Makefile.am
@@ -77,6 +77,7 @@ noinst_HEADERS = \
rdpXv.h \
amd64/funcs_amd64.h \
x86/funcs_x86.h \
+ wyhash.h \
$(EXTRA_HEADERS)
libxorgxrdp_la_LTLIBRARIES = libxorgxrdp.la
diff --git a/module/wyhash.h b/module/wyhash.h
new file mode 100644
index 00000000..5afc85f3
--- /dev/null
+++ b/module/wyhash.h
@@ -0,0 +1,113 @@
+/* Author: Wang Yi
+ chopped down and converted to older C standard for xorgxrdp
+*/
+#ifndef wyhash_final_version
+#define wyhash_final_version
+#ifndef WYHASH_CONDOM
+#define WYHASH_CONDOM 0
+#endif
+#include <stdint.h>
+#include <string.h>
+#if defined(_MSC_VER) && defined(_M_X64)
+ #include <intrin.h>
+ #pragma intrinsic(_umul128)
+#endif
+#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
+ #define _likely_(x) __builtin_expect(x,1)
+ #define _unlikely_(x) __builtin_expect(x,0)
+#else
+ #define _likely_(x) (x)
+ #define _unlikely_(x) (x)
+#endif
+static __inline__ uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); }
+static __inline__ void _wymum(uint64_t *A, uint64_t *B){
+#if defined(__SIZEOF_INT128__)
+ __uint128_t r;
+ r=*A; r*=*B;
+ #if(WYHASH_CONDOM>1)
+ *A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
+ #else
+ *A=(uint64_t)r; *B=(uint64_t)(r>>64);
+ #endif
+#elif defined(_MSC_VER) && defined(_M_X64)
+ #if(WYHASH_CONDOM>1)
+ uint64_t a, b;
+ a=_umul128(*A,*B,&b);
+ *A^=a; *B^=b;
+ #else
+ *A=_umul128(*A,*B,B);
+ #endif
+#else
+ uint64_t ha, hb, la, lb, hi, lo;
+ uint64_t rh, rm0, rm1, rl, t, c;
+ ha=*A>>32; hb=*B>>32; la=(uint32_t)*A; lb=(uint32_t)*B;
+ rh=ha*hb; rm0=ha*lb; rm1=hb*la; rl=la*lb; t=rl+(rm0<<32); c=t<rl;
+ lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
+ #if(WYHASH_CONDOM>1)
+ *A^=lo; *B^=hi;
+ #else
+ *A=lo; *B=hi;
+ #endif
+#endif
+}
+static __inline__ uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; }
+#ifndef WYHASH_LITTLE_ENDIAN
+ #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+ #define WYHASH_LITTLE_ENDIAN 1
+ #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ #define WYHASH_LITTLE_ENDIAN 0
+ #endif
+#endif
+#if (WYHASH_LITTLE_ENDIAN)
+static __inline__ uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}
+static __inline__ uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;}
+#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
+static __inline__ uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}
+static __inline__ uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);}
+#elif defined(_MSC_VER)
+static __inline__ uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
+static __inline__ uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);}
+#endif
+static __inline__ uint64_t _wyr3(const uint8_t *p, unsigned k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];}
+static __inline__ uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
+#if(WYHASH_CONDOM>0)
+ uint64_t a, b;
+ if(_likely_(i<=8)){
+ if(_likely_(i>=4)){ a=_wyr4(p); b=_wyr4(p+i-4); }
+ else if (_likely_(i)){ a=_wyr3(p,i); b=0; }
+ else a=b=0;
+ }
+ else{ a=_wyr8(p); b=_wyr8(p+i-8); }
+ return _wymix(secret[1]^len,_wymix(a^secret[1], b^seed));
+#else
+ #define oneshot_shift ((i<8)*((8-i)<<3))
+ return _wymix(secret[1]^len,_wymix((_wyr8(p)<<oneshot_shift)^secret[1],(_wyr8(p+i-8)>>oneshot_shift)^seed));
+#endif
+}
+
+static __inline__ uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){
+ if(_likely_(i<=16)) return _wyfinish16(p,len,seed,secret,i);
+ return _wyfinish(p+16,len,_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed),secret,i-16);
+}
+
+static __inline__ uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret){
+ const uint8_t *p;
+ uint64_t i;
+ uint64_t see1;
+ p=(const uint8_t *)key;
+ i=len; seed^=*secret;
+ if(_unlikely_(i>64)){
+ see1=seed;
+ do{
+ seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed)^_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^seed);
+ see1=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see1)^_wymix(_wyr8(p+48)^secret[4],_wyr8(p+56)^see1);
+ p+=64; i-=64;
+ }while(i>64);
+ seed^=see1;
+ }
+ return _wyfinish(p,len,seed,secret,i);
+}
+static const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full}; /* default secret; static so including this header from several .c files does not define the symbol twice */
+static __inline__ uint64_t wyhash64(uint64_t A, uint64_t B){ A^=_wyp[0]; B^=_wyp[1]; _wymum(&A,&B); return _wymix(A^_wyp[0],B^_wyp[1]);}
+static __inline__ uint64_t wyrand(uint64_t *seed){ *seed+=_wyp[0]; return _wymix(*seed,*seed^_wyp[1]);}
+#endif
From 813e613039bb451da2fe5916b87710b93d4e02da Mon Sep 17 00:00:00 2001
From: Koichiro Iwao
Date: Tue, 2 Apr 2024 10:37:44 +0900
Subject: [PATCH 2/4] Switch to wyhash from CRC for capture tile diff
Originally suggested by @trishume at #167.
---
module/rdpCapture.c | 19 +++++++++++--------
module/rdpClientCon.h | 2 +-
module/rdpEgl.c | 5 +++--
3 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/module/rdpCapture.c b/module/rdpCapture.c
index c7f9fa4a..107d6001 100644
--- a/module/rdpCapture.c
+++ b/module/rdpCapture.c
@@ -46,6 +46,10 @@ capture
#include "rdpMisc.h"
#include "rdpCapture.h"
+#include "wyhash.h"
+/* hex digits of pi as a 64 bit int */
+#define WYHASH_SEED 0x3243f6a8885a308dull
+
#if defined(XORGXRDP_GLAMOR)
#include "rdpEgl.h"
#include <glamor.h>
@@ -848,7 +852,7 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
int dst_stride;
int crc_offset;
int crc_stride;
- int crc;
+ uint64_t crc;
int num_crcs;
int mon_index;
@@ -887,7 +891,7 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
/* resize the crc list */
clientCon->num_rfx_crcs_alloc[mon_index] = num_crcs;
free(clientCon->rfx_crcs[mon_index]);
- clientCon->rfx_crcs[mon_index] = g_new0(int, num_crcs);
+ clientCon->rfx_crcs[mon_index] = g_new0(uint64_t, num_crcs);
}
extents_rect = *rdpRegionExtents(in_reg);
@@ -913,7 +917,8 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
}
else
{
- crc = crc_start();
+ /* hex digits of pi as a 64 bit int */
+ crc = WYHASH_SEED;
if (rcode == rgnPART)
{
LLOGLN(10, ("rdpCapture2: rgnPART"));
@@ -922,8 +927,7 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
rdpRegionIntersect(&tile_reg, in_reg, &tile_reg);
rects = REGION_RECTS(&tile_reg);
num_rects = REGION_NUM_RECTS(&tile_reg);
- crc = crc_process_data(crc, rects,
- num_rects * sizeof(BoxRec));
+ crc = wyhash((const void*)rects, num_rects * sizeof(BoxRec), crc, _wyp);
rdpCopyBox_a8r8g8b8_to_yuvalp(x, y,
src, src_stride,
dst, dst_stride,
@@ -939,11 +943,10 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
&rect, 1);
}
crc_dst = dst + (y << 8) * (dst_stride >> 8) + (x << 8);
- crc = crc_process_data(crc, crc_dst, 64 * 64 * 4);
- crc = crc_end(crc);
+ crc = wyhash((const void*)crc_dst, 64 * 64 * 4, crc, _wyp);
crc_offset = (y / XRDP_RFX_ALIGN) * crc_stride
+ (x / XRDP_RFX_ALIGN);
- LLOGLN(10, ("rdpCapture2: crc 0x%8.8x 0x%8.8x",
+ LLOGLN(10, ("rdpCapture2: crc 0x%" PRIx64 " 0x%" PRIx64,
crc, clientCon->rfx_crcs[mon_index][crc_offset]));
if (crc == clientCon->rfx_crcs[mon_index][crc_offset])
{
diff --git a/module/rdpClientCon.h b/module/rdpClientCon.h
index 2ba22dab..d24ad537 100644
--- a/module/rdpClientCon.h
+++ b/module/rdpClientCon.h
@@ -123,7 +123,7 @@ struct _rdpClientCon
RegionPtr dirtyRegion;
int num_rfx_crcs_alloc[16];
- int *rfx_crcs[16];
+ uint64_t *rfx_crcs[16];
/* true = skip drawing */
int suppress_output;
diff --git a/module/rdpEgl.c b/module/rdpEgl.c
index b84f65bd..04866d83 100644
--- a/module/rdpEgl.c
+++ b/module/rdpEgl.c
@@ -576,7 +576,7 @@ rdpEglOut(rdpClientCon *clientCon, struct rdp_egl *egl, RegionPtr in_reg,
/* resize the crc list */
clientCon->num_rfx_crcs_alloc[mon_index] = num_crcs;
free(clientCon->rfx_crcs[mon_index]);
- clientCon->rfx_crcs[mon_index] = g_new0(int, num_crcs);
+ clientCon->rfx_crcs[mon_index] = g_new0(uint64_t, num_crcs);
}
tile_extents_stride = (tile_extents_rect->x2 - tile_extents_rect->x1) / 64;
out_rect_index = 0;
@@ -614,7 +614,8 @@ rdpEglOut(rdpClientCon *clientCon, struct rdp_egl *egl, RegionPtr in_reg,
crc = crc_end(crc);
if (crc != crcs[(ly / 64) * tile_extents_stride + (lx / 64)])
{
- LLOGLN(0, ("rdpEglOut: error crc no match 0x%8.8x 0x%8.8x",
+ LLOGLN(0, ("rdpEglOut: error crc no match "
+ "0x%" PRIx64 " 0x%" PRIx64,
crc,
crcs[(ly / 64) * tile_extents_stride + (lx / 64)]));
}
From b9475e81be7d879cb00a6fa0ec34f4c4e9405cd6 Mon Sep 17 00:00:00 2001
From: Koichiro Iwao
Date: Wed, 3 Apr 2024 15:22:07 +0900
Subject: [PATCH 3/4] Lazy color convert copy after hash in rdpCapture2
Originally developed by @trishume.
---
module/rdpCapture.c | 36 +++++++++++++++++++++++++++++-------
1 file changed, 29 insertions(+), 7 deletions(-)
diff --git a/module/rdpCapture.c b/module/rdpCapture.c
index 107d6001..69851bff 100644
--- a/module/rdpCapture.c
+++ b/module/rdpCapture.c
@@ -590,6 +590,23 @@ isShmStatusActive(enum shared_memory_status status) {
}
}
+/******************************************************************************/
+/* hash the 64x64 tile of 4-byte pixels at (x, y) in src, one row at a time */
+static uint64_t
+wyhash_rfx_tile(const uint8_t *src, int src_stride, int x, int y, uint64_t seed)
+{
+ int row;
+ uint64_t hash;
+ const uint8_t *s8;
+ hash = seed;
+ for(row = 0; row < 64; row++)
+ {
+ s8 = src + (y+row) * src_stride + x * 4;
+ hash = wyhash((const void*)s8, 64 * 4, hash, _wyp);
+ }
+ return hash;
+}
+
/******************************************************************************/
static Bool
rdpCapture0(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
@@ -932,19 +949,16 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
src, src_stride,
dst, dst_stride,
rects, num_rects);
+ crc_dst = dst + (y << 8) * (dst_stride >> 8) + (x << 8);
+ crc = wyhash((const void*)crc_dst, 64 * 64 * 4, crc, _wyp);
rdpRegionUninit(&tile_reg);
}
else /* rgnIN */
{
LLOGLN(10, ("rdpCapture2: rgnIN"));
- rdpCopyBox_a8r8g8b8_to_yuvalp(x, y,
- src, src_stride,
- dst, dst_stride,
- &rect, 1);
+ crc = wyhash_rfx_tile(src, src_stride, x, y, crc);
}
- crc_dst = dst + (y << 8) * (dst_stride >> 8) + (x << 8);
- crc = wyhash((const void*)crc_dst, 64 * 64 * 4, crc, _wyp);
- crc_offset = (y / XRDP_RFX_ALIGN) * crc_stride
+ crc_offset = (y / XRDP_RFX_ALIGN) * crc_stride
+ (x / XRDP_RFX_ALIGN);
LLOGLN(10, ("rdpCapture2: crc 0x%" PRIx64 " 0x%" PRIx64,
crc, clientCon->rfx_crcs[mon_index][crc_offset]));
@@ -957,6 +971,14 @@ rdpCapture2(rdpClientCon *clientCon, RegionPtr in_reg, BoxPtr *out_rects,
}
else
{
+ /* lazily only do this if hash wasn't identical */
+ if (rcode != rgnPART)
+ {
+ rdpCopyBox_a8r8g8b8_to_yuvalp(x, y,
+ src, src_stride,
+ dst, dst_stride,
+ &rect, 1);
+ }
clientCon->rfx_crcs[mon_index][crc_offset] = crc;
(*out_rects)[out_rect_index] = rect;
out_rect_index++;
From ac6d867948f034fe20ebd59d0e5cfdcbf48784ce Mon Sep 17 00:00:00 2001
From: Koichiro Iwao
Date: Mon, 8 Apr 2024 00:05:42 +0900
Subject: [PATCH 4/4] Tighten loop
---
module/rdpCapture.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/module/rdpCapture.c b/module/rdpCapture.c
index 69851bff..bfde4eb5 100644
--- a/module/rdpCapture.c
+++ b/module/rdpCapture.c
@@ -599,10 +599,11 @@ wyhash_rfx_tile(const uint8_t *src, int src_stride, int x, int y, uint64_t seed)
uint64_t hash;
const uint8_t *s8;
hash = seed;
+ s8 = src + (y * src_stride) + (x * 4);
for(row = 0; row < 64; row++)
{
- s8 = src + (y+row) * src_stride + x * 4;
hash = wyhash((const void*)s8, 64 * 4, hash, _wyp);
+ s8 += src_stride;
}
return hash;
}