Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed misaligned memory access flagged by UBSan #2800

Merged
merged 1 commit into from
Nov 17, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
268 changes: 17 additions & 251 deletions libsrc/ncx.m4
Original file line number Diff line number Diff line change
Expand Up @@ -307,88 +307,24 @@ swapn2b(void *dst, const void *src, IntType nn)
IntType i;
uint16_t *op = (uint16_t*) dst;
uint16_t *ip = (uint16_t*) src;
uint16_t tmp;
for (i=0; i<nn; i++) {
op[i] = ip[i];
op[i] = (uint16_t)SWAP2(op[i]);
/* memcpy is used to handle the case of unaligned memory */
memcpy(&tmp, &ip[i], sizeof(tmp));
tmp = SWAP2(tmp);
memcpy(&op[i], &tmp, sizeof(tmp));
}
#if 0
char *op = dst;
const char *ip = src;

/* unroll the following to reduce loop overhead
*
* while (nn-- > 0)
* {
* *op++ = *(++ip);
* *op++ = *(ip++ -1);
* }
*/
while (nn > 3)
{
*op++ = *(++ip);
*op++ = *(ip++ -1);
*op++ = *(++ip);
*op++ = *(ip++ -1);
*op++ = *(++ip);
*op++ = *(ip++ -1);
*op++ = *(++ip);
*op++ = *(ip++ -1);
nn -= 4;
}
while (nn-- > 0)
{
*op++ = *(++ip);
*op++ = *(ip++ -1);
}
#endif
}

# ifndef vax
/*
 * Byte-swap a single 4-byte element.
 *
 * Copies 4 bytes from src into a local uint32_t, applies SWAP4 (defined
 * elsewhere in this file; expected to reverse the byte order), and copies
 * the 4 result bytes to dst.
 *
 * dst and src are accessed only through memcpy, so neither has to be
 * aligned for uint32_t and no object is read through an incompatible
 * pointer type.  Passing the same address for dst and src is fine because
 * the value goes through a local temporary.
 */
inline static void
swap4b(void *dst, const void *src)
{
    uint32_t word;

    /* memcpy is used to handle the case of unaligned memory */
    memcpy(&word, src, sizeof(word));
    word = SWAP4(word);
    memcpy(dst, &word, sizeof(word));
}
# endif /* !vax */

Expand All @@ -398,211 +334,41 @@ swapn4b(void *dst, const void *src, IntType nn)
IntType i;
uint32_t *op = (uint32_t*) dst;
uint32_t *ip = (uint32_t*) src;
uint32_t tmp;
for (i=0; i<nn; i++) {
/* copy over, make the below swap in-place */
op[i] = ip[i];
op[i] = SWAP4(op[i]);
/* memcpy is used to handle the case of unaligned memory */
memcpy(&tmp, &ip[i], sizeof(tmp));
tmp = SWAP4(tmp);
memcpy(&op[i], &tmp, sizeof(tmp));
}

#if 0
char *op = dst;
const char *ip = src;

/* unroll the following to reduce loop overhead
* while (nn-- > 0)
* {
* op[0] = ip[3];
* op[1] = ip[2];
* op[2] = ip[1];
* op[3] = ip[0];
* op += 4;
* ip += 4;
* }
*/
while (nn > 3)
{
op[0] = ip[3];
op[1] = ip[2];
op[2] = ip[1];
op[3] = ip[0];
op[4] = ip[7];
op[5] = ip[6];
op[6] = ip[5];
op[7] = ip[4];
op[8] = ip[11];
op[9] = ip[10];
op[10] = ip[9];
op[11] = ip[8];
op[12] = ip[15];
op[13] = ip[14];
op[14] = ip[13];
op[15] = ip[12];
op += 16;
ip += 16;
nn -= 4;
}
while (nn-- > 0)
{
op[0] = ip[3];
op[1] = ip[2];
op[2] = ip[1];
op[3] = ip[0];
op += 4;
ip += 4;
}
#endif
}

# ifndef vax
/*
 * Byte-swap a single 8-byte element.
 *
 * When FLOAT_WORDS_BIGENDIAN is defined, the two 4-byte halves keep their
 * positions and SWAP4 is applied to each half separately.  Otherwise SWAP8
 * is applied to the whole 8-byte value.  SWAP4/SWAP8 are defined elsewhere
 * in this file and are expected to reverse byte order.
 *
 * In both configurations dst and src are touched only through memcpy, so
 * they may be unaligned and no strict-aliasing rule is broken.  dst may be
 * the same address as src because the data passes through a local
 * temporary.
 */
inline static void
swap8b(void *dst, const void *src)
{
#ifdef FLOAT_WORDS_BIGENDIAN
    uint32_t halves[2];

    /* memcpy is used to handle the case of unaligned memory */
    memcpy(halves, src, sizeof(halves));
    halves[0] = SWAP4(halves[0]);
    halves[1] = SWAP4(halves[1]);
    memcpy(dst, halves, sizeof(halves));
#else
    uint64_t tmp;

    /* memcpy is used to handle the case of unaligned memory */
    memcpy(&tmp, src, sizeof(tmp));
    tmp = SWAP8(tmp);
    memcpy(dst, &tmp, sizeof(tmp));
#endif
}
# endif /* !vax */

# ifndef vax
/*
 * Byte-swap nn consecutive 8-byte elements from src into dst.
 *
 * Each element is handled exactly as in the single-element case: with
 * FLOAT_WORDS_BIGENDIAN defined, SWAP4 is applied to each 4-byte half in
 * place; otherwise SWAP8 is applied to the whole 8-byte value.
 * SWAP4/SWAP8 are defined elsewhere in this file.
 *
 * The buffers are walked as bytes and every element is moved through a
 * local temporary with memcpy, so dst and src need not be aligned for
 * uint64_t/uint32_t and no pointer to a wider type is ever formed from
 * them.  Nothing is done when nn is not positive.
 */
inline static void
swapn8b(void *dst, const void *src, IntType nn)
{
    unsigned char *out = (unsigned char *)dst;
    const unsigned char *in = (const unsigned char *)src;
    IntType i;

    for (i = 0; i < nn; i++) {
#ifdef FLOAT_WORDS_BIGENDIAN
        uint32_t halves[2];

        /* memcpy is used to handle the case of unaligned memory */
        memcpy(halves, in, sizeof(halves));
        halves[0] = SWAP4(halves[0]);
        halves[1] = SWAP4(halves[1]);
        memcpy(out, halves, sizeof(halves));
#else
        uint64_t tmp;

        /* memcpy is used to handle the case of unaligned memory */
        memcpy(&tmp, in, sizeof(tmp));
        tmp = SWAP8(tmp);
        memcpy(out, &tmp, sizeof(tmp));
#endif
        /* advance both cursors by one 8-byte element */
        in  += sizeof(uint64_t);
        out += sizeof(uint64_t);
    }
}
# endif /* !vax */

Expand Down