Skip to content

Commit

Permalink
more robust fix to the over-run issue
Browse files Browse the repository at this point in the history
  • Loading branch information
David-Baddeley committed May 1, 2020
1 parent de8ca80 commit a45abf9
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pymecompress/bcl.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def HuffmanCompressQuant(data, float offset, float scale):
cdef int dsize = data.size

out = np.zeros(int(dsize*1.01 + 320),'uint8')
quant = np.zeros(int(np.ceil(dsize/16.0)*16), 'uint8') #quantization output buffer needs to be a multiple of 16 bytes if using AVX quantization
quant = np.zeros(dsize, 'uint8')
cdef unsigned char [:] ov = out
cdef unsigned char [:] qv = quant

Expand Down
11 changes: 8 additions & 3 deletions pymecompress/quantize.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,8 @@ void quantize_u16_avx(uint16_t * data, uint8_t * out, int size, float offset, fl
offs = _mm256_set1_ps(offset);
sc = _mm256_set1_ps(qs);

/*process 16 values at a time*/

for (i = 0; i < size; i+=16)
/* process 16 values at a time - only the whole groups of 16 (size rounded down to a multiple of 16) */
for (i = 0; i < (16*(size/16)); i+=16)
{
/* process first 8 values */
t2 = _mm_load_si128((__m128i *) &(data[i]));
Expand Down Expand Up @@ -107,6 +106,12 @@ void quantize_u16_avx(uint16_t * data, uint8_t * out, int size, float offset, fl

//out[i] = qs*sqrtf(data[i] - offset);
}

//handle the remaining (size % 16) values one at a time with scalar code
for (; i < size; i++)
{
out[i] = (uint8_t) roundf(qs*sqrtf(data[i] - offset));
}
}

//#endif
Expand Down

0 comments on commit a45abf9

Please sign in to comment.