More robust fix for the buffer over-run issue in AVX quantization

python-microscopy · May 1, 2020 · a45abf9 · a45abf9
1 parent de8ca80
commit a45abf9
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 4 deletions.
diff --git a/pymecompress/bcl.pyx b/pymecompress/bcl.pyx
@@ -60,7 +60,7 @@ def HuffmanCompressQuant(data, float offset, float scale):
     cdef int dsize = data.size
 
     out = np.zeros(int(dsize*1.01 + 320),'uint8')
-    quant = np.zeros(int(np.ceil(dsize/16.0)*16), 'uint8') #quantization output buffer needs to be a multiple of 16 bytes if using AVX quantization
+    quant = np.zeros(dsize, 'uint8')
     cdef unsigned char [:] ov = out
     cdef unsigned char [:] qv = quant
 

diff --git a/pymecompress/quantize.c b/pymecompress/quantize.c
@@ -58,9 +58,8 @@ void quantize_u16_avx(uint16_t * data, uint8_t * out, int size, float offset, fl
     offs = _mm256_set1_ps(offset);
     sc = _mm256_set1_ps(qs);
 
-    /*process 16 values at a time*/
-
-    for (i = 0; i < size; i+=16)
+    /* process 16 values at a time - only the leading portion that is a whole multiple of 16 */
+    for (i = 0; i < (16*(size/16)); i+=16)
     {
         /* process first 8 values */
         t2 = _mm_load_si128((__m128i *) &(data[i]));
@@ -107,6 +106,12 @@ void quantize_u16_avx(uint16_t * data, uint8_t * out, int size, float offset, fl
 
         //out[i] = qs*sqrtf(data[i] - offset);
     }
+
+    // handle the remaining (size % 16) values one at a time
+    for (; i < size; i++)
+    {
+        out[i] = (uint8_t) roundf(qs*sqrtf(data[i] - offset));
+    }
 }
 
 //#endif