Updated kernel with flipped WG dimensions

woutdenolf · Jul 16, 2013 · 496d9ec · 496d9ec
1 parent 08b5508
commit 496d9ec
Show file tree

Hide file tree

Showing 8 changed files with 290 additions and 159 deletions.
diff --git a/openCL/algebra.cl b/openCL/algebra.cl
@@ -30,9 +30,9 @@ __kernel void combine(
 	int gid1 = (int) get_global_id(1);
 	int gid0 = (int) get_global_id(0);
 
-	if (gid0 < height && gid1 < width) {
+	if (gid1 < height && gid0 < width) {
 
-		int index = gid0 * width + gid1;
+		int index = gid1 * width + gid0;
 		int index_dog = dog * width * height +  index;
 		w[index_dog] = a * u[index] + b * v[index];
 	}

diff --git a/openCL/convolution.cl b/openCL/convolution.cl
@@ -23,9 +23,10 @@ __kernel void horizontal_convolution(
 
 	int HALF_FILTER_SIZE = (FILTER_SIZE % 2 == 1 ? (FILTER_SIZE)/2 : (FILTER_SIZE+1)/2);
 
-	if (gid0 < IMAGE_H && gid1 < IMAGE_W) {
+	if (gid1 < IMAGE_H && gid0 < IMAGE_W) {
 
-		int pos = gid0* IMAGE_W + gid1;
+//		int pos = gid0* IMAGE_W + gid1;
+		int pos = gid1*IMAGE_W + gid0;
 		int fIndex = 0;
 		float sum = 0.0f;
 		int c = 0;
@@ -36,13 +37,13 @@ __kernel void horizontal_convolution(
 		for (c = -HALF_FILTER_SIZE ; c < FILTER_SIZE-HALF_FILTER_SIZE ; c++) {
 
 			newpos = pos + c;
-			if (gid1 + c < 0) {
+			if (gid0 + c < 0) {
 				//debug=1;
-				newpos= pos - 2*gid1 - c - 1;
+				newpos= pos - 2*gid0 - c - 1;
 			}
 
-			else if (gid1 + c > IMAGE_W -1 ) {
-				newpos= (gid0+2)*IMAGE_W - gid1 -c -1;
+			else if (gid0 + c > IMAGE_W -1 ) {
+				newpos= (gid1+2)*IMAGE_W - gid0 -c -1;
 				//newpos= pos - c+1; //newpos - 2*c;
 				//debug = 1;	
 			}
@@ -78,11 +79,12 @@ __kernel void vertical_convolution(
 	int gid0 = (int) get_global_id(0);
 
 
-	if (gid0 < IMAGE_H && gid1 < IMAGE_W) {
+	if (gid1 < IMAGE_H && gid0 < IMAGE_W) {
 
 		int HALF_FILTER_SIZE = (FILTER_SIZE % 2 == 1 ? (FILTER_SIZE)/2 : (FILTER_SIZE+1)/2);
 
-		int pos = gid0 * IMAGE_W + gid1;
+//		int pos = gid0 * IMAGE_W + gid1;
+		int pos = gid1 * IMAGE_W + gid0;
 		int fIndex = 0;
 		float sum = 0.0f;
 		int r = 0,newpos=0;
@@ -91,12 +93,12 @@ __kernel void vertical_convolution(
 		for (r = -HALF_FILTER_SIZE ; r < FILTER_SIZE-HALF_FILTER_SIZE ; r++) {
 			newpos = pos + r * (IMAGE_W);
 
-			if (gid0+r < 0) {
-				newpos = gid1 -(r+1)*IMAGE_W - gid0*IMAGE_W;
+			if (gid1+r < 0) {
+				newpos = gid0 -(r+1)*IMAGE_W - gid1*IMAGE_W;
 				//debug=1;	
 			}
-			else if (gid0+r > IMAGE_H -1) {
-				newpos= (IMAGE_H-1)*IMAGE_W + gid1 + (IMAGE_H - r)*IMAGE_W - gid0*IMAGE_W;
+			else if (gid1+r > IMAGE_H -1) {
+				newpos= (IMAGE_H-1)*IMAGE_W + gid0 + (IMAGE_H - r)*IMAGE_W - gid1*IMAGE_W;
 			}
 			sum += input[ newpos ] * filter[ fIndex   ];
 			fIndex += 1;

diff --git a/openCL/image.cl b/openCL/image.cl
@@ -54,20 +54,20 @@ __kernel void compute_gradient_orientation(
 	int gid1 = (int) get_global_id(1);
 	int gid0 = (int) get_global_id(0);
 
-	if (gid0 < height && gid1 < width) {
+	if (gid1 < height && gid0 < width) {
 
 		float xgrad, ygrad;
-		int pos = gid0*width+gid1;
+		int pos = gid1*width+gid0;
 
-        if (gid1 == 0)
+        if (gid0 == 0)
 			xgrad = 2.0f * (igray[pos+1] - igray[pos]);
-        else if (gid1 == width-1)
+        else if (gid0 == width-1)
 			xgrad = 2.0f * (igray[pos] - igray[pos-1]);
         else
 			xgrad = igray[pos+1] - igray[pos-1];
-        if (gid0 == 0)
+        if (gid1 == 0)
 			ygrad = 2.0f * (igray[pos] - igray[pos + width]);
-        else if (gid0 == height-1)
+        else if (gid1 == height-1)
 			ygrad = 2.0f * (igray[pos - width] - igray[pos]);
         else
 			ygrad = igray[pos - width] - igray[pos + width];

diff --git a/openCL/preprocess.cl b/openCL/preprocess.cl
@@ -10,32 +10,29 @@
  *   Principal authors: J. Kieffer ([email protected])
  *   Last revision: 30/05/2013
  *
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE. 
+ * 
+ **/
+
 
 
 //OpenCL extensions are silently defined by opencl compiler at compile-time:
@@ -47,12 +44,10 @@
 #else
   #define printf(...)
 #endif
-#pragma OPENCL EXTENSION all : enable
-//#pragma OPENCL EXTENSION cl_khr_local_float32_base_atomics : enable
 
-//#ifndef WORKGROUP_SIZE
-#define WORKGROUP_SIZE 1024
-//#endif
+#ifndef WORKGROUP_SIZE
+    #define WORKGROUP_SIZE 1024
+#endif
 
 #define MAX_CONST_SIZE 16384
 
@@ -61,9 +56,9 @@
  * \brief Cast values of an array of uint8 into a float output array.
  *
  * @param array_int:     Pointer to global memory with the input data as unsigned8 array
- * @param array_float:  Pointer to global memory with the output data as float array
- * @param IMAGE_W:        Width of the image
- * @param IMAGE_H:         Height of the image
+ * @param array_float:   Pointer to global memory with the output data as float array
+ * @param IMAGE_W:       Width of the image
+ * @param IMAGE_H:       Height of the image
  */
 __kernel void
 u8_to_float( __global unsigned char  *array_int,
@@ -72,10 +67,11 @@ u8_to_float( __global unsigned char  *array_int,
              const int IMAGE_H
 )
 {
-    int i = get_global_id(0) * IMAGE_W + get_global_id(1);
     //Global memory guard for padding
-    if(i < IMAGE_W*IMAGE_H)
-        array_float[i]=(float)array_int[i];
+    if ((get_global_id(0)<IMAGE_W) && (get_global_id(1) < IMAGE_H)){
+        int i = get_global_id(0) + IMAGE_W * get_global_id(1);
+    	array_float[i]=(float)array_int[i];
+    } //end test in image
 }//end kernel
 
 /**
@@ -93,10 +89,11 @@ u16_to_float(__global unsigned short  *array_int,
              const int IMAGE_H
 )
 {
-    int i = get_global_id(0) * IMAGE_W + get_global_id(1);
     //Global memory guard for padding
-    if(i < IMAGE_W*IMAGE_H)
-        array_float[i]=(float)array_int[i];
+    if ((get_global_id(0)<IMAGE_W) && (get_global_id(1) < IMAGE_H)){
+    	int i = get_global_id(0) + IMAGE_W * get_global_id(1);
+    	array_float[i]=(float)array_int[i];
+    }
 }//end kernel
 
 
@@ -115,10 +112,11 @@ s32_to_float(    __global int  *array_int,
                  const int IMAGE_H
 )
 {
-    int i = get_global_id(0) * IMAGE_W + get_global_id(1);
     //Global memory guard for padding
-    if(i < IMAGE_W*IMAGE_H)
+    if ((get_global_id(0)<IMAGE_W) && (get_global_id(1) < IMAGE_H)){
+    	int i = get_global_id(0) + IMAGE_W * get_global_id(1);
         array_float[i] = (float)(array_int[i]);
+    }//end test in image
 }//end kernel
 
 /**
@@ -136,10 +134,11 @@ s64_to_float(    __global long *array_int,
                  const int IMAGE_H
 )
 {
-    int i = get_global_id(0) * IMAGE_W + get_global_id(1);
     //Global memory guard for padding
-    if(i < IMAGE_W*IMAGE_H)
+    if ((get_global_id(0)<IMAGE_W) && (get_global_id(1) < IMAGE_H)){
+    	int i = get_global_id(0) + IMAGE_W * get_global_id(1);
         array_float[i] = (float)(array_int[i]);
+    }//end test in image
 }//end kernel
 
 /**
@@ -183,11 +182,11 @@ rgb_to_float(    __global unsigned char *array_int,
                  const int IMAGE_H
 )
 {
-    int i = get_global_id(0) * IMAGE_W + get_global_id(1);
     //Global memory guard for padding
-    if(i < IMAGE_W*IMAGE_H)
+    if ((get_global_id(0)<IMAGE_W) && (get_global_id(1) < IMAGE_H)){
+    	int i = get_global_id(0) + IMAGE_W * get_global_id(1);
         array_float[i] = 0.299f*array_int[3*i] + 0.587f*array_int[3*i+1] + 0.114f*array_int[3*i+2];
-;
+    }  //end test in image
 }//end kernel
 
 
@@ -204,21 +203,18 @@ rgb_to_float(    __global unsigned char *array_int,
  *
 **/
 __kernel void
-normalizes(    __global         float     *image,
-            __constant         float * min_in __attribute__((max_constant_size(MAX_CONST_SIZE))),
-            __constant         float * max_in __attribute__((max_constant_size(MAX_CONST_SIZE))),
-            __constant         float * max_out __attribute__((max_constant_size(MAX_CONST_SIZE))),
+normalizes(    __global       float     *image,
+            __constant        float * min_in __attribute__((max_constant_size(MAX_CONST_SIZE))),
+            __constant        float * max_in __attribute__((max_constant_size(MAX_CONST_SIZE))),
+            __constant        float * max_out __attribute__((max_constant_size(MAX_CONST_SIZE))),
             const             int IMAGE_W,
             const             int IMAGE_H
 )
 {
-    float data;
-    int i = get_global_id(0) * IMAGE_W + get_global_id(1);
     //Global memory guard for padding
-    if(i < IMAGE_W*IMAGE_H)
-    {
-        data = image[i];
-        image[i] = max_out[0]*(data-min_in[0])/(max_in[0]-min_in[0]);
+    if((get_global_id(0) < IMAGE_W) && (get_global_id(1)<IMAGE_H)){
+    	int i = get_global_id(0) + IMAGE_W * get_global_id(1);
+        image[i] = max_out[0]*(image[i]-min_in[0])/(max_in[0]-min_in[0]);
     };//end if in IMAGE
 };//end kernel
 
@@ -239,16 +235,18 @@ shrink(const __global     float     *image_in,
             __global     float     *image_out,
             const             int scale_w,
             const             int scale_h,
-            const             int IMAGE_W,
-            const             int IMAGE_H
+            const             int LARGE_W,
+            const             int LARGE_H,
+            const             int SMALL_W,
+            const             int SMALL_H
 )
 {
     int gid0=get_global_id(0), gid1=get_global_id(1);
-    int j,i = gid0 * IMAGE_W + gid1;
+    int j,i = gid0 + SMALL_W * gid1;
     //Global memory guard for padding
-    if(i < IMAGE_W*IMAGE_H)
+    if ((gid0 < SMALL_W) && (gid1 <SMALL_H))
     {
-        j = gid0*IMAGE_W*scale_w*scale_h + gid1*scale_w;
+        j = gid0 * scale_w + gid1 * scale_h * LARGE_W;
         image_out[i] = image_in[j];
     };//end if in IMAGE
 };//end kernel
@@ -260,16 +258,18 @@ shrink(const __global     float     *image_in,
  *
  * @param image_in        Float pointer to global memory storing the big image.
  * @param image_ou        Float pointer to global memory storing the small image.
- * @param scale_w:     Minimum value in the input array
- * @param scale_h:     Maximum value in the input array
- * @param binned_width:    Width of the output image
- * @param IMAGE_H:     Height of the output image
+ * @param scale_width:    Horizontal binning factor
+ * @param scale_heigth:   Vertical binning factor
+ * @param orig_width:     Width of the original (unbinned) image
+ * @param orig_heigth:    Height of the original (unbinned) image
+ * @param binned_width:   Width of the binned output image
+ * @param binned_heigth:  Height of the binned output image
  *
- *Nota: this is a 2D kernel.
+ * Note: this is a 2D kernel. This kernel is not working and has not been tested!
 **/
 __kernel void
 bin(        const    __global     float     *image_in,
-                    __global     float     *image_out,
+                     __global     float     *image_out,
             const                 int     scale_width,
             const                 int     scale_heigth,
             const                 int     orig_width,
@@ -279,29 +279,28 @@ bin(        const    __global     float     *image_in,
 )
 {
     int gid0=get_global_id(0), gid1=get_global_id(1);
-    int j,i = gid0 * binned_width + gid1;
-    int w, h, size_w, size_h, big_h, big_w;
-    float data=0.0f;
     //Global memory guard for padding
-    if(i < binned_width*binned_heigth){
-        size_h = 0;
-        for (h=0; h<scale_heigth; h++){
-            big_h = gid0 * scale_heigth + h;
-            if (big_h < orig_heigth){
-                size_h += 1;
-                size_w = 0;
-                for (w=0; w<scale_width; w++){
-                    big_w = gid1*scale_width + w;
-                    if (big_w < orig_width){
-                        //j = (gid0 * scale_heigth + h) * (binned_width*scale_width) + (gid1*scale_width + w);
-                        size_w += 1;
-                        j = big_h * (binned_width*scale_width) + big_w;
-                        data += image_in[j];
-                    }//end test in image horiz
-                };//end for horiz
-            }//end test in image vert
+    if((gid0 < binned_width) && (gid1 < binned_heigth) ){
+    	int j,i = gid0 + binned_width * gid1;
+        float data=0.0f;
+        int w, h, big_h, big_w;
+        for (h=gid1 * scale_heigth; h<(gid1+1) * scale_heigth; h++){
+            if (h>=orig_heigth){
+            	big_h = 2*orig_heigth - h - 1;
+            }else{
+            	big_h = h;
+            }
+            for (w=gid0*scale_width; w<(gid0+1)*scale_width; w++){
+                if (w>=orig_width){
+                	big_w = 2*orig_width - w - 1;
+                }else{
+                	big_w = w;
+                }
+                j = big_h * orig_width + big_w;
+				data += image_in[j];
+			};//end for horiz
         };//end for vertical
-        image_out[i] = data/size_h/size_w;
+        image_out[i] = data/((float)(scale_width*scale_heigth));
     };//end if in IMAGE
 };//end kernel
 
@@ -352,3 +351,4 @@ divide_cst(    __global     float     *data,
 }
 
 
+