/*
 * convolution.cl (forked from kif/sift_pyocl)
 */
/*
Separable convolution with global memory access.
The borders are handled directly in the kernel (by symmetrization),
so the input image does not need to be pre-processed.
*/
/* Value given to the max_constant_size attribute on the __constant filter
   argument of both convolution kernels below. */
#define MAX_CONST_SIZE 16384
/**
 * Horizontal (along-row) pass of a separable convolution on global memory.
 *
 * One work-item produces one output pixel: global id 0 is the column and
 * global id 1 is the row; pixels are addressed as row * IMAGE_W + column.
 * Work-items outside the IMAGE_W x IMAGE_H extent return without writing.
 *
 * Taps that fall outside [0, IMAGE_W) are mirrored about the image edge
 * (column -1 reads column 0, column IMAGE_W reads column IMAGE_W - 1).
 * The mirroring is applied periodically (period 2 * IMAGE_W), so a filter
 * wider than the image still reads inside the current row instead of
 * running into a neighbouring row or past the buffer.
 *
 * @param input        source image, indexed as row * IMAGE_W + column
 * @param output       destination image, same indexing as input
 * @param filter       FILTER_SIZE taps; tap k is applied at column offset
 *                     k - FILTER_SIZE / 2
 * @param FILTER_SIZE  number of filter taps
 * @param IMAGE_W      image width in pixels
 * @param IMAGE_H      image height in pixels
 */
__kernel void horizontal_convolution(
    const __global float * input,
    __global float * output,
    __constant float * filter __attribute__((max_constant_size(MAX_CONST_SIZE))),
    int FILTER_SIZE,
    int IMAGE_W,
    int IMAGE_H
)
{
    int gid1 = (int) get_global_id(1);
    int gid0 = (int) get_global_id(0);

    if (gid1 >= IMAGE_H || gid0 >= IMAGE_W) {
        return;
    }

    /* Integer division yields the same half-width for odd and even sizes. */
    int half_size = FILTER_SIZE / 2;
    int row_start = gid1 * IMAGE_W;
    int period = 2 * IMAGE_W;
    float sum = 0.0f;

    for (int k = 0; k < FILTER_SIZE; k++) {
        int col = gid0 + k - half_size;

        /* Fast path: in-range columns are used as-is. */
        if (col < 0 || col >= IMAGE_W) {
            /* Fold into [0, period), then mirror the upper half back. */
            col %= period;
            if (col < 0) {
                col += period;
            }
            if (col >= IMAGE_W) {
                col = period - 1 - col;
            }
        }
        sum += input[row_start + col] * filter[k];
    }

    output[row_start + gid0] = sum;
}
/**
 * Vertical (along-column) pass of a separable convolution on global memory.
 *
 * One work-item produces one output pixel: global id 0 is the column and
 * global id 1 is the row; pixels are addressed as row * IMAGE_W + column.
 * Work-items outside the IMAGE_W x IMAGE_H extent return without writing.
 *
 * Taps that fall outside [0, IMAGE_H) are mirrored about the image edge
 * (row -1 reads row 0, row IMAGE_H reads row IMAGE_H - 1). The mirroring
 * is applied periodically (period 2 * IMAGE_H), so a filter taller than
 * the image still reads inside the image instead of past the buffer.
 *
 * @param input        source image, indexed as row * IMAGE_W + column
 * @param output       destination image, same indexing as input
 * @param filter       FILTER_SIZE taps; tap k is applied at row offset
 *                     k - FILTER_SIZE / 2
 * @param FILTER_SIZE  number of filter taps
 * @param IMAGE_W      image width in pixels
 * @param IMAGE_H      image height in pixels
 */
__kernel void vertical_convolution(
    const __global float * input,
    __global float * output,
    __constant float * filter __attribute__((max_constant_size(MAX_CONST_SIZE))),
    int FILTER_SIZE,
    int IMAGE_W,
    int IMAGE_H
)
{
    int gid1 = (int) get_global_id(1);
    int gid0 = (int) get_global_id(0);

    if (gid1 >= IMAGE_H || gid0 >= IMAGE_W) {
        return;
    }

    /* Integer division yields the same half-width for odd and even sizes. */
    int half_size = FILTER_SIZE / 2;
    int period = 2 * IMAGE_H;
    float sum = 0.0f;

    for (int k = 0; k < FILTER_SIZE; k++) {
        int row = gid1 + k - half_size;

        /* Fast path: in-range rows are used as-is. */
        if (row < 0 || row >= IMAGE_H) {
            /* Fold into [0, period), then mirror the upper half back. */
            row %= period;
            if (row < 0) {
                row += period;
            }
            if (row >= IMAGE_H) {
                row = period - 1 - row;
            }
        }
        sum += input[row * IMAGE_W + gid0] * filter[k];
    }

    output[gid1 * IMAGE_W + gid0] = sum;
}
/*
*/