Skip to content

Commit 50c9367

Browse files
committed
Merge pull request opencv#3451 from wangyan42164:ocl_pyrup_unrolled
2 parents 73ba435 + 6e70505 commit 50c9367

File tree

1 file changed

+15
-15
lines changed

1 file changed

+15
-15
lines changed

modules/imgproc/src/opencl/pyr_up.cl

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -165,41 +165,41 @@ __kernel void pyrUp_unrolled(__global const uchar * src, int src_step, int src_o
165165

166166
// (x,y)
167167
sum = co3 * s_srcPatch[1 + (ly >> 1)][1 + ((lx - 2) >> 1)];
168-
sum = sum + co1 * s_srcPatch[1 + (ly >> 1)][1 + ((lx ) >> 1)];
169-
sum = sum + co3 * s_srcPatch[1 + (ly >> 1)][1 + ((lx + 2) >> 1)];
168+
sum = mad(co1, s_srcPatch[1 + (ly >> 1)][1 + ((lx ) >> 1)], sum);
169+
sum = mad(co3, s_srcPatch[1 + (ly >> 1)][1 + ((lx + 2) >> 1)], sum);
170170

171171
s_dstPatch[1 + get_local_id(1)][lx] = sum;
172172

173173
// (x+1,y)
174174
sum = co2 * s_srcPatch[1 + (ly >> 1)][1 + ((lx + 1 - 1) >> 1)];
175-
sum = sum + co2 * s_srcPatch[1 + (ly >> 1)][1 + ((lx + 1 + 1) >> 1)];
175+
sum = mad(co2, s_srcPatch[1 + (ly >> 1)][1 + ((lx + 1 + 1) >> 1)], sum);
176176
s_dstPatch[1 + get_local_id(1)][lx+1] = sum;
177177

178178
if (ly < 1)
179179
{
180180
// (x,y)
181181
sum = co3 * s_srcPatch[0][1 + ((lx - 2) >> 1)];
182-
sum = sum + co1 * s_srcPatch[0][1 + ((lx ) >> 1)];
183-
sum = sum + co3 * s_srcPatch[0][1 + ((lx + 2) >> 1)];
182+
sum = mad(co1, s_srcPatch[0][1 + ((lx ) >> 1)], sum);
183+
sum = mad(co3, s_srcPatch[0][1 + ((lx + 2) >> 1)], sum);
184184
s_dstPatch[0][lx] = sum;
185185

186186
// (x+1,y)
187187
sum = co2 * s_srcPatch[0][1 + ((lx + 1 - 1) >> 1)];
188-
sum = sum + co2 * s_srcPatch[0][1 + ((lx + 1 + 1) >> 1)];
188+
sum = mad(co2, s_srcPatch[0][1 + ((lx + 1 + 1) >> 1)], sum);
189189
s_dstPatch[0][lx+1] = sum;
190190
}
191191

192192
if (ly > 2*LOCAL_SIZE-3)
193193
{
194194
// (x,y)
195195
sum = co3 * s_srcPatch[LOCAL_SIZE+1][1 + ((lx - 2) >> 1)];
196-
sum = sum + co1 * s_srcPatch[LOCAL_SIZE+1][1 + ((lx ) >> 1)];
197-
sum = sum + co3 * s_srcPatch[LOCAL_SIZE+1][1 + ((lx + 2) >> 1)];
196+
sum = mad(co1, s_srcPatch[LOCAL_SIZE+1][1 + ((lx ) >> 1)], sum);
197+
sum = mad(co3, s_srcPatch[LOCAL_SIZE+1][1 + ((lx + 2) >> 1)], sum);
198198
s_dstPatch[LOCAL_SIZE+1][lx] = sum;
199199

200200
// (x+1,y)
201201
sum = co2 * s_srcPatch[LOCAL_SIZE+1][1 + ((lx + 1 - 1) >> 1)];
202-
sum = sum + co2 * s_srcPatch[LOCAL_SIZE+1][1 + ((lx + 1 + 1) >> 1)];
202+
sum = mad(co2, s_srcPatch[LOCAL_SIZE+1][1 + ((lx + 1 + 1) >> 1)], sum);
203203
s_dstPatch[LOCAL_SIZE+1][lx+1] = sum;
204204
}
205205

@@ -211,24 +211,24 @@ __kernel void pyrUp_unrolled(__global const uchar * src, int src_step, int src_o
211211
{
212212
// (x,y)
213213
sum = co3 * s_dstPatch[1 + get_local_id(1) - 1][lx];
214-
sum = sum + co1 * s_dstPatch[1 + get_local_id(1) ][lx];
215-
sum = sum + co3 * s_dstPatch[1 + get_local_id(1) + 1][lx];
214+
sum = mad(co1, s_dstPatch[1 + get_local_id(1) ][lx], sum);
215+
sum = mad(co3, s_dstPatch[1 + get_local_id(1) + 1][lx], sum);
216216
storepix(convertToT(sum), dstData + dst_y * dst_step + dst_x * PIXSIZE);
217217

218218
// (x+1,y)
219219
sum = co3 * s_dstPatch[1 + get_local_id(1) - 1][lx+1];
220-
sum = sum + co1 * s_dstPatch[1 + get_local_id(1) ][lx+1];
221-
sum = sum + co3 * s_dstPatch[1 + get_local_id(1) + 1][lx+1];
220+
sum = mad(co1, s_dstPatch[1 + get_local_id(1) ][lx+1], sum);
221+
sum = mad(co3, s_dstPatch[1 + get_local_id(1) + 1][lx+1], sum);
222222
storepix(convertToT(sum), dstData + dst_y * dst_step + (dst_x+1) * PIXSIZE);
223223

224224
// (x,y+1)
225225
sum = co2 * s_dstPatch[1 + get_local_id(1) ][lx];
226-
sum = sum + co2 * s_dstPatch[1 + get_local_id(1) + 1][lx];
226+
sum = mad(co2, s_dstPatch[1 + get_local_id(1) + 1][lx], sum);
227227
storepix(convertToT(sum), dstData + (dst_y+1) * dst_step + dst_x * PIXSIZE);
228228

229229
// (x+1,y+1)
230230
sum = co2 * s_dstPatch[1 + get_local_id(1) ][lx+1];
231-
sum = sum + co2 * s_dstPatch[1 + get_local_id(1) + 1][lx+1];
231+
sum = mad(co2, s_dstPatch[1 + get_local_id(1) + 1][lx+1], sum);
232232
storepix(convertToT(sum), dstData + (dst_y+1) * dst_step + (dst_x+1) * PIXSIZE);
233233
}
234234
}

0 commit comments

Comments (0)