/*
 * Number of extra columns/rows the source buffer has compared to the
 * global work size. Both kernels read a 3x3 neighbourhood centred on
 * (gidx + 1, gidy + 1), i.e. columns/rows 0 .. get_global_size() + 1,
 * so the source row stride must be get_global_size(0) + 2.
 */
#define LAPLACE_RADIUS 2

/*
 * Threshold the Laplacian is compared against in pre_edgelaplace.
 * NOTE(review): no definition of EPSILON is visible in this file; the
 * fallback value below is an assumption - confirm against the build
 * options / original source. A definition supplied by the build wins.
 */
#ifndef EPSILON
#define EPSILON 1e-5f
#endif
/*
 * Component-wise minimum and maximum of five float4 values.
 *
 * x1..x5      input vectors
 * min_result  receives, per component, the smallest of x1..x5
 * max_result  receives, per component, the largest of x1..x5
 *
 * x1..x4 are packed into one float16 and reduced by halves
 * (16 -> 8 -> 4 lanes), so each fmin/fmax pair handles several inputs
 * at once; x5 is folded in as the final step.
 */
void minmax(float4 x1, float4 x2, float4 x3,
            float4 x4, float4 x5,
            float4 *min_result,
            float4 *max_result)
{
    /* Pack four inputs: .lo = (x1, x2), .hi = (x3, x4). */
    float16 first = (float16)(x1, x2, x3, x4);

    /* 16 -> 8 lanes: (x3 vs x1, x4 vs x2). */
    float8 min1 = fmin(first.hi, first.lo);
    float8 max1 = fmax(first.hi, first.lo);

    /* 8 -> 4 lanes: the two partial results against each other. */
    float4 min2 = fmin(min1.hi, min1.lo);
    float4 max2 = fmax(max1.hi, max1.lo);

    /* Fold in the fifth input. */
    *min_result = fmin(min2, x5);
    *max_result = fmax(max2, x5);
}
/*
 * First pass of the Laplacian edge detector.
 *
 * For each work-item (gidx, gidy) this reads the 3x3 neighbourhood of
 * `in` centred on (gidx + 1, gidy + 1) and writes one float4 to
 * out[gidx + gidy * get_global_size(0)].
 *
 * in   source pixels, row stride get_global_size(0) + LAPLACE_RADIUS.
 *      Rows/columns 0 .. get_global_size() + 1 are read, so the buffer
 *      must cover that range.
 * out  destination, row stride get_global_size(0).
 *
 * The written value is, per component, the larger of (max - centre) and
 * (centre - min) over the centre pixel and its four direct neighbours,
 * multiplied by -0.5 where the 8-neighbour Laplacian is below EPSILON
 * and by +0.5 elsewhere. The .w component is copied from the centre
 * pixel unchanged.
 */
kernel void pre_edgelaplace(const global float4 *in,
                                  global float4 *out)
{
    int gidx = get_global_id(0);
    int gidy = get_global_id(1);

    int src_width = get_global_size(0) + LAPLACE_RADIUS;

    /* Centre of the 3x3 window in source coordinates. */
    int i = gidx + 1, j = gidy + 1;

    /* Rows: f = j - 1, m = j, b = j + 1; columns: l = i - 1, m = i, r = i + 1. */
    float4 pix_fl = in[(i - 1) + (j - 1) * src_width];
    float4 pix_fm = in[(i - 0) + (j - 1) * src_width];
    float4 pix_fr = in[(i + 1) + (j - 1) * src_width];
    float4 pix_ml = in[(i - 1) + (j - 0) * src_width];
    float4 pix_mm = in[(i - 0) + (j - 0) * src_width];
    float4 pix_mr = in[(i + 1) + (j - 0) * src_width];
    float4 pix_bl = in[(i - 1) + (j + 1) * src_width];
    float4 pix_bm = in[(i - 0) + (j + 1) * src_width];
    float4 pix_br = in[(i + 1) + (j + 1) * src_width];

    /* Local extrema over the centre pixel and its 4-neighbourhood. */
    float4 minval, maxval;
    minmax(pix_fm, pix_bm, pix_ml, pix_mr,
           pix_mm, &minval, &maxval);

    /* Sum of the eight neighbours minus 8x the centre (summation order kept). */
    float4 laplacian = pix_fl + pix_fm + pix_fr +
                       pix_bm - 8.0f * pix_mm + pix_br +
                       pix_ml + pix_mr + pix_bl;

    /* select() yields its second argument where the comparison is true. */
    float4 gradient = fmax((maxval - pix_mm), (pix_mm - minval))
                    * select((float4)0.5f, (float4)-0.5f,
                             laplacian < EPSILON);
    gradient.w = pix_mm.w;

    out[gidx + gidy * get_global_size(0)] = gradient;
}
/*
 * Second pass of the Laplacian edge detector.
 *
 * For each work-item (gidx, gidy) this reads the 3x3 neighbourhood of
 * `in` centred on (gidx + 1, gidy + 1) and writes one float4 to
 * out[gidx + gidy * get_global_size(0)].
 *
 * in   source values, row stride get_global_size(0) + LAPLACE_RADIUS.
 *      Rows/columns 0 .. get_global_size() + 1 are read, so the buffer
 *      must cover that range.
 * out  destination, row stride get_global_size(0).
 *
 * Per component, the centre value is kept when it is positive and at
 * least one of its eight neighbours is negative in that component;
 * otherwise 0 is written.
 */
kernel void knl_edgelaplace(const global float4 *in,
                                  global float4 *out)
{
    int gidx = get_global_id(0);
    int gidy = get_global_id(1);

    int src_width = get_global_size(0) + LAPLACE_RADIUS;

    /* Centre of the 3x3 window in source coordinates. */
    int i = gidx + 1, j = gidy + 1;

    /* Rows: f = j - 1, m = j, b = j + 1; columns: l = i - 1, m = i, r = i + 1. */
    float4 pix_fl = in[(i - 1) + (j - 1) * src_width];
    float4 pix_fm = in[(i - 0) + (j - 1) * src_width];
    float4 pix_fr = in[(i + 1) + (j - 1) * src_width];
    float4 pix_ml = in[(i - 1) + (j - 0) * src_width];
    float4 pix_mm = in[(i - 0) + (j - 0) * src_width];
    float4 pix_mr = in[(i + 1) + (j - 0) * src_width];
    float4 pix_bl = in[(i - 1) + (j + 1) * src_width];
    float4 pix_bm = in[(i - 0) + (j + 1) * src_width];
    float4 pix_br = in[(i + 1) + (j + 1) * src_width];

    /*
     * select() yields its second argument where the mask is true.
     * The zero operand is widened explicitly so both value operands
     * have the same vector type.
     */
    float4 value = select((float4)0.0f, pix_mm, (pix_mm > 0.0f) &&
                          (pix_fl < 0.0f || pix_fm < 0.0f ||
                           pix_fr < 0.0f || pix_ml < 0.0f ||
                           pix_mr < 0.0f || pix_bl < 0.0f ||
                           pix_bm < 0.0f || pix_br < 0.0f));

    out[gidx + gidy * get_global_size(0)] = value;
}