/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
19 #define REFERENCE_WHITE
100.0f
20 extern float3 lrgb2yuv
(float3);
21 extern float lrgb2y
(float3);
22 extern float3 yuv2lrgb
(float3);
23 extern float3 lrgb2lrgb
(float3);
24 extern float get_luma_src
(float3);
25 extern float get_luma_dst
(float3);
26 extern float3 ootf
(float3 c
, float peak
);
27 extern float3 inverse_ootf
(float3 c
, float peak
);
28 extern float3 get_chroma_sample
(float3, float3
, float3
, float3
);
// Pair of values produced by detect_peak_avg() and consumed by the tone
// mapping.
//
// The member order follows the `{peak, sdr_avg}` initializer used in
// detect_peak_avg(); both members receive float values there.
struct detection_result {
    float peak;     // detected peak; detect_peak_avg() floors it at 1.0f
    float average;  // detected average; detect_peak_avg() floors it at 0.25f
};
// Rational curve evaluated by hable().
//
// Computes
//     (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f
// with the fixed coefficients below. At in == 0 the quotient reduces to
// e / f, so the trailing "- e / f" term makes the curve start at zero.
//
// in: input signal value.
// Returns the curve value for `in`.
float hable_f(float in) {
    float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f;
    return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f;
}
// Tone curve with the same (s, peak) signature as linear(), gamma(), clip(),
// reinhard(), hable() and mobius().
//
// NOTE(review): none of the body statements are visible in this text; the
// definition stops right after the opening brace. Presumably it returns `s`
// unchanged, as the name suggests -- confirm against the complete file.
40 float direct
(float s
, float peak
) {
44 float linear
(float s
, float peak
) {
45 return s
* tone_param
/ peak
;
48 float gamma
(float s
, float peak
) {
49 float p
= s
> 0.05f ? s
/peak
: 0.05f
/ peak
;
50 float v
= powr
(p, 1.0f
/ tone_param
);
51 return s
> 0.05f ? v
: (s * v
/0.05f
);
54 float clip
(float s
, float peak
) {
55 return clamp
(s * tone_param
, 0.0f
, 1.0f
);
58 float reinhard
(float s
, float peak
) {
59 return s
/ (s + tone_param
) * (peak + tone_param
) / peak
;
// Hable tone curve: the value of hable_f() at the signal, normalized by the
// value of hable_f() at the peak, so that s == peak maps to 1.
//
// s:    signal value to map.
// peak: peak value used for the normalization.
float hable(float s, float peak) {
    return hable_f(s) / hable_f(peak);
}
// Mobius tone curve.
//
// The visible part derives two coefficients, a and b, from j and peak and
// returns
//     (b*b + 2*b*j + j*j) / (b - a) * (s + a) / (s + b)
// The divisor of b is floored at 1e-6f via max(), which avoids a division by
// zero when peak == 1.
//
// NOTE(review): the declarations of j, a and b, and whatever precedes the
// assignment to a, are not visible in this text; the closing brace is missing
// as well. Confirm against the complete file.
66 float mobius
(float s
, float peak
) {
// NOTE(review): j, a and b are used below without a visible declaration.
73 a
= -j
* j
* (peak -
1.0f
) / (j * j -
2.0f
* j
+ peak
);
74 b
= (j * j -
2.0f
* j
* peak
+ peak
) / max
(peak -
1.0f
, 1e-6f
);
76 return
(b * b
+ 2.0f
* b
* j
+ j
* j
) / (b - a
) * (s + a
) / (s + b
);
// detect peak/average signal of a frame, the algorithm was ported from:
// libplacebo (https://github.com/haasn/libplacebo)
// Frame peak/average detection shared by all work-items of a dispatch.
//
// util_buf: global scratch buffer, carved up below into an average buffer
//           and a peak buffer (DETECTION_FRAMES + 1 uints each) followed by
//           five single-uint slots: work-group counter, max total, avg
//           total, frame index and scene frame count.
// sum_wg:   local accumulator for the current work-group.
// signal:   this work-item's signal value.
// peak:     initial value for the `peak` member of the result.
//
// Visible behaviour:
//  - every work-item atomically adds (uint)(signal * REFERENCE_WHITE) to
//    *sum_wg between two local-memory barriers;
//  - work-item (0, 0) divides the sum by the work-group size and folds that
//    value into the current frame's slots (atomic_max for the peak buffer,
//    atomic_add for the average buffer);
//  - when scene_frame_num > 0 the result is taken from the running totals,
//    divided by REFERENCE_WHITE * scene_frame_num and floored at 1.0f (peak)
//    and 0.25f (average); otherwise it stays at {peak, sdr_avg};
//  - the (0, 0) work-item whose atomic_add on the work-group counter returns
//    num_wg - 1 finishes the frame: it averages the frame slot over num_wg,
//    optionally resets the totals when the scene-change test fires, and
//    updates the running totals with the current and next frame slots.
//
// NOTE(review): this text is missing several statements and closing braces
// (see the inline notes), and the return statement is not visible. Confirm
// every note against the complete file before relying on it.
81 struct detection_result
82 detect_peak_avg
(global uint
*util_buf
, __local uint
*sum_wg
,
83 float signal
, float peak
) {
84 // layout of the util buffer
86 // Name
: : Size
(units of
4-bytes
)
87 // average buffer
: detection_frames
+ 1
88 // peak buffer
: detection_frames
+ 1
89 // workgroup counter
: 1
91 // total of average
: 1
// NOTE(review): the pointer arithmetic below carves out three more one-uint
// slots (max total, frame index, scene frame count) that the table above
// does not list in this text.
94 global uint
*avg_buf
= util_buf
;
95 global uint
*peak_buf
= avg_buf
+ DETECTION_FRAMES
+ 1;
96 global uint
*counter_wg_p
= peak_buf
+ DETECTION_FRAMES
+ 1;
97 global uint
*max_total_p
= counter_wg_p
+ 1;
98 global uint
*avg_total_p
= max_total_p
+ 1;
99 global uint
*frame_idx_p
= avg_total_p
+ 1;
100 global uint
*scene_frame_num_p
= frame_idx_p
+ 1;
102 uint frame_idx
= *frame_idx_p
;
103 uint scene_frame_num
= *scene_frame_num_p
;
105 size_t lidx
= get_local_id
(0);
106 size_t lidy
= get_local_id
(1);
107 size_t lsizex
= get_local_size
(0);
108 size_t lsizey
= get_local_size
(1);
109 uint num_wg
= get_num_groups
(0) * get_num_groups
(1);
// group_idx and group_idy are not read anywhere in the visible text.
110 size_t group_idx
= get_group_id
(0);
111 size_t group_idy
= get_group_id
(1);
// Default result, used unchanged while scene_frame_num is still 0.
112 struct detection_result r
= {peak
, sdr_avg
};
113 if
(lidx == 0 && lidy
== 0)
// NOTE(review): the statement controlled by the `if` above is not visible
// here; presumably it resets *sum_wg. As the text stands the barrier below
// would become the body of the `if`, which cannot be intended -- confirm.
115 barrier
(CLK_LOCAL_MEM_FENCE);
117 // update workgroup sum
118 atomic_add
(sum_wg, (uint)(signal * REFERENCE_WHITE
));
119 barrier
(CLK_LOCAL_MEM_FENCE);
121 // update frame peak
/avg using work-group-average.
122 if
(lidx == 0 && lidy
== 0) {
123 uint avg_wg
= *sum_wg
/ (lsizex * lsizey
);
124 atomic_max
(&peak_buf
[frame_idx], avg_wg);
125 atomic_add(&avg_buf[frame_idx], avg_wg
);
// NOTE(review): the closing brace of the block above is not visible.
128 if
(scene_frame_num > 0) {
// These locals shadow the `peak` parameter inside this block.
129 float peak
= (float)*max_total_p
/ (REFERENCE_WHITE * scene_frame_num
);
130 float avg
= (float)*avg_total_p
/ (REFERENCE_WHITE * scene_frame_num
);
131 r.peak
= max
(1.0f
, peak
);
132 r.average
= max
(0.25f
, avg
);
// NOTE(review): the closing brace of the block above is not visible.
// atomic_add returns the previous counter value, so the condition below holds
// only for the work-item that arrives after all other work-groups.
135 if
(lidx == 0 && lidy
== 0 && atomic_add
(counter_wg_p, 1) == num_wg -
1) {
// NOTE(review): no reset of *counter_wg_p is visible in this text.
137 avg_buf
[frame_idx] /= num_wg;
139 if (scene_threshold > 0.0f) {
140 uint cur_max = peak_buf[frame_idx];
141 uint cur_avg
= avg_buf
[frame_idx];
// Difference between the current frame average, scaled by the number of
// frames in the totals, and the running average total.
142 int diff = (int)(scene_frame_num * cur_avg) - (int)*avg_total_p;
144 if (abs(diff) > scene_frame_num * scene_threshold * REFERENCE_WHITE) {
// NOTE(review): the bodies of the two loops below are not visible; presumably
// they clear avg_buf[i] and peak_buf[i] -- confirm.
145 for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
147 for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
149 *avg_total_p = *max_total_p = 0;
150 *scene_frame_num_p = 0;
// Keep the current frame's statistics after the reset.
151 avg_buf[frame_idx] = cur_avg
;
152 peak_buf
[frame_idx] = cur_max;
// NOTE(review): the closing braces of the two blocks above are not visible.
// Index of the slot that follows the current one, wrapping around.
155 uint next = (frame_idx + 1) % (DETECTION_FRAMES + 1);
156 // add current frame, subtract next frame
157 *max_total_p += peak_buf[frame_idx] - peak_buf
[next];
158 *avg_total_p += avg_buf[frame_idx] - avg_buf[next];
160 peak_buf
[next] = avg_buf[next] = 0;
// NOTE(review): no store to *frame_idx_p is visible in this text.
// The scene frame count is capped at DETECTION_FRAMES.
162 *scene_frame_num_p
= min
(*scene_frame_num_p
+ 1,
163 (uint)DETECTION_FRAMES
);
// Tone-maps one RGB pixel.
//
// rgb:     pixel to map.
// peak:    signal peak handed to TONE_FUNC (rescaled first, see below).
// average: signal average; used to derive `slope`.
//
// Visible behaviour: the signal is the largest of the three components,
// floored at 1e-6f. When target_peak > 1.0f both signal and peak are divided
// by target_peak. If desat_param > 0.0f, rgb and the signal are blended
// towards the luma with a signal-dependent coefficient. The signal then goes
// through TONE_FUNC, is capped at 1.0f, and rgb is scaled by sig / sig_old.
//
// NOTE(review): `sig_old` is never declared in this text, `slope` is only
// read inside the desaturation branch here, and neither closing braces nor a
// return statement are visible. Confirm against the complete file.
168 float3 map_one_pixel_rgb
(float3 rgb
, float peak
, float average
) {
169 float sig
= max
(max(rgb.x
, max
(rgb.y
, rgb.z
)), 1e-6f
);
171 // Rescale the variables in order to bring it into a representation where
172 // 1.0 represents the dst_peak. This is because all of the tone mapping
173 // algorithms are defined in such a way that they map to the range
[0.0, 1.0].
174 if
(target_peak > 1.0f
) {
175 sig
*= 1.0f
/ target_peak
;
176 peak
*= 1.0f
/ target_peak
;
181 // Scale the signal to compensate for differences in the average brightness
// slope never exceeds 1.0f.
182 float slope
= min
(1.0f
, sdr_avg
/ average
);
186 // Desaturate the color using a coefficient dependent on the signal level
187 if
(desat_param > 0.0f
) {
188 float luma
= get_luma_dst
(rgb);
// (sig - 0.18) / sig with both terms floored at 1e-6f, so the base handed to
// native_powr below stays positive.
189 float coeff
= max
(sig -
0.18f
, 1e-6f
) / max
(sig, 1e-6f
);
190 coeff
= native_powr
(coeff, 10.0f
/ desat_param
);
191 rgb
= mix
(rgb, (float3)luma
, (float3)coeff
);
192 sig
= mix
(sig, luma
* slope
, coeff
);
195 sig
= TONE_FUNC
(sig, peak
);
197 sig
= min
(sig, 1.0f
);
198 rgb
*= (sig/sig_old
);
// Converts one YUV sample towards the destination RGB representation.
//
// yuv:  source sample; first passed through yuv2lrgb().
// peak: not read in the visible part of the body.
//
// NOTE(review): only the first statement of the body is visible in this
// text; the remaining steps and the return statement are missing. Confirm
// against the complete file.
201 // map from source space YUV to destination space RGB
202 float3 map_to_dst_space_from_yuv
(float3 yuv
, float peak
) {
203 float3 c
= yuv2lrgb
(yuv);
// Tone-mapping kernel; each work-item handles a 2x2 group of samples from
// src1 together with one sample from src2.
//
// dst1 / src1: output / input image addressed at (x, y) .. (x + 1, y + 1).
// dst2 / src2: output / input image addressed at (xi, yi).
// util_buf:    scratch buffer handed to detect_peak_avg().
//
// Visible steps:
//  1. read four .x samples from src1 and one .xy pair from src2;
//  2. convert each of the four (y, u, v) triples with
//     map_to_dst_space_from_yuv();
//  3. take the largest component over all four pixels as the signal and run
//     detect_peak_avg();
//  4. tone-map each pixel with map_one_pixel_rgb() using the detected
//     peak/average, then apply inverse_ootf() with target_peak;
//  5. derive one chroma sample via get_chroma_sample() and lrgb2yuv();
//  6. if (xi, yi) lies inside dst2, write y0..y3 to dst1 and
//     (chroma.y, chroma.z) to dst2.
//
// NOTE(review): the end of the parameter list (including `peak`), the
// declarations of `x`, `y` and `sum_wg`, the end of the sampler initializer
// and any update of y0..y3 between steps 4 and 6 are not visible in this
// text. Confirm against the complete file.
209 __kernel void tonemap
(__write_only image2d_t dst1
,
210 __read_only image2d_t src1
,
211 __write_only image2d_t dst2
,
212 __read_only image2d_t src2
,
213 global uint
*util_buf
,
218 const sampler_t sampler
= (CLK_NORMALIZED_COORDS_FALSE |
219 CLK_ADDRESS_CLAMP_TO_EDGE |
221 int xi
= get_global_id
(0);
222 int yi
= get_global_id
(1);
223 // each work item process four pixels
// NOTE(review): `x` and `y` are used below without a visible declaration.
227 float y0
= read_imagef
(src1, sampler
, (int2)(x, y
)).x
;
228 float y1
= read_imagef
(src1, sampler
, (int2)(x + 1, y
)).x
;
229 float y2
= read_imagef
(src1, sampler
, (int2)(x, y
+ 1)).x
;
230 float y3
= read_imagef
(src1, sampler
, (int2)(x + 1, y
+ 1)).x
;
// One src2 sample is shared by all four src1 samples.
231 float2 uv
= read_imagef
(src2, sampler
, (int2)(xi, yi
)).xy
;
233 float3 c0
= map_to_dst_space_from_yuv
((float3)(y0, uv.x
, uv.y
), peak
);
234 float3 c1
= map_to_dst_space_from_yuv
((float3)(y1, uv.x
, uv.y
), peak
);
235 float3 c2
= map_to_dst_space_from_yuv
((float3)(y2, uv.x
, uv.y
), peak
);
236 float3 c3
= map_to_dst_space_from_yuv
((float3)(y3, uv.x
, uv.y
), peak
);
// Largest component of each pixel, then the largest over the four pixels.
238 float sig0
= max
(c0.x
, max
(c0.y
, c0.z
));
239 float sig1
= max
(c1.x
, max
(c1.y
, c1.z
));
240 float sig2
= max
(c2.x
, max
(c2.y
, c2.z
));
241 float sig3
= max
(c3.x
, max
(c3.y
, c3.z
));
242 float sig
= max
(sig0, max
(sig1, max
(sig2, sig3
)));
244 struct detection_result r
= detect_peak_avg
(util_buf, &sum_wg
, sig
, peak
);
// c0_old, c1_old and c2_old are not read anywhere in the visible text.
246 float3 c0_old
= c0
, c1_old
= c1
, c2_old
= c2
;
247 c0
= map_one_pixel_rgb
(c0, r.peak
, r.average
);
248 c1
= map_one_pixel_rgb
(c1, r.peak
, r.average
);
249 c2
= map_one_pixel_rgb
(c2, r.peak
, r.average
);
250 c3
= map_one_pixel_rgb
(c3, r.peak
, r.average
);
252 c0
= inverse_ootf
(c0, target_peak
);
253 c1
= inverse_ootf
(c1, target_peak
);
254 c2
= inverse_ootf
(c2, target_peak
);
255 c3
= inverse_ootf
(c3, target_peak
);
// NOTE(review): y0..y3 are written out below, but no assignment from the
// mapped c0..c3 is visible in this text; presumably one exists -- confirm.
261 float3 chroma_c
= get_chroma_sample
(c0, c1
, c2
, c3
);
262 float3 chroma
= lrgb2yuv
(chroma_c);
// Only work-items that fall inside dst2 write any output.
264 if
(xi < get_image_width
(dst2) && yi
< get_image_height
(dst2)) {
265 write_imagef
(dst1, (int2)(x, y
), (float4)(y0, 0.0f
, 0.0f
, 1.0f
));
266 write_imagef
(dst1, (int2)(x+1, y
), (float4)(y1, 0.0f
, 0.0f
, 1.0f
));
267 write_imagef
(dst1, (int2)(x, y
+1), (float4)(y2, 0.0f
, 0.0f
, 1.0f
));
268 write_imagef
(dst1, (int2)(x+1, y
+1), (float4)(y3, 0.0f
, 0.0f
, 1.0f
));
269 write_imagef
(dst2, (int2)(xi, yi
),
270 (float4)(chroma.y
, chroma.z
, 0.0f
, 1.0f
));