3 * Copyright 2017 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
26 #include <drm/drm_dsc.h>
30 #include "qp_tables.h"
/*
 * table_hash() packs a (mode, bpc, max_min) triple into a single integer key:
 * mode is shifted left by 16, bpc by 8, and max_min is OR-ed in unshifted.
 * NOTE(review): the arguments are not parenthesized in the expansion, so only
 * simple expressions should be passed.
 */
32 #define table_hash(mode, bpc, max_min) ((mode << 16) | (bpc << 8) | max_min)
/*
 * MODE_SELECT() yields val444 when cm is CM_444 or CM_RGB, val422 when cm is
 * CM_422, and val420 otherwise. It reads a variable named cm from the scope
 * in which it is expanded.
 * NOTE(review): the expansion as a whole is not wrapped in parentheses.
 */
34 #define MODE_SELECT(val444, val422, val420) \
35 (cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420))
/*
 * TABLE_CASE() emits a case label whose value is the table_hash() key for
 * (mode, BPC_bpc, max). The case points the variable table at the array named
 * qp_table_MODE_BPCbpc_MAX and stores that array's element count in
 * table_size; both variables must exist where the macro is expanded.
 */
38 #define TABLE_CASE(mode, bpc, max) case (table_hash(mode, BPC_##bpc, max)): \
39 table = qp_table_##mode##_##bpc##bpc_##max; \
40 table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \
/*
 * get_qp_set - copy one row of a QP lookup table into @qps.
 * @qps:     destination; sizeof(qp_set) bytes are written by the final memcpy()
 * @cm:      colour mode, read by MODE_SELECT() to choose 444, 422 or 420
 * @bpc:     bits-per-component selector, part of the table key
 * @max_min: min/max selector, part of the table key
 * @bpp:     bits per pixel, converted below into a row index
 *
 * NOTE(review): some lines of this function (braces, the remaining local
 * declarations, the switch header) are not visible in this view.
 */
44 static void get_qp_set(qp_set qps
, enum colour_mode cm
, enum bits_per_comp bpc
,
45 enum max_min max_min
, float bpp
)
/* Build the lookup key from the colour mode family, bpc and min/max. */
47 int mode
= MODE_SELECT(444, 422, 420);
48 int sel
= table_hash(mode
, bpc
, max_min
);
51 const struct qp_entry
*table
= 0L;
/*
 * Local aliases: inside TABLE_CASE() the identifiers min/max serve both as
 * the DAL_MM_MIN/DAL_MM_MAX value in the key and as the table-name suffix.
 */
54 enum { min
= DAL_MM_MIN
, max
= DAL_MM_MAX
};
/* One case per (mode, bpc, min/max) combination; each sets table/table_size. */
56 TABLE_CASE(444, 8, max
);
57 TABLE_CASE(444, 8, min
);
58 TABLE_CASE(444, 10, max
);
59 TABLE_CASE(444, 10, min
);
60 TABLE_CASE(444, 12, max
);
61 TABLE_CASE(444, 12, min
);
62 TABLE_CASE(422, 8, max
);
63 TABLE_CASE(422, 8, min
);
64 TABLE_CASE(422, 10, max
);
65 TABLE_CASE(422, 10, min
);
66 TABLE_CASE(422, 12, max
);
67 TABLE_CASE(422, 12, min
);
68 TABLE_CASE(420, 8, max
);
69 TABLE_CASE(420, 8, min
);
70 TABLE_CASE(420, 10, max
);
71 TABLE_CASE(420, 10, min
);
72 TABLE_CASE(420, 12, max
);
73 TABLE_CASE(420, 12, min
);
/*
 * Row index is twice the distance, in bpp, from the first row's bpp
 * (assumes rows are spaced 0.5 bpp apart - TODO confirm against qp_tables.h).
 * NOTE(review): only the upper bound is checked in the visible lines; a bpp
 * below table[0].bpp would produce a negative value - confirm callers.
 */
79 index
= (bpp
- table
[0].bpp
) * 2;
81 /* requested size is bigger than the table */
82 if (index
>= table_size
) {
83 dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n");
/* Copy the selected row's QP values to the caller's buffer. */
87 memcpy(qps
, table
[index
].qps
, sizeof(qp_set
));
/*
 * dsc_roundf - rounding helper that takes and returns a double; it is applied
 * to the interpolated offset, fullness and delay values elsewhere in this file.
 * NOTE(review): the body is not visible in this view, so the exact rounding
 * rule is intentionally not stated here.
 */
90 static double dsc_roundf(double num
)
/*
 * dsc_ceil - ceiling-style helper on a double.
 * Starts from num truncated toward zero by an (int) cast, and takes a
 * correction path only when the truncation changed the value and num is
 * positive.
 * NOTE(review): the correction and return statements are not visible in this
 * view. The (int) cast means num must be representable as an int.
 */
100 static double dsc_ceil(double num
)
102 double retval
= (int)num
;
/* Only positive, non-integral inputs need adjusting. */
104 if (retval
!= num
&& num
> 0)
/*
 * get_ofs_set - compute a set of offsets from the colour mode and bpp.
 * @ofs:  output set (presumably what the cursor p walks - the declaration of
 *        p is not visible in this view)
 * @mode: colour mode; CM_444/CM_RGB and CM_422 have explicit branches
 * @bpp:  bits per pixel
 *
 * Every visible entry is a piecewise function of bpp: one constant at or
 * below a lower threshold, another constant at or above an upper threshold,
 * and a dsc_roundf()-rounded linear ramp in between. In each ramp the factor
 * written as (a / b) is the slope: a is the change in offset across the
 * segment and b is the segment width in bpp.
 *
 * NOTE(review): some entries and the closing lines of each branch are not
 * visible in this view.
 */
110 static void get_ofs_set(qp_set ofs
, enum colour_mode mode
, float bpp
)
/* 4:4:4 and RGB: breakpoints at 6, 8, 12 and 15 bpp. */
114 if (mode
== CM_444
|| mode
== CM_RGB
) {
115 *p
++ = (bpp
<= 6) ? (0) : ((((bpp
>= 8) && (bpp
<= 12))) ? (2) : ((bpp
>= 15) ? (10) : ((((bpp
> 6) && (bpp
< 8))) ? (0 + dsc_roundf((bpp
- 6) * (2 / 2.0))) : (2 + dsc_roundf((bpp
- 12) * (8 / 3.0))))));
116 *p
++ = (bpp
<= 6) ? (-2) : ((((bpp
>= 8) && (bpp
<= 12))) ? (0) : ((bpp
>= 15) ? (8) : ((((bpp
> 6) && (bpp
< 8))) ? (-2 + dsc_roundf((bpp
- 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp
- 12) * (8 / 3.0))))));
117 *p
++ = (bpp
<= 6) ? (-2) : ((((bpp
>= 8) && (bpp
<= 12))) ? (0) : ((bpp
>= 15) ? (6) : ((((bpp
> 6) && (bpp
< 8))) ? (-2 + dsc_roundf((bpp
- 6) * (2 / 2.0))) : (0 + dsc_roundf((bpp
- 12) * (6 / 3.0))))));
118 *p
++ = (bpp
<= 6) ? (-4) : ((((bpp
>= 8) && (bpp
<= 12))) ? (-2) : ((bpp
>= 15) ? (4) : ((((bpp
> 6) && (bpp
< 8))) ? (-4 + dsc_roundf((bpp
- 6) * (2 / 2.0))) : (-2 + dsc_roundf((bpp
- 12) * (6 / 3.0))))));
119 *p
++ = (bpp
<= 6) ? (-6) : ((((bpp
>= 8) && (bpp
<= 12))) ? (-4) : ((bpp
>= 15) ? (2) : ((((bpp
> 6) && (bpp
< 8))) ? (-6 + dsc_roundf((bpp
- 6) * (2 / 2.0))) : (-4 + dsc_roundf((bpp
- 12) * (6 / 3.0))))));
120 *p
++ = (bpp
<= 12) ? (-6) : ((bpp
>= 15) ? (0) : (-6 + dsc_roundf((bpp
- 12) * (6 / 3.0))));
121 *p
++ = (bpp
<= 12) ? (-8) : ((bpp
>= 15) ? (-2) : (-8 + dsc_roundf((bpp
- 12) * (6 / 3.0))));
122 *p
++ = (bpp
<= 12) ? (-8) : ((bpp
>= 15) ? (-4) : (-8 + dsc_roundf((bpp
- 12) * (4 / 3.0))));
123 *p
++ = (bpp
<= 12) ? (-8) : ((bpp
>= 15) ? (-6) : (-8 + dsc_roundf((bpp
- 12) * (2 / 3.0))));
124 *p
++ = (bpp
<= 12) ? (-10) : ((bpp
>= 15) ? (-8) : (-10 + dsc_roundf((bpp
- 12) * (2 / 3.0))));
126 *p
++ = (bpp
<= 6) ? (-12) : ((bpp
>= 8) ? (-10) : (-12 + dsc_roundf((bpp
- 6) * (2 / 2.0))));
/* CM_422: entries ramp between 8 and 10 bpp (last visible entry: 6 to 7). */
130 } else if (mode
== CM_422
) {
131 *p
++ = (bpp
<= 8) ? (2) : ((bpp
>= 10) ? (10) : (2 + dsc_roundf((bpp
- 8) * (8 / 2.0))));
132 *p
++ = (bpp
<= 8) ? (0) : ((bpp
>= 10) ? (8) : (0 + dsc_roundf((bpp
- 8) * (8 / 2.0))));
133 *p
++ = (bpp
<= 8) ? (0) : ((bpp
>= 10) ? (6) : (0 + dsc_roundf((bpp
- 8) * (6 / 2.0))));
134 *p
++ = (bpp
<= 8) ? (-2) : ((bpp
>= 10) ? (4) : (-2 + dsc_roundf((bpp
- 8) * (6 / 2.0))));
135 *p
++ = (bpp
<= 8) ? (-4) : ((bpp
>= 10) ? (2) : (-4 + dsc_roundf((bpp
- 8) * (6 / 2.0))));
136 *p
++ = (bpp
<= 8) ? (-6) : ((bpp
>= 10) ? (0) : (-6 + dsc_roundf((bpp
- 8) * (6 / 2.0))));
137 *p
++ = (bpp
<= 8) ? (-8) : ((bpp
>= 10) ? (-2) : (-8 + dsc_roundf((bpp
- 8) * (6 / 2.0))));
138 *p
++ = (bpp
<= 8) ? (-8) : ((bpp
>= 10) ? (-4) : (-8 + dsc_roundf((bpp
- 8) * (4 / 2.0))));
139 *p
++ = (bpp
<= 8) ? (-8) : ((bpp
>= 10) ? (-6) : (-8 + dsc_roundf((bpp
- 8) * (2 / 2.0))));
140 *p
++ = (bpp
<= 8) ? (-10) : ((bpp
>= 10) ? (-8) : (-10 + dsc_roundf((bpp
- 8) * (2 / 2.0))));
142 *p
++ = (bpp
<= 6) ? (-12) : ((bpp
>= 7) ? (-10) : (-12 + dsc_roundf((bpp
- 6) * (2.0 / 1))));
/*
 * Entries below ramp between 6 and 8 bpp (last visible entry: 4 to 5).
 * NOTE(review): presumably the fallback branch for the remaining colour
 * mode - the branch header is not visible in this view.
 */
147 *p
++ = (bpp
<= 6) ? (2) : ((bpp
>= 8) ? (10) : (2 + dsc_roundf((bpp
- 6) * (8 / 2.0))));
148 *p
++ = (bpp
<= 6) ? (0) : ((bpp
>= 8) ? (8) : (0 + dsc_roundf((bpp
- 6) * (8 / 2.0))));
149 *p
++ = (bpp
<= 6) ? (0) : ((bpp
>= 8) ? (6) : (0 + dsc_roundf((bpp
- 6) * (6 / 2.0))));
150 *p
++ = (bpp
<= 6) ? (-2) : ((bpp
>= 8) ? (4) : (-2 + dsc_roundf((bpp
- 6) * (6 / 2.0))));
151 *p
++ = (bpp
<= 6) ? (-4) : ((bpp
>= 8) ? (2) : (-4 + dsc_roundf((bpp
- 6) * (6 / 2.0))));
152 *p
++ = (bpp
<= 6) ? (-6) : ((bpp
>= 8) ? (0) : (-6 + dsc_roundf((bpp
- 6) * (6 / 2.0))));
153 *p
++ = (bpp
<= 6) ? (-8) : ((bpp
>= 8) ? (-2) : (-8 + dsc_roundf((bpp
- 6) * (6 / 2.0))));
154 *p
++ = (bpp
<= 6) ? (-8) : ((bpp
>= 8) ? (-4) : (-8 + dsc_roundf((bpp
- 6) * (4 / 2.0))));
155 *p
++ = (bpp
<= 6) ? (-8) : ((bpp
>= 8) ? (-6) : (-8 + dsc_roundf((bpp
- 6) * (2 / 2.0))));
156 *p
++ = (bpp
<= 6) ? (-10) : ((bpp
>= 8) ? (-8) : (-10 + dsc_roundf((bpp
- 6) * (2 / 2.0))));
158 *p
++ = (bpp
<= 4) ? (-12) : ((bpp
>= 5) ? (-10) : (-12 + dsc_roundf((bpp
- 4) * (2 / 1.0))));
/*
 * median3 - helper over three ints; the callers in this file pass 0 as the
 * first argument when computing the first/second line bpg offsets.
 * NOTE(review): the body is not visible in this view; judging by the name it
 * returns the middle value of a, b and c - confirm.
 */
165 static int median3(int a
, int b
, int c
)
/*
 * _do_calc_rc_params - fill @rc from the stream's colour mode, depth and bpp.
 * @rc:                   output structure
 * @cm:                   colour mode
 * @bpc:                  bits-per-component selector (BPC_8 / BPC_10 / other)
 * @drm_bpp:              bits per pixel in 1/16 bpp units (divided by 16.0)
 * @is_navite_422_or_420: true for native 4:2:2 / 4:2:0 streams
 * @slice_width:          slice width, used for the padding-pixel adjustment
 * @slice_height:         slice height, used for first_line_bpg_offset
 *
 * The visible statements set, in order:
 *  - rc_quant_incr_limit0/1 from bpc;
 *  - initial_fullness_offset and first/second_line_bpg_offset (three variants
 *    are visible; the conditions selecting between them are not);
 *  - initial_xmit_delay, halved-rate for non-444/RGB modes via a factor of
 *    2.0, then bumped by one in a slice_width-dependent corner case;
 *  - flatness_min_qp, flatness_max_qp and flatness_det_thresh from bpc;
 *  - qp_min/qp_max via get_qp_set(), each entry lowered by one (not below 0)
 *    when cm == CM_444 and minor_version == 1;
 *  - ofs via get_ofs_set();
 *  - a block of fixed parameters and the 14 rc_buf_thresh values.
 *
 * NOTE(review): minor_version is used below but its declaration is not
 * visible in this view (presumably a trailing parameter). The
 * minor_version == 1 adjustments test cm == CM_444 only, not CM_RGB -
 * confirm that this is intended.
 */
177 static void _do_calc_rc_params(struct rc_params
*rc
, enum colour_mode cm
,
178 enum bits_per_comp bpc
, u16 drm_bpp
,
179 bool is_navite_422_or_420
,
180 int slice_width
, int slice_height
,
185 float initial_xmit_delay_factor
;
/* drm_bpp carries the bpp value scaled by 16. */
189 bpp
= ((float)drm_bpp
/ 16.0);
190 /* in native_422 or native_420 modes, the bits_per_pixel is double the
191 * target bpp (the latter is what calc_rc_params expects)
193 if (is_navite_422_or_420
)
196 rc
->rc_quant_incr_limit0
= ((bpc
== BPC_8
) ? 11 : (bpc
== BPC_10
? 15 : 19)) - ((minor_version
== 1 && cm
== CM_444
) ? 1 : 0);
197 rc
->rc_quant_incr_limit1
= ((bpc
== BPC_8
) ? 11 : (bpc
== BPC_10
? 15 : 19)) - ((minor_version
== 1 && cm
== CM_444
) ? 1 : 0);
199 bpp_group
= MODE_SELECT(bpp
, bpp
* 2.0, bpp
* 2.0);
203 rc
->initial_fullness_offset
= (bpp
>= 6) ? (2048) : ((bpp
<= 4) ? (6144) : ((((bpp
> 4) && (bpp
<= 5))) ? (6144 - dsc_roundf((bpp
- 4) * (512))) : (5632 - dsc_roundf((bpp
- 5) * (3584)))));
204 rc
->first_line_bpg_offset
= median3(0, (12 + (int) (0.09 * min(34, slice_height
- 8))), (int)((3 * bpc
* 3) - (3 * bpp_group
)));
205 rc
->second_line_bpg_offset
= median3(0, 12, (int)((3 * bpc
* 3) - (3 * bpp_group
)));
208 rc
->initial_fullness_offset
= (bpp
>= 8) ? (2048) : ((bpp
<= 7) ? (5632) : (5632 - dsc_roundf((bpp
- 7) * (3584))));
209 rc
->first_line_bpg_offset
= median3(0, (12 + (int) (0.09 * min(34, slice_height
- 8))), (int)((3 * bpc
* 4) - (3 * bpp_group
)));
210 rc
->second_line_bpg_offset
= 0;
214 rc
->initial_fullness_offset
= (bpp
>= 12) ? (2048) : ((bpp
<= 8) ? (6144) : ((((bpp
> 8) && (bpp
<= 10))) ? (6144 - dsc_roundf((bpp
- 8) * (512 / 2))) : (5632 - dsc_roundf((bpp
- 10) * (3584 / 2)))));
215 rc
->first_line_bpg_offset
= median3(0, (12 + (int) (0.09 * min(34, slice_height
- 8))), (int)(((3 * bpc
+ (cm
== CM_444
? 0 : 2)) * 3) - (3 * bpp_group
)));
216 rc
->second_line_bpg_offset
= 0;
220 initial_xmit_delay_factor
= (cm
== CM_444
|| cm
== CM_RGB
) ? 1.0 : 2.0;
221 rc
->initial_xmit_delay
= dsc_roundf(8192.0/2.0/bpp
/initial_xmit_delay_factor
);
223 if (cm
== CM_422
|| cm
== CM_420
)
226 padding_pixels
= ((slice_width
% 3) != 0) ? (3 - (slice_width
% 3)) * (rc
->initial_xmit_delay
/ slice_width
) : 0;
227 if (3 * bpp_group
>= (((rc
->initial_xmit_delay
+ 2) / 3) * (3 + (cm
== CM_422
)))) {
228 if ((rc
->initial_xmit_delay
+ padding_pixels
) % 3 == 1)
229 rc
->initial_xmit_delay
++;
232 rc
->flatness_min_qp
= ((bpc
== BPC_8
) ? (3) : ((bpc
== BPC_10
) ? (7) : (11))) - ((minor_version
== 1 && cm
== CM_444
) ? 1 : 0);
233 rc
->flatness_max_qp
= ((bpc
== BPC_8
) ? (12) : ((bpc
== BPC_10
) ? (16) : (20))) - ((minor_version
== 1 && cm
== CM_444
) ? 1 : 0);
234 rc
->flatness_det_thresh
= 2 << (bpc
- 8);
236 get_qp_set(rc
->qp_min
, cm
, bpc
, DAL_MM_MIN
, bpp
);
237 get_qp_set(rc
->qp_max
, cm
, bpc
, DAL_MM_MAX
, bpp
);
238 if (cm
== CM_444
&& minor_version
== 1) {
239 for (i
= 0; i
< QP_SET_SIZE
; ++i
) {
240 rc
->qp_min
[i
] = rc
->qp_min
[i
] > 0 ? rc
->qp_min
[i
] - 1 : 0;
241 rc
->qp_max
[i
] = rc
->qp_max
[i
] > 0 ? rc
->qp_max
[i
] - 1 : 0;
244 get_ofs_set(rc
->ofs
, cm
, bpp
);
246 /* fixed parameters */
247 rc
->rc_model_size
= 8192;
248 rc
->rc_edge_factor
= 6;
249 rc
->rc_tgt_offset_hi
= 3;
250 rc
->rc_tgt_offset_lo
= 3;
/* The 14 buffer thresholds, in ascending order. */
252 rc
->rc_buf_thresh
[0] = 896;
253 rc
->rc_buf_thresh
[1] = 1792;
254 rc
->rc_buf_thresh
[2] = 2688;
255 rc
->rc_buf_thresh
[3] = 3584;
256 rc
->rc_buf_thresh
[4] = 4480;
257 rc
->rc_buf_thresh
[5] = 5376;
258 rc
->rc_buf_thresh
[6] = 6272;
259 rc
->rc_buf_thresh
[7] = 6720;
260 rc
->rc_buf_thresh
[8] = 7168;
261 rc
->rc_buf_thresh
[9] = 7616;
262 rc
->rc_buf_thresh
[10] = 7744;
263 rc
->rc_buf_thresh
[11] = 7872;
264 rc
->rc_buf_thresh
[12] = 8000;
265 rc
->rc_buf_thresh
[13] = 8064;
/*
 * _do_bytes_per_pixel_calc - bytes per pixel as a scaled u32.
 * @slice_width:          slice width in pixels; used as a divisor
 * @drm_bpp:              bits per pixel in 1/16 bpp units (divided by 16.0)
 * @is_navite_422_or_420: when true the per-pixel byte count is halved
 *
 * Computes the byte count of one slice-wide row (rounded up with dsc_ceil()),
 * divides it back by slice_width, and returns the result multiplied by
 * 0x10000000 and rounded up again.
 *
 * NOTE(review): no zero check on slice_width is visible in this view, and the
 * declarations of bpp and bytes_per_pixel are not visible either.
 */
268 static u32
_do_bytes_per_pixel_calc(int slice_width
, u16 drm_bpp
,
269 bool is_navite_422_or_420
)
273 double d_bytes_per_pixel
;
275 bpp
= ((float)drm_bpp
/ 16.0);
/* Round the row size up to whole bytes before averaging per pixel. */
276 d_bytes_per_pixel
= dsc_ceil(bpp
* slice_width
/ 8.0) / slice_width
;
277 // TODO: Make sure the formula for calculating this is precise (ceiling
278 // vs. floor, and at what point they should be applied)
279 if (is_navite_422_or_420
)
280 d_bytes_per_pixel
/= 2;
/* Scale by 0x10000000 (2^28) to return the fraction as an integer. */
282 bytes_per_pixel
= (u32
)dsc_ceil(d_bytes_per_pixel
* 0x10000000);
284 return bytes_per_pixel
;
/*
 * _do_calc_dsc_bpp_x16 - target bits per pixel, returned multiplied by 16.
 * @stream_bandwidth_kbps: stream bandwidth; multiplied by 10 to match the
 *                         100 Hz units of the pixel clock
 * @pix_clk_100hz:         pixel clock in 100 Hz units; used as a divisor
 * @bpp_increment_div:     precision; the bpp is truncated to a multiple of
 *                         1/bpp_increment_div before scaling by 16
 *
 * NOTE(review): no zero check on pix_clk_100hz or bpp_increment_div is
 * visible in this view; both are divisors.
 */
287 static u32
_do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps
, u32 pix_clk_100hz
,
288 u32 bpp_increment_div
)
290 u32 dsc_target_bpp_x16
;
291 float f_dsc_target_bpp
;
292 float f_stream_bandwidth_100bps
;
293 // bpp_increment_div is actually precision
294 u32 precision
= bpp_increment_div
;
/* bandwidth / pixel clock, both in units of 100 per second, gives bpp. */
296 f_stream_bandwidth_100bps
= stream_bandwidth_kbps
* 10.0f
;
297 f_dsc_target_bpp
= f_stream_bandwidth_100bps
/ pix_clk_100hz
;
299 // Round down to the nearest precision stop to bring it into DSC spec
/* The cast truncates to whole precision steps; then convert steps to x16. */
301 dsc_target_bpp_x16
= (u32
)(f_dsc_target_bpp
* precision
);
302 dsc_target_bpp_x16
= (dsc_target_bpp_x16
* 16) / precision
;
304 return dsc_target_bpp_x16
;
308 * calc_rc_params - compute the DSC rc_params values from a drm_dsc_config
309 * @rc: DC internal DSC parameters
310 * @pps: DRM struct with all required DSC values
312 * This function expects a drm_dsc_config data struct with all the required DSC
313 * values previously filled out by our driver and based on this information it
314 * computes some of the DSC values.
316 * @note This calculation requires floating-point operations; most of it
317 * executes under kernel_fpu_{begin,end}.
319 void calc_rc_params(struct rc_params
*rc
, const struct drm_dsc_config
*pps
)
321 enum colour_mode mode
;
322 enum bits_per_comp bpc
;
323 bool is_navite_422_or_420
;
/* Snapshot the pps fields that are forwarded to the helper. */
324 u16 drm_bpp
= pps
->bits_per_pixel
;
325 int slice_width
= pps
->slice_width
;
326 int slice_height
= pps
->slice_height
;
/*
 * Colour mode priority: convert_rgb selects CM_RGB, then simple_422 is
 * treated as CM_444, then native_422, then native_420; the default is CM_444.
 */
328 mode
= pps
->convert_rgb
? CM_RGB
: (pps
->simple_422
? CM_444
:
329 (pps
->native_422
? CM_422
:
330 pps
->native_420
? CM_420
: CM_444
));
/*
 * Map bits_per_component onto the BPC_* enum.
 * NOTE(review): the tail of this expression is not visible in this view.
 */
331 bpc
= (pps
->bits_per_component
== 8) ? BPC_8
: (pps
->bits_per_component
== 10)
334 is_navite_422_or_420
= pps
->native_422
|| pps
->native_420
;
/* The DSC minor version is passed as the final argument. */
337 _do_calc_rc_params(rc
, mode
, bpc
, drm_bpp
, is_navite_422_or_420
,
338 slice_width
, slice_height
,
339 pps
->dsc_version_minor
);
344 * calc_dsc_bytes_per_pixel - calculate bytes per pixel
345 * @pps: DRM struct with all required DSC values
347 * Based on the information inside drm_dsc_config, this function calculates the
348 * total of bytes per pixel.
350 * @note This calculation requires floating-point operations; most of it
351 * executes under kernel_fpu_{begin,end}.
354 * Return the number of bytes per pixel
356 u32
calc_dsc_bytes_per_pixel(const struct drm_dsc_config
*pps
)
/* Gather the three inputs the helper needs from @pps. */
360 u16 drm_bpp
= pps
->bits_per_pixel
;
361 int slice_width
= pps
->slice_width
;
362 bool is_navite_422_or_420
= pps
->native_422
|| pps
->native_420
;
/* The arithmetic itself lives in _do_bytes_per_pixel_calc(). */
365 ret
= _do_bytes_per_pixel_calc(slice_width
, drm_bpp
,
366 is_navite_422_or_420
);
372 * calc_dsc_bpp_x16 - retrieve the dsc bits per pixel
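373 * @stream_bandwidth_kbps: stream bandwidth, in kbps
375 * @bpp_increment_div: precision divisor; the bpp is rounded down to a multiple of 1/@bpp_increment_div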
377 * Calculate the total of bits per pixel for DSC configuration.
379 * @note This calculation requires floating-point operations; most of it
380 * executes under kernel_fpu_{begin,end}.
382 u32
calc_dsc_bpp_x16(u32 stream_bandwidth_kbps
, u32 pix_clk_100hz
,
383 u32 bpp_increment_div
)
388 dsc_bpp
= _do_calc_dsc_bpp_x16(stream_bandwidth_kbps
, pix_clk_100hz
,