g3dvl: Fix support for bicubic resizing
[mesa/nouveau-pmpeg.git] / src / gallium / auxiliary / vl / vl_compositor.c

/**************************************************************************
 *
 * Copyright 2009 Younes Manton.
 * Copyright 2011 Maarten Lankhorst
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include <assert.h>

#include "pipe/p_compiler.h"
#include "pipe/p_context.h"

#include "util/u_memory.h"
#include "util/u_draw.h"
#include "util/u_surface.h"
#include "util/u_sampler.h"

#include "tgsi/tgsi_ureg.h"

#include "vl_csc.h"
#include "vl_types.h"
#include "vl_compositor.h"

typedef float csc_matrix[16];

/* Set to 1 to run a contour shader */
#define DEBUG_CONTOUR 0

/* Set to non-zero half-pixel units for finding what would be affected by bicubic resizing */
#define DEBUG_BICUBIC 0

static void *
create_vert_shader(struct vl_compositor *c)
{
   struct ureg_program *shader;
   struct ureg_src vpos, vtex;
   struct ureg_dst o_vpos, o_vtex;

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return false;

   vpos = ureg_DECL_vs_input(shader, 0);
   vtex = ureg_DECL_vs_input(shader, 1);
   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
   o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1);

   /*
    * o_vpos = vpos
    * o_vtex = vtex
    */
   ureg_MOV(shader, o_vpos, vpos);
   ureg_MOV(shader, o_vtex, vtex);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, c->pipe);
}

static void *
create_frag_shader_video_buffer(struct vl_compositor *c, unsigned planes)
{
   struct ureg_program *shader;
   struct ureg_src tc;
   struct ureg_src csc[3];
   struct ureg_src sampler[3];
   struct ureg_dst texel;
   struct ureg_dst fragment;
   unsigned i;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return false;

   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
   for (i = 0; i < 3; ++i) {
      csc[i] = ureg_DECL_constant(shader, i);
      if (i < planes)
         sampler[i] = ureg_DECL_sampler(shader, i);
   }
   texel = ureg_DECL_temporary(shader);
   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /*
    * texel.xyz = tex(tc, sampler[i])
    * fragment = csc * texel
    */
   if (planes == 2) {
      ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, tc, sampler[0]);
      ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_YZ), TGSI_TEXTURE_2D, tc, sampler[1]);
   } else {
      for (i = 0; i < 3; ++i)
         ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, tc, sampler[i]);
   }

   ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));

   for (i = 0; i < 3; ++i)
      ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));

   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));

   ureg_release_temporary(shader, texel);
   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, c->pipe);
}
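
/* For reference, the matrix multiply above computes, per pixel, roughly
 *
 *    rgb.r = dot(csc[0], vec4(y, cb, cr, 1.0))
 *    rgb.g = dot(csc[1], vec4(y, cb, cr, 1.0))
 *    rgb.b = dot(csc[2], vec4(y, cb, cr, 1.0))
 *
 * where csc[0..2] are the first three rows of the 4x4 matrix uploaded by
 * vl_compositor_set_csc_matrix(); the fourth column of each row carries the
 * bias term, which is why texel.w is forced to 1.0 before the DP4s.
 */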

static struct ureg_dst
calc_line(struct ureg_program *shader, unsigned nearest)
{
   struct ureg_dst tmp;
   struct ureg_src pos;

   tmp = ureg_DECL_temporary(shader);

   pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, 0, TGSI_INTERPOLATE_LINEAR);

   /*
    * tmp.y = fraction(pos.y * .5) >= 0.5 ? 1 : 0
    * however, for linear interpolation (chroma deinterlace) 2 pixels are required..
    */
   if (nearest)
      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), pos, ureg_imm1f(shader, 0.5f));
   else
      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), pos, ureg_imm1f(shader, 0.25f));
   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp));
   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));

   return tmp;
}
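
/* Worked example for calc_line(), assuming the default half-integer pixel
 * centers and nearest = 1 (factor 0.5):
 *
 *    row 0: frac(0.5 * 0.5) = 0.25 -> tmp.y = 0 (top field)
 *    row 1: frac(1.5 * 0.5) = 0.75 -> tmp.y = 1 (bottom field)
 *    row 2: frac(2.5 * 0.5) = 0.25 -> tmp.y = 0
 *
 * With nearest = 0 the factor 0.25 makes the field bit flip only every two
 * rows, which is what the linearly interpolated chroma path expects.
 */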

/* Deinterlace or weave NV12 or YV12 to a temporary video buffer
 */
static void *
create_frag_shader_weave(struct vl_compositor *c, unsigned luma, unsigned interlaced, unsigned comps)
{
   struct ureg_program *shader;
   struct ureg_src tc, sampler[4];
   struct ureg_dst field, fragment, swizcolor;
   unsigned label, writemask, nearest;
   if (luma)
      writemask = TGSI_WRITEMASK_X;
   else if (comps == 2)
      writemask = TGSI_WRITEMASK_YZ;
   else
      writemask = TGSI_WRITEMASK_Y;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return false;

   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
   sampler[0] = ureg_DECL_sampler(shader, 0);
   sampler[1] = ureg_DECL_sampler(shader, 1);
   if (!luma && comps == 1) {
      sampler[2] = ureg_DECL_sampler(shader, 2);
      sampler[3] = ureg_DECL_sampler(shader, 3);
   }

   nearest = luma || c->chroma == PIPE_VIDEO_CHROMA_FORMAT_444 || !interlaced;
   field = calc_line(shader, nearest);
   swizcolor = ureg_DECL_temporary(shader);

   /* field.y = fraction(coord/2) >= .5 (from vl_mc.c)
    *
    * if (field.y)
    *    swiz = sampler[bottom];
    * else
    *    swiz = sampler[top];
    *
    * if (LUMA)
    *    color.x = swiz;
    * else
    *    color.xy = swiz.yz;
    */

   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
   {
      struct ureg_dst adjtc = ureg_DECL_temporary(shader);
      if (!nearest) {
         /* -2.0 / c->video_h (1 pixel up, chroma = half height, full height wouldn't need this)
          * + .5 / c->video_h (.25 pixel down, since interlaced first pixel = .75 first
          */
         ureg_MOV(shader, ureg_writemask(adjtc, TGSI_WRITEMASK_X), tc);
         ureg_SUB(shader, ureg_writemask(adjtc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_Y),
                  ureg_imm1f(shader, 1.5f / c->video_h));
      } else
         ureg_MOV(shader, ureg_writemask(adjtc, TGSI_WRITEMASK_XY), tc);
      ureg_TEX(shader, ureg_writemask(swizcolor, writemask), TGSI_TEXTURE_2D, ureg_src(adjtc), sampler[1]);
      if (!luma && comps == 1)
         ureg_TEX(shader, ureg_writemask(swizcolor, TGSI_WRITEMASK_Z), TGSI_TEXTURE_2D, ureg_src(adjtc), sampler[3]);
      ureg_release_temporary(shader, adjtc);
      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
   }
   ureg_ELSE(shader, &label);
   {
      struct ureg_dst adjtc = ureg_DECL_temporary(shader);
      if (!nearest) {
         ureg_MOV(shader, ureg_writemask(adjtc, TGSI_WRITEMASK_X), tc);
         ureg_ADD(shader, ureg_writemask(adjtc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_Y),
                  ureg_imm1f(shader, .5f / c->video_h));
      } else
         ureg_MOV(shader, ureg_writemask(adjtc, TGSI_WRITEMASK_XY), tc);
      ureg_TEX(shader, ureg_writemask(swizcolor, writemask), TGSI_TEXTURE_2D, ureg_src(adjtc), sampler[0]);
      if (!luma && comps == 1)
         ureg_TEX(shader, ureg_writemask(swizcolor, TGSI_WRITEMASK_Z), TGSI_TEXTURE_2D, ureg_src(adjtc), sampler[2]);
      ureg_release_temporary(shader, adjtc);
      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
   }
   ureg_ENDIF(shader);

   if (luma)
      ureg_MOV(shader, ureg_writemask(fragment, writemask), ureg_src(swizcolor));
   else
      ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XY),
               ureg_swizzle(ureg_src(swizcolor), TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z));

   ureg_release_temporary(shader, swizcolor);
   ureg_release_temporary(shader, field);
   ureg_END(shader);
   return ureg_create_shader_and_destroy(shader, c->pipe);
}

static void *
create_frag_shader_bicubic(struct vl_compositor *c, unsigned planes)
{
   struct ureg_src sampler[3], lookup, cst, size, tc, csc[4];
   struct ureg_dst fragment, tmp, hg_x, hg_y, color, coord[2][2], tex[2][2];
   int i, j;

   struct ureg_program *shader;
   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;
   assert(planes == 2 || planes == 3);

   ureg_property_fs_coord_pixel_center(shader, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
   for (i = 0; i < 3; ++i)
      csc[i] = ureg_DECL_constant(shader, i);
   cst = ureg_DECL_constant(shader, 4);
   size = ureg_DECL_constant(shader, 5);

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
   for (i = 0; i < planes; ++i)
      sampler[i] = ureg_DECL_sampler(shader, i);
   lookup = ureg_DECL_sampler(shader, planes);

   tmp = ureg_DECL_temporary(shader);
   hg_x = ureg_DECL_temporary(shader);
   hg_y = ureg_DECL_temporary(shader);
   color = ureg_DECL_temporary(shader);

   for (i = 0; i < 4; ++i) {
      coord[i/2][i%2] = ureg_DECL_temporary(shader);
      tex[i/2][i%2] = ureg_DECL_temporary(shader);
   }

   for (j = 0; j < planes; ++j) {
      unsigned writemask, p = j >= 1;

      if (j == 1 && planes == 2)
         writemask = TGSI_WRITEMASK_YZ;
      else
         writemask = TGSI_WRITEMASK_X << j;

      if (j == 0 || (j == 1 && c->chroma != PIPE_VIDEO_CHROMA_FORMAT_444)) {
         /* tmp.xy = tc * size[p].xy
          * hg_x.xyz = tex1D(lookup, tmp.x);
          * hg_y.xyz = tex1D(lookup, tmp.y);
          */
         ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY),
                  tc, ureg_swizzle(size, 2 * p, 2 * p + 1, 2 * p + 1, 2 * p + 1));
         ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp));
         ureg_TEX(shader, ureg_writemask(hg_x, TGSI_WRITEMASK_XYZ),
                  TGSI_TEXTURE_1D, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), lookup);
         ureg_TEX(shader, ureg_writemask(hg_y, TGSI_WRITEMASK_XYZ),
                  TGSI_TEXTURE_1D, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), lookup);

#define e_x(x, p) ureg_scalar(x, 2 * p)
#define e_y(x, p) ureg_scalar(x, 2 * p + 1)

         /* coord1X.x += hg_x.x * cst[p].x;
          * coord0X.x -= hg_x.y * cst[p].x;
          *
          * coord11.y += hg_y.x * cst[p].w
          * coord01.y += hg_y.x * cst[p].w
          *
          * coord10.y -= hg_y.y * cst[p].w
          * coord00.y -= hg_y.y * cst[p].w
          */

         ureg_MAD(shader, ureg_writemask(coord[1][0], TGSI_WRITEMASK_X),
                  ureg_scalar(ureg_src(hg_x), TGSI_SWIZZLE_X),
                  e_x(cst, p), tc);
         ureg_MAD(shader, ureg_writemask(coord[0][0], TGSI_WRITEMASK_X),
                  ureg_negate(ureg_scalar(ureg_src(hg_x), TGSI_SWIZZLE_Y)),
                  e_x(cst, p), tc);

         for (i = 0; i < 2; ++i)
            ureg_MOV(shader, ureg_writemask(coord[i][1], TGSI_WRITEMASK_X), ureg_src(coord[i][0]));
         ureg_MAD(shader, ureg_writemask(coord[1][1], TGSI_WRITEMASK_Y),
                  ureg_scalar(ureg_src(hg_y), TGSI_SWIZZLE_X),
                  e_y(cst, p), tc);
         ureg_MAD(shader, ureg_writemask(coord[0][1], TGSI_WRITEMASK_Y),
                  ureg_scalar(ureg_src(hg_y), TGSI_SWIZZLE_X),
                  e_y(cst, p), tc);

         ureg_MAD(shader, ureg_writemask(coord[1][0], TGSI_WRITEMASK_Y),
                  ureg_negate(ureg_scalar(ureg_src(hg_y), TGSI_SWIZZLE_Y)),
                  e_y(cst, p), tc);
         ureg_MAD(shader, ureg_writemask(coord[0][0], TGSI_WRITEMASK_Y),
                  ureg_negate(ureg_scalar(ureg_src(hg_y), TGSI_SWIZZLE_Y)),
                  e_y(cst, p), tc);

#undef e_y
#undef e_x
      }

      for (i = 0; i < 4; ++i) {
         ureg_TEX(shader, ureg_writemask(tex[i/2][i%2], writemask),
                  TGSI_TEXTURE_2D, ureg_src(coord[i/2][i%2]), sampler[j]);
      }

      for (i = 0; i < 2; ++i)
         ureg_LRP(shader, ureg_writemask(tex[i][0], writemask),
                  ureg_scalar(ureg_src(hg_y), TGSI_SWIZZLE_Z),
                  ureg_src(tex[i][0]), ureg_src(tex[i][1]));

      ureg_LRP(shader, ureg_writemask(color, writemask),
               ureg_scalar(ureg_src(hg_x), TGSI_SWIZZLE_Z),
               ureg_src(tex[0][0]), ureg_src(tex[1][0]));
   }

   for (i = 3; i >= 0; --i) {
      ureg_release_temporary(shader, tex[i/2][i%2]);
      ureg_release_temporary(shader, coord[i/2][i%2]);
   }
   ureg_release_temporary(shader, hg_y);
   ureg_release_temporary(shader, hg_x);
   ureg_release_temporary(shader, tmp);

   ureg_MOV(shader, ureg_writemask(color, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));

   if (DEBUG_BICUBIC) {
      struct ureg_dst lincolor = ureg_DECL_temporary(shader);
      unsigned label;
      float val = ((float)DEBUG_BICUBIC) / 512.f;
      ureg_TEX(shader, ureg_writemask(lincolor, TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, tc, sampler[0]);
      if (planes == 2) {
         ureg_TEX(shader, ureg_writemask(lincolor, TGSI_WRITEMASK_YZ),
                  TGSI_TEXTURE_2D, tc, sampler[1]);
      } else {
         ureg_TEX(shader, ureg_writemask(lincolor, TGSI_WRITEMASK_Y),
                  TGSI_TEXTURE_2D, tc, sampler[1]);
         ureg_TEX(shader, ureg_writemask(lincolor, TGSI_WRITEMASK_Z),
                  TGSI_TEXTURE_2D, tc, sampler[2]);
      }

      /* lincolor.xyz = tex2D(...);
       * lincolor.xyz = |color - lincolor|
       * lincolor.xyz = lincolor >= DEBUG_BICUBIC / 512.f
       * if (lincolor.xyz)
       *    color.xyz = { 1.f, .5f, .5f }
       * endif
       */
      ureg_SUB(shader, ureg_writemask(lincolor, TGSI_WRITEMASK_XYZ), ureg_src(color), ureg_src(lincolor));
      ureg_ABS(shader, ureg_writemask(lincolor, TGSI_WRITEMASK_XYZ), ureg_src(lincolor));
      ureg_SGE(shader, ureg_writemask(lincolor, TGSI_WRITEMASK_XYZ), ureg_src(lincolor), ureg_imm3f(shader, val, val, val));
      ureg_IF(shader, ureg_swizzle(ureg_src(lincolor), TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z), &label);
         ureg_MOV(shader, ureg_writemask(color, TGSI_WRITEMASK_XYZ), ureg_imm3f(shader, 1.f, .5f, .5f));
      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
      ureg_ENDIF(shader);
      ureg_release_temporary(shader, lincolor);
   }

   for (i = 0; i < 3; ++i)
      ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(color));
   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));

   ureg_release_temporary(shader, color);

   ureg_END(shader);
   return ureg_create_shader_and_destroy(shader, c->pipe);
}
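
/* The bicubic shader above uses the usual "four bilinear taps" formulation of
 * cubic B-spline filtering: per axis, the four weights w0..w3 of the
 * fractional position are folded into two offsets (h0, h1) and one blend
 * factor (g0), which vl_compositor_init_video() bakes into the 1D lookup
 * texture:
 *
 *    h0 = 1 + frac - w1 / (w0 + w1)
 *    h1 = 1 - frac + w3 / (w2 + w3)
 *    g0 = w0 + w1
 *
 * The hardware bilinear filter evaluates the inner weights and the two LRPs
 * combine the four taps; with lrp(a, b, c) = a*b + (1-a)*c this is roughly
 *
 *    color = lrp(g0x, lrp(g0y, tap(-h1x, -h1y), tap(-h1x, +h0y)),
 *                      lrp(g0y, tap(+h0x, -h1y), tap(+h0x, +h0y)))
 *
 * (sketch only; the offsets are scaled by the per-plane texel sizes in
 * constant 4 ("cst") and the fractional position comes from the plane sizes
 * in constant 5 ("size")).
 */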

static void *
create_frag_shader_sobel(struct vl_compositor *c)
{
   struct ureg_program *shader;
   struct ureg_src tc, sampler;
   struct ureg_dst fragment, color[9], tctmp;
   float v[3] = { -1.f, 0.f, 1.f };
   int i;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
   sampler = ureg_DECL_sampler(shader, 0);

   tctmp = ureg_DECL_temporary(shader);
   for (i = 0; i < Elements(color); ++i)
      color[i] = ureg_DECL_temporary(shader);

   for (i = 0; i < 9; ++i) {
      ureg_ADD(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_XY), tc, ureg_imm2f(shader, v[i%3]/(float)c->video_w, v[i/3]/(float)c->video_h));
      ureg_TEX(shader, ureg_writemask(color[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, ureg_src(tctmp), sampler);
   }

   /* tmp.x = P1 + P3 + 2 * P2 */
   ureg_ADD(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_X), ureg_src(color[0]), ureg_src(color[2]));
   ureg_MAD(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_X), ureg_src(color[1]), ureg_imm1f(shader, 2.f), ureg_src(tctmp));

   /* tmp.y = P7 + P9 + 2 * P8 */
   ureg_ADD(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_Y),
            ureg_scalar(ureg_src(color[6]), TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(color[8]), TGSI_SWIZZLE_X));
   ureg_MAD(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_Y),
            ureg_scalar(ureg_src(color[7]), TGSI_SWIZZLE_X),
            ureg_imm1f(shader, 2.f), ureg_src(tctmp));

   /* tmp.z = | tmp.x - tmp.y | */
   ureg_SUB(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_Z),
            ureg_scalar(ureg_src(tctmp), TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(tctmp), TGSI_SWIZZLE_Y));
   ureg_ABS(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tctmp), TGSI_SWIZZLE_Z));

   /* tmp.x = P3 + P9 + 2 * P6 */
   ureg_ADD(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_X), ureg_src(color[2]), ureg_src(color[8]));
   ureg_MAD(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_X), ureg_src(color[5]), ureg_imm1f(shader, 2.f), ureg_src(tctmp));

   /* tmp.y = P1 + P7 + 2 * P4 */
   ureg_ADD(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_Y),
            ureg_scalar(ureg_src(color[0]), TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(color[6]), TGSI_SWIZZLE_X));
   ureg_MAD(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_Y),
            ureg_scalar(ureg_src(color[3]), TGSI_SWIZZLE_X),
            ureg_imm1f(shader, 2.f), ureg_src(tctmp));

   /* tmp.x = | tmp.x - tmp.y | */
   ureg_SUB(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_X),
            ureg_scalar(ureg_src(tctmp), TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(tctmp), TGSI_SWIZZLE_Y));
   ureg_ABS(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_X), ureg_src(tctmp));

   /* tmp.x += tmp.z */
   ureg_ADD(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_X),
            ureg_scalar(ureg_src(tctmp), TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(tctmp), TGSI_SWIZZLE_Z));

   /* fragment.x = .25 * P5 + (tmp.x >= .4f) */
   ureg_SGE(shader, ureg_writemask(tctmp, TGSI_WRITEMASK_X), ureg_src(tctmp), ureg_imm1f(shader, 0.4f));
   ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), ureg_src(color[4]), ureg_imm1f(shader, .25f), ureg_src(tctmp));

   for (i = 0; i < Elements(color); ++i)
      ureg_release_temporary(shader, color[i]);

   ureg_release_temporary(shader, tctmp);
   ureg_END(shader);
   return ureg_create_shader_and_destroy(shader, c->pipe);
}
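
/* The nine taps above (P1..P9 in row-major order around the current pixel)
 * implement the standard Sobel operator; since only absolute differences are
 * used, the kernel signs do not matter:
 *
 *    Gx = | -1  0 +1 |      Gy = | +1 +2 +1 |
 *         | -2  0 +2 |           |  0  0  0 |
 *         | -1  0 +1 |           | -1 -2 -1 |
 *
 * The edge magnitude is approximated as |Gx| + |Gy|, thresholded at 0.4, and
 * added on top of 0.25 * the center sample so contours light up when
 * DEBUG_CONTOUR is enabled.
 */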

static void *
create_frag_shader_palette(struct vl_compositor *c, bool include_cc)
{
   struct ureg_program *shader;
   struct ureg_src csc[3];
   struct ureg_src tc;
   struct ureg_src sampler;
   struct ureg_src palette;
   struct ureg_dst texel;
   struct ureg_dst fragment;
   unsigned i;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return false;

   for (i = 0; include_cc && i < 3; ++i)
      csc[i] = ureg_DECL_constant(shader, i);

   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
   sampler = ureg_DECL_sampler(shader, 0);
   palette = ureg_DECL_sampler(shader, 1);

   texel = ureg_DECL_temporary(shader);
   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /*
    * texel = tex(tc, sampler)
    * fragment.xyz = tex(texel, palette) * csc
    * fragment.a = texel.a
    */
   ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(texel));

   if (include_cc) {
      ureg_TEX(shader, texel, TGSI_TEXTURE_1D, ureg_src(texel), palette);
      for (i = 0; i < 3; ++i)
         ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));
   } else {
      ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
               TGSI_TEXTURE_1D, ureg_src(texel), palette);
   }

   ureg_release_temporary(shader, texel);
   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, c->pipe);
}

static void *
create_frag_shader_rgba(struct vl_compositor *c)
{
   struct ureg_program *shader;
   struct ureg_src tc;
   struct ureg_src sampler;
   struct ureg_dst fragment;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return false;

   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
   sampler = ureg_DECL_sampler(shader, 0);
   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   /*
    * fragment = tex(tc, sampler)
    */
   ureg_TEX(shader, fragment, TGSI_TEXTURE_2D, tc, sampler);
   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, c->pipe);
}

static bool
init_shaders(struct vl_compositor *c)
{
   assert(c);

   c->vs = create_vert_shader(c);
   if (!c->vs) {
      debug_printf("Unable to create vertex shader.\n");
      return false;
   }

   c->fs_palette.yuv = create_frag_shader_palette(c, true);
   if (!c->fs_palette.yuv) {
      debug_printf("Unable to create YUV-Palette-to-RGB fragment shader.\n");
      return false;
   }

   c->fs_palette.rgb = create_frag_shader_palette(c, false);
   if (!c->fs_palette.rgb) {
      debug_printf("Unable to create RGB-Palette-to-RGB fragment shader.\n");
      return false;
   }

   c->fs_rgba = create_frag_shader_rgba(c);
   if (!c->fs_rgba) {
      debug_printf("Unable to create RGB-to-RGB fragment shader.\n");
      return false;
   }

   return true;
}

static void cleanup_shaders(struct vl_compositor *c)
{
   assert(c);

   c->pipe->delete_vs_state(c->pipe, c->vs);
   c->pipe->delete_fs_state(c->pipe, c->fs_palette.yuv);
   c->pipe->delete_fs_state(c->pipe, c->fs_palette.rgb);
   c->pipe->delete_fs_state(c->pipe, c->fs_rgba);
}

static bool
init_pipe_state(struct vl_compositor *c)
{
   struct pipe_rasterizer_state rast;
   struct pipe_sampler_state sampler;
   struct pipe_blend_state blend;
   struct pipe_depth_stencil_alpha_state dsa;
   unsigned i;

   assert(c);

   c->fb_state.nr_cbufs = 1;
   c->fb_state.zsbuf = NULL;

   c->viewport.scale[2] = 1;
   c->viewport.scale[3] = 1;
   c->viewport.translate[2] = 0;
   c->viewport.translate[3] = 0;

   memset(&sampler, 0, sizeof(sampler));
   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
   sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
   sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
   sampler.compare_func = PIPE_FUNC_ALWAYS;
   sampler.normalized_coords = 1;

   c->sampler_linear = c->pipe->create_sampler_state(c->pipe, &sampler);

   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
   c->sampler_nearest = c->pipe->create_sampler_state(c->pipe, &sampler);

   sampler.wrap_s = sampler.wrap_t = sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
   sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
   sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
   c->sampler_repeat = c->pipe->create_sampler_state(c->pipe, &sampler);

   memset(&blend, 0, sizeof blend);
   blend.independent_blend_enable = 0;
   blend.rt[0].blend_enable = 0;
   blend.logicop_enable = 0;
   blend.logicop_func = PIPE_LOGICOP_CLEAR;
   blend.rt[0].colormask = PIPE_MASK_RGBA;
   blend.dither = 0;
   c->blend_clear = c->pipe->create_blend_state(c->pipe, &blend);

   blend.rt[0].blend_enable = 1;
   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
   c->blend_add = c->pipe->create_blend_state(c->pipe, &blend);

   memset(&rast, 0, sizeof rast);
   rast.flatshade = 1;
   rast.front_ccw = 1;
   rast.cull_face = PIPE_FACE_NONE;
   rast.fill_back = PIPE_POLYGON_MODE_FILL;
   rast.fill_front = PIPE_POLYGON_MODE_FILL;
   rast.scissor = 1;
   rast.line_width = 1;
   rast.point_size_per_vertex = 1;
   rast.offset_units = 1;
   rast.offset_scale = 1;
   rast.gl_rasterization_rules = 1;

   c->rast = c->pipe->create_rasterizer_state(c->pipe, &rast);

   memset(&dsa, 0, sizeof dsa);
   dsa.depth.enabled = 0;
   dsa.depth.writemask = 0;
   dsa.depth.func = PIPE_FUNC_ALWAYS;
   for (i = 0; i < 2; ++i) {
      dsa.stencil[i].enabled = 0;
      dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
      dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
      dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
      dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
      dsa.stencil[i].valuemask = 0;
      dsa.stencil[i].writemask = 0;
   }
   dsa.alpha.enabled = 0;
   dsa.alpha.func = PIPE_FUNC_ALWAYS;
   dsa.alpha.ref_value = 0;
   c->dsa = c->pipe->create_depth_stencil_alpha_state(c->pipe, &dsa);
   c->pipe->bind_depth_stencil_alpha_state(c->pipe, c->dsa);

   return true;
}

static void cleanup_pipe_state(struct vl_compositor *c)
{
   assert(c);

   /* Asserted in softpipe_delete_fs_state() for some reason */
   c->pipe->bind_vs_state(c->pipe, NULL);
   c->pipe->bind_fs_state(c->pipe, NULL);

   c->pipe->delete_depth_stencil_alpha_state(c->pipe, c->dsa);
   c->pipe->delete_sampler_state(c->pipe, c->sampler_linear);
   c->pipe->delete_sampler_state(c->pipe, c->sampler_nearest);
   c->pipe->delete_sampler_state(c->pipe, c->sampler_repeat);
   c->pipe->delete_blend_state(c->pipe, c->blend_clear);
   c->pipe->delete_blend_state(c->pipe, c->blend_add);
   c->pipe->delete_rasterizer_state(c->pipe, c->rast);
}

static bool
create_vertex_buffer(struct vl_compositor *c)
{
   assert(c);

   pipe_resource_reference(&c->vertex_buf.buffer, NULL);
   c->vertex_buf.buffer = pipe_buffer_create
   (
      c->pipe->screen,
      PIPE_BIND_VERTEX_BUFFER,
      PIPE_USAGE_STREAM,
      sizeof(struct vertex4f) * VL_COMPOSITOR_MAX_LAYERS * 4
   );

   return c->vertex_buf.buffer != NULL;
}

static bool
init_buffers(struct vl_compositor *c)
{
   struct pipe_vertex_element vertex_elems[2];

   assert(c);

   /*
    * Create our vertex buffer and vertex buffer elements
    */
   c->vertex_buf.stride = sizeof(struct vertex4f);
   c->vertex_buf.buffer_offset = 0;
   create_vertex_buffer(c);

   vertex_elems[0].src_offset = 0;
   vertex_elems[0].instance_divisor = 0;
   vertex_elems[0].vertex_buffer_index = 0;
   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
   vertex_elems[1].src_offset = sizeof(struct vertex2f);
   vertex_elems[1].instance_divisor = 0;
   vertex_elems[1].vertex_buffer_index = 0;
   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
   c->vertex_elems_state = c->pipe->create_vertex_elements_state(c->pipe, 2, vertex_elems);

   /*
    * Create our fragment shader's constant buffer
    * Const buffer contains the color conversion matrix and bias vectors
    */
   /* XXX: Create with IMMUTABLE/STATIC... although it does change every once in a long while... */
   c->csc_matrix = pipe_buffer_create
   (
      c->pipe->screen,
      PIPE_BIND_CONSTANT_BUFFER,
      PIPE_USAGE_STATIC,
      sizeof(csc_matrix) + sizeof(c->original_sizes)
   );
   assert((Elements(c->csc) + Elements(c->original_sizes))/4 == 6); // amount of constants

   return true;
}
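
/* Fragment shader constant buffer layout, as consumed by the shaders above
 * (6 vec4 in total):
 *
 *    const[0..3]  4x4 color conversion matrix (c->csc)
 *    const[4]     1/width and 1/height of the luma and chroma planes ("cst")
 *    const[5]     width and height of the luma and chroma planes ("size")
 *
 * const[4] and const[5] are backed by c->original_sizes and refreshed in
 * vl_compositor_set_buffer_layer() whenever the source plane sizes change.
 */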

static void
cleanup_buffers(struct vl_compositor *c)
{
   assert(c);

   c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems_state);
   pipe_resource_reference(&c->vertex_buf.buffer, NULL);
   pipe_resource_reference(&c->csc_matrix, NULL);
}

static INLINE struct pipe_video_rect
default_rect(struct vl_compositor_layer *layer)
{
   struct pipe_resource *res = layer->sampler_views[0]->texture;
   struct pipe_video_rect rect = { 0, 0, res->width0, res->height0 };
   return rect;
}

static INLINE struct vertex2f
calc_topleft(struct vertex2f size, struct pipe_video_rect rect)
{
   struct vertex2f res = { rect.x / size.x, rect.y / size.y };
   return res;
}

static INLINE struct vertex2f
calc_bottomright(struct vertex2f size, struct pipe_video_rect rect)
{
   struct vertex2f res = { (rect.x + rect.w) / size.x, (rect.y + rect.h) / size.y };
   return res;
}

static INLINE void
calc_src_and_dst(struct vl_compositor_layer *layer, unsigned width, unsigned height,
                 struct pipe_video_rect src, struct pipe_video_rect *dst)
{
   struct vertex2f size_in = { width, height };
   struct vertex2f size_out = { 1.f, 1.f };

   layer->src.tl = calc_topleft(size_in, src);
   layer->src.br = calc_bottomright(size_in, src);
   if (dst) {
      layer->dst.tl = calc_topleft(size_out, *dst);
      layer->dst.br = calc_bottomright(size_out, *dst);
      layer->custom_dest_rect = 1;
   } else {
      layer->dst.tl.x = layer->dst.tl.y = 0.f;
      layer->dst.br.x = layer->dst.br.y = 1.f;
      layer->custom_dest_rect = 0;
   }
}

static void
gen_rect_verts(struct vertex4f *vb, struct vl_compositor_layer *layer, float w, float h)
{
   assert(vb && layer);

   vb[0].x = layer->dst.tl.x / w;
   vb[0].y = layer->dst.tl.y / h;
   vb[0].z = layer->src.tl.x;
   vb[0].w = layer->src.tl.y;

   vb[1].x = layer->dst.br.x / w;
   vb[1].y = layer->dst.tl.y / h;
   vb[1].z = layer->src.br.x;
   vb[1].w = layer->src.tl.y;

   vb[2].x = layer->dst.br.x / w;
   vb[2].y = layer->dst.br.y / h;
   vb[2].z = layer->src.br.x;
   vb[2].w = layer->src.br.y;

   vb[3].x = layer->dst.tl.x / w;
   vb[3].y = layer->dst.br.y / h;
   vb[3].z = layer->src.tl.x;
   vb[3].w = layer->src.br.y;
}
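
/* Each layer quad consists of four vertex4f entries: .xy is the destination
 * position (divided by the render size when a custom destination rectangle is
 * used) and .zw is the source texture coordinate, matching the two
 * R32G32_FLOAT vertex elements set up in init_buffers().
 */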

static void
gen_vertex_data(struct vl_compositor *c, float w, float h)
{
   struct vertex4f *vb;
   struct pipe_transfer *buf_transfer;
   unsigned i;

   assert(c);

   vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
                        PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | PIPE_TRANSFER_DONTBLOCK,
                        &buf_transfer);

   if (!vb) {
      // If buffer is still locked from last draw create a new one
      create_vertex_buffer(c);
      vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
                           PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
                           &buf_transfer);
   }

   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; i++) {
      if (c->used_layers & (1 << i)) {
         struct vl_compositor_layer *layer = &c->layers[i];
         if (layer->custom_dest_rect)
            gen_rect_verts(vb, layer, w, h);
         else
            gen_rect_verts(vb, layer, 1.f, 1.f);
         vb += 4;

         if (layer->clearing &&
             (!layer->custom_dest_rect ||
              (c->dirty_tl.x >= layer->dst.tl.x/w &&
               c->dirty_tl.y >= layer->dst.tl.y/h &&
               c->dirty_br.x <= layer->dst.br.x/w &&
               c->dirty_br.y <= layer->dst.br.y/h))) {
            // We clear the dirty area anyway, no need for clear_render_target
            c->dirty_tl.x = c->dirty_tl.y = 1.0f;
            c->dirty_br.x = c->dirty_br.y = 0.0f;
         }
      }
   }

   pipe_buffer_unmap(c->pipe, buf_transfer);
}

static void
draw_layers(struct vl_compositor *c, float w, float h)
{
   unsigned vb_index, i;

   assert(c);

   for (i = 0, vb_index = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
      if (c->used_layers & (1 << i)) {
         struct vl_compositor_layer *layer = &c->layers[i];
         struct pipe_sampler_view **samplers = &layer->sampler_views[0];
         unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : !samplers[3] ? 3 : 4;

         c->pipe->bind_blend_state(c->pipe, layer->blend);
         c->pipe->bind_fs_state(c->pipe, layer->fs);
         c->pipe->bind_fragment_sampler_states(c->pipe, num_sampler_views, layer->samplers);
         c->pipe->set_fragment_sampler_views(c->pipe, num_sampler_views, samplers);
         util_draw_arrays(c->pipe, PIPE_PRIM_QUADS, vb_index * 4, 4);
         vb_index++;

         // Remember the currently drawn area as dirty for the next draw command
         if (layer->custom_dest_rect) {
            c->dirty_tl.x = MIN2(layer->dst.tl.x/w, c->dirty_tl.x);
            c->dirty_tl.y = MIN2(layer->dst.tl.y/h, c->dirty_tl.y);
            c->dirty_br.x = MAX2(layer->dst.br.x/w, c->dirty_br.x);
            c->dirty_br.y = MAX2(layer->dst.br.y/h, c->dirty_br.y);
         } else {
            c->dirty_tl.x = 0.f;
            c->dirty_tl.y = 0.f;
            c->dirty_br.x = 1.f;
            c->dirty_br.y = 1.f;
         }
      }
   }
}

void
vl_compositor_reset_dirty_area(struct vl_compositor *c)
{
   assert(c);

   c->dirty_tl.x = c->dirty_tl.y = 0.0f;
   c->dirty_br.x = c->dirty_br.y = 1.0f;
}

void
vl_compositor_set_clear_color(struct vl_compositor *c, union pipe_color_union *color)
{
   assert(c);

   c->clear_color = *color;
}

void
vl_compositor_get_clear_color(struct vl_compositor *c, union pipe_color_union *color)
{
   assert(c);
   assert(color);

   *color = c->clear_color;
}

void
vl_compositor_clear_layers(struct vl_compositor *c)
{
   unsigned i, j;

   assert(c);

   c->used_layers = 0;
   for ( i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
      c->layers[i].clearing = i ? false : true;
      c->layers[i].blend = i ? c->blend_add : c->blend_clear;
      c->layers[i].fs = NULL;
      for ( j = 0; j < 3; j++)
         pipe_sampler_view_reference(&c->layers[i].sampler_views[j], NULL);
   }
}

static void
cleanup_video(struct vl_compositor *c)
{
   unsigned i;
   for (i = 0; i < Elements(c->video_res); ++i) {
      pipe_sampler_view_reference(&c->video_sv[i], NULL);
      pipe_surface_reference(&c->video_surf[i], NULL);
      pipe_resource_reference(&c->video_res[i], NULL);
   }
   for (i = 0; i < Elements(c->fs_weave); ++i) {
      if (!c->fs_weave[i])
         continue;
      c->pipe->delete_fs_state(c->pipe, c->fs_weave[i]);
      c->fs_weave[i] = NULL;
   }

   for (i = 0; i < Elements(c->fs_video_buffer); ++i) {
      if (c->fs_video_buffer[i])
         c->pipe->delete_fs_state(c->pipe, c->fs_video_buffer[i]);
      c->fs_video_buffer[i] = NULL;
   }
}

void
vl_compositor_cleanup(struct vl_compositor *c)
{
   assert(c);

   vl_compositor_clear_layers(c);
   cleanup_buffers(c);
   cleanup_shaders(c);
   cleanup_pipe_state(c);
   cleanup_video(c);
}

void
vl_compositor_set_csc_matrix(struct vl_compositor *c, const float matrix[16])
{
   struct pipe_transfer *buf_transfer;
   float *map;

   assert(c);

   map = pipe_buffer_map(c->pipe, c->csc_matrix,
                         PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
                         &buf_transfer);
   memcpy(map, matrix, sizeof(csc_matrix));
   memcpy(map + Elements(c->csc), c->original_sizes, sizeof(c->original_sizes));
   memcpy(c->csc, matrix, sizeof(csc_matrix));
   pipe_buffer_unmap(c->pipe, buf_transfer);
}
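
/* Typical usage from a state tracker (sketch; mirrors the call made in
 * vl_compositor_init() below):
 *
 *    csc_matrix csc;
 *    vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_BT_601, NULL, true, csc);
 *    vl_compositor_set_csc_matrix(c, csc);
 */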

void
vl_compositor_set_layer_blend(struct vl_compositor *c,
                              unsigned layer, void *blend,
                              bool is_clearing)
{
   assert(c && blend);

   assert(layer < VL_COMPOSITOR_MAX_LAYERS);

   c->layers[layer].clearing = is_clearing;
   c->layers[layer].blend = blend;
}

static void gen_vertex_data_video(struct vl_compositor *c)
{
   struct vertex4f *vb;
   struct pipe_transfer *buf_transfer;
   vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
                        PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | PIPE_TRANSFER_DONTBLOCK,
                        &buf_transfer);

   if (!vb) {
      // If buffer is still locked from last draw create a new one
      create_vertex_buffer(c);
      vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
                           PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
                           &buf_transfer);
   }
   vb[0].x = 0.f;
   vb[0].y = 0.f;
   vb[0].z = 0.f;
   vb[0].w = 0.f;

   vb[1].x = 0.f;
   vb[1].y = 65535.f;
   vb[1].z = 0.f;
   vb[1].w = 65535.f;

   vb[2].x = 65535.f;
   vb[2].y = 0.f;
   vb[2].z = 65535.f;
   vb[2].w = 0.f;
   pipe_buffer_unmap(c->pipe, buf_transfer);
}
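
/* The three vertices above span one oversized triangle that covers the whole
 * destination surface once the scissor clips it, so the weave/deinterlace and
 * sobel passes can be drawn without building per-layer quads.
 */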

static void
vl_compositor_render_sobel(struct vl_compositor *c, struct pipe_sampler_view *sv)
{
   struct pipe_scissor_state scissor;
   struct pipe_surface *dst_surface;
   void *fs;

   assert(c);

   gen_vertex_data_video(c);

   dst_surface = c->video_surf[3];
   fs = c->fs_weave[5];

   c->fb_state.width = dst_surface->width;
   c->fb_state.height = dst_surface->height;
   c->fb_state.cbufs[0] = dst_surface;

   c->viewport.scale[0] = sv->texture->width0;
   c->viewport.scale[1] = sv->texture->height0;
   c->viewport.translate[0] = 0;
   c->viewport.translate[1] = 0;

   scissor.minx = 0;
   scissor.miny = 0;
   scissor.maxx = dst_surface->width;
   scissor.maxy = dst_surface->height;

   c->pipe->set_scissor_state(c->pipe, &scissor);
   c->pipe->set_framebuffer_state(c->pipe, &c->fb_state);
   c->pipe->set_viewport_state(c->pipe, &c->viewport);
   c->pipe->bind_vs_state(c->pipe, c->vs);
   c->pipe->set_vertex_buffers(c->pipe, 1, &c->vertex_buf);
   c->pipe->bind_vertex_elements_state(c->pipe, c->vertex_elems_state);
   c->pipe->bind_rasterizer_state(c->pipe, c->rast);

   c->pipe->bind_blend_state(c->pipe, c->blend_clear);
   c->pipe->bind_fs_state(c->pipe, fs);
   c->pipe->bind_fragment_sampler_states(c->pipe, 1, &c->sampler_nearest);
   c->pipe->set_fragment_sampler_views(c->pipe, 1, &sv);
   util_draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, 0, 3);
}

static void
vl_compositor_render_video(struct vl_compositor *c,
                           struct pipe_sampler_view **sv,
                           unsigned interlaced)
{
   struct pipe_scissor_state scissor;
   void *samplers[4];
   unsigned i;
   for (i = 0; i < 4; ++i) {
      if (!interlaced || i < 2 || c->chroma == PIPE_VIDEO_CHROMA_FORMAT_444)
         samplers[i] = c->sampler_nearest;
      else
         samplers[i] = c->sampler_linear;
   }
   assert(c);
   gen_vertex_data_video(c);
   for (i = 0; i < 2; ++i) {
      struct pipe_surface *dst_surface;
      unsigned num_sampler_views;
      void *fs;
      if (!i) {
         num_sampler_views = 2;
         dst_surface = c->video_surf[0];
         fs = c->fs_weave[0];
      } else {
         num_sampler_views = 2 + 2 * !!sv[2];
         if (interlaced) {
            dst_surface = c->video_surf[1];
            fs = c->fs_weave[1 + !!sv[2]];
         } else {
            dst_surface = c->video_surf[2];
            fs = c->fs_weave[3 + !!sv[2]];
         }
      }

      assert(dst_surface);
      c->fb_state.width = dst_surface->width;
      c->fb_state.height = dst_surface->height;
      c->fb_state.cbufs[0] = dst_surface;

      c->viewport.scale[0] = sv[0]->texture->width0;
      c->viewport.scale[1] = sv[0]->texture->height0 * 2;
      if (i && c->chroma != PIPE_VIDEO_CHROMA_FORMAT_444 && interlaced)
         c->viewport.scale[1] *= 2;
      c->viewport.translate[0] = 0;
      c->viewport.translate[1] = 0;

      scissor.minx = 0;
      scissor.miny = 0;
      scissor.maxx = dst_surface->width;
      scissor.maxy = dst_surface->height;

      c->pipe->set_scissor_state(c->pipe, &scissor);
      c->pipe->set_framebuffer_state(c->pipe, &c->fb_state);
      c->pipe->set_viewport_state(c->pipe, &c->viewport);
      c->pipe->bind_vs_state(c->pipe, c->vs);
      c->pipe->set_vertex_buffers(c->pipe, 1, &c->vertex_buf);
      c->pipe->bind_vertex_elements_state(c->pipe, c->vertex_elems_state);
      c->pipe->bind_rasterizer_state(c->pipe, c->rast);

      c->pipe->bind_blend_state(c->pipe, c->blend_clear);
      c->pipe->bind_fs_state(c->pipe, fs);
      c->pipe->bind_fragment_sampler_states(c->pipe, num_sampler_views, samplers);
      c->pipe->set_fragment_sampler_views(c->pipe, num_sampler_views, sv);
      sv += num_sampler_views;
      util_draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, 0, 3);
   }
}

void
vl_compositor_set_buffer_layer(struct vl_compositor *c, unsigned layer,
                               enum pipe_video_picture_structure field,
                               struct pipe_video_buffer *buffer,
                               struct pipe_video_rect *src_rect,
                               struct pipe_video_rect *dst_rect,
                               unsigned past_count,
                               struct pipe_video_buffer **past,
                               unsigned future_count,
                               struct pipe_video_buffer **future)
{
   struct pipe_sampler_view **sampler_views, *sv[VL_COMPOSITOR_SAMPLERS] = {};
   struct pipe_video_rect rect;
   unsigned i, half_h = 0;

   assert(c && buffer);
   assert(c->video_w <= buffer->width && c->video_h <= buffer->height);
   assert(layer < VL_COMPOSITOR_MAX_LAYERS);

   if (field == PIPE_VIDEO_PICTURE_STRUCTURE_FRAME) {
      sampler_views = buffer->get_sampler_view_planes(buffer, 0);
      if (!sampler_views) {
         sampler_views = buffer->get_sampler_view_planes(buffer, 1);
         vl_compositor_render_video(c, sampler_views, 0);
         sv[0] = c->video_sv[0];
         sv[1] = c->video_sv[2];
      } else {
         for (i = 0; i < 3; ++i)
            sv[i] = sampler_views[i];
      }
   } else {
      struct pipe_sampler_view **sv_cur, **sv_prev = NULL, *sv_weave[6];
      int top = field == PIPE_VIDEO_PICTURE_STRUCTURE_FIELD_TOP;
      sv_cur = buffer->get_sampler_view_planes(buffer, 1);
      if (past_count && past[0])
         sv_prev = buffer->get_sampler_view_planes(past[0], 1);
      if (sv_prev) {
         for (i = 0; i < 6; i += 2) {
            if (top) {
               sv_weave[i] = sv_cur[i];
               sv_weave[i+1] = sv_prev[i+1];
            } else {
               sv_weave[i] = sv_prev[i];
               sv_weave[i+1] = sv_cur[i+1];
            }
         }
         vl_compositor_render_video(c, sv_weave, 1);
         sv[0] = c->video_sv[0];
         sv[1] = c->video_sv[1];
      } else {
         for (i = 0; i < 3; ++i)
            sv[i] = sv_cur[2*i+!top];
         half_h = 1;
      }
   }

   if (DEBUG_CONTOUR && !half_h) {
      vl_compositor_render_sobel(c, sv[0]);
      sv[0] = c->video_sv[3];
   }

   c->used_layers |= 1 << layer;
   if (!src_rect) {
      src_rect = &rect;
      rect.x = rect.y = 0;
      rect.w = c->video_w;
      rect.h = c->video_h;
   }
   for (i = 0; i < 3; ++i) {
      c->layers[layer].samplers[i] = c->sampler_linear;
      pipe_sampler_view_reference(&c->layers[layer].sampler_views[i], sv[i]);
   }
   if (sv[2]) {
      c->layers[layer].fs = c->fs_video_buffer[3];
      pipe_sampler_view_reference(&c->layers[layer].sampler_views[3], c->video_sv[4]);
      c->layers[layer].samplers[3] = c->sampler_repeat;
   }
   else {
      c->layers[layer].fs = c->fs_video_buffer[1];
      pipe_sampler_view_reference(&c->layers[layer].sampler_views[2], c->video_sv[4]);
      c->layers[layer].samplers[2] = c->sampler_repeat;
   }
   assert(sv[1]);

   if (c->original_sizes[0] != 1.f/(float)sv[0]->texture->width0 ||
       c->original_sizes[1] != 1.f/(float)sv[0]->texture->height0 ||
       c->original_sizes[2] != 1.f/(float)sv[1]->texture->width0 ||
       c->original_sizes[3] != 1.f/(float)sv[1]->texture->height0) {
      struct pipe_transfer *buf_transfer;
      float *map = pipe_buffer_map(c->pipe, c->csc_matrix,
                                   PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
                                   &buf_transfer);
      c->original_sizes[0] = 1.f/(float)sv[0]->texture->width0;
      c->original_sizes[1] = 1.f/(float)sv[0]->texture->height0;
      c->original_sizes[2] = 1.f/(float)sv[1]->texture->width0;
      c->original_sizes[3] = 1.f/(float)sv[1]->texture->height0;
      c->original_sizes[4] = (float)sv[0]->texture->width0;
      c->original_sizes[5] = (float)sv[0]->texture->height0;
      c->original_sizes[6] = (float)sv[1]->texture->width0;
      c->original_sizes[7] = (float)sv[1]->texture->height0;
      memcpy(map, c->csc, sizeof(c->csc));
      memcpy(map + Elements(c->csc), c->original_sizes, sizeof(c->original_sizes));
      pipe_buffer_unmap(c->pipe, buf_transfer);
   }

   calc_src_and_dst(&c->layers[layer],
                    sv[0]->texture->width0,
                    sv[0]->texture->height0 << half_h,
                    *src_rect, dst_rect);
}
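
/* Note that the constant buffer is only rewritten when the source plane sizes
 * actually change, so the texel-size constants used by the bicubic shader
 * (const[4] and const[5]) stay in sync with the currently bound buffer
 * without re-uploading them every frame.
 */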

void
vl_compositor_set_palette_layer(struct vl_compositor *c,
                                unsigned layer,
                                struct pipe_sampler_view *indexes,
                                struct pipe_sampler_view *palette,
                                struct pipe_video_rect *src_rect,
                                struct pipe_video_rect *dst_rect,
                                bool include_color_conversion)
{
   int i;

   assert(c && indexes && palette);
   assert(layer < VL_COMPOSITOR_MAX_LAYERS);

   c->used_layers |= 1 << layer;

   c->layers[layer].fs = include_color_conversion ?
      c->fs_palette.yuv : c->fs_palette.rgb;

   c->layers[layer].samplers[0] = c->sampler_linear;
   c->layers[layer].samplers[1] = c->sampler_nearest;
   pipe_sampler_view_reference(&c->layers[layer].sampler_views[0], indexes);
   pipe_sampler_view_reference(&c->layers[layer].sampler_views[1], palette);
   for (i = 2; i < VL_COMPOSITOR_SAMPLERS; ++i) {
      pipe_sampler_view_reference(&c->layers[layer].sampler_views[i], NULL);
      c->layers[layer].samplers[i] = NULL;
   }
   calc_src_and_dst(&c->layers[layer], indexes->texture->width0, indexes->texture->height0,
                    src_rect ? *src_rect : default_rect(&c->layers[layer]),
                    dst_rect);
}

void
vl_compositor_set_rgba_layer(struct vl_compositor *c,
                             unsigned layer,
                             struct pipe_sampler_view *rgba,
                             struct pipe_video_rect *src_rect,
                             struct pipe_video_rect *dst_rect)
{
   int i;

   assert(c && rgba);
   assert(layer < VL_COMPOSITOR_MAX_LAYERS);

   c->used_layers |= 1 << layer;
   c->layers[layer].fs = c->fs_rgba;
   c->layers[layer].samplers[0] = c->sampler_linear;
   pipe_sampler_view_reference(&c->layers[layer].sampler_views[0], rgba);
   for (i = 1; i < VL_COMPOSITOR_SAMPLERS; ++i) {
      pipe_sampler_view_reference(&c->layers[layer].sampler_views[i], NULL);
      c->layers[layer].samplers[i] = NULL;
   }
   calc_src_and_dst(&c->layers[layer], rgba->texture->width0, rgba->texture->height0,
                    src_rect ? *src_rect : default_rect(&c->layers[layer]),
                    dst_rect);
}

void
vl_compositor_render(struct vl_compositor *c,
                     struct pipe_surface *dst_surface,
                     struct pipe_video_rect *dst_area,
                     struct pipe_video_rect *dst_clip,
                     bool clear_dirty_area)
{
   struct pipe_scissor_state scissor;

   assert(c);
   assert(dst_surface);

   c->fb_state.width = dst_surface->width;
   c->fb_state.height = dst_surface->height;
   c->fb_state.cbufs[0] = dst_surface;

   if (dst_area) {
      c->viewport.scale[0] = dst_area->w;
      c->viewport.scale[1] = dst_area->h;
      c->viewport.translate[0] = dst_area->x;
      c->viewport.translate[1] = dst_area->y;
   } else {
      c->viewport.scale[0] = dst_surface->width;
      c->viewport.scale[1] = dst_surface->height;
      c->viewport.translate[0] = 0;
      c->viewport.translate[1] = 0;
   }

   if (dst_clip) {
      scissor.minx = dst_clip->x;
      scissor.miny = dst_clip->y;
      scissor.maxx = dst_clip->x + dst_clip->w;
      scissor.maxy = dst_clip->y + dst_clip->h;
   } else {
      scissor.minx = 0;
      scissor.miny = 0;
      scissor.maxx = dst_surface->width;
      scissor.maxy = dst_surface->height;
   }

   gen_vertex_data(c, dst_surface->width, dst_surface->height);

   if (clear_dirty_area && (c->dirty_tl.x < c->dirty_br.x ||
                            c->dirty_tl.y < c->dirty_br.y)) {
      util_clear_render_target(c->pipe, dst_surface, &c->clear_color,
                               0, 0, dst_surface->width, dst_surface->height);
      c->dirty_tl.x = c->dirty_tl.y = 1.0f;
      c->dirty_br.x = c->dirty_br.y = 0.0f;
   }

   c->pipe->set_scissor_state(c->pipe, &scissor);
   c->pipe->set_framebuffer_state(c->pipe, &c->fb_state);
   c->pipe->set_viewport_state(c->pipe, &c->viewport);
   c->pipe->bind_vs_state(c->pipe, c->vs);
   c->pipe->set_vertex_buffers(c->pipe, 1, &c->vertex_buf);
   c->pipe->bind_vertex_elements_state(c->pipe, c->vertex_elems_state);
   c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_FRAGMENT, 0, c->csc_matrix);
   c->pipe->bind_rasterizer_state(c->pipe, c->rast);

   draw_layers(c, dst_surface->width, dst_surface->height);
}
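
/* Typical per-frame flow for a single video layer (sketch; buf, dst_area and
 * drawable_surface are placeholders for state-tracker objects):
 *
 *    vl_compositor_init_video(&c, pipe, PIPE_VIDEO_CHROMA_FORMAT_420, w, h);
 *    ...
 *    vl_compositor_clear_layers(&c);
 *    vl_compositor_set_buffer_layer(&c, 0, PIPE_VIDEO_PICTURE_STRUCTURE_FRAME,
 *                                   buf, NULL, NULL, 0, NULL, 0, NULL);
 *    vl_compositor_render(&c, drawable_surface, &dst_area, NULL, true);
 */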

bool
vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe)
{
   csc_matrix csc_matrix;

   c->pipe = pipe;

   if (!init_pipe_state(c))
      return false;

   if (!init_shaders(c)) {
      cleanup_pipe_state(c);
      return false;
   }

   if (!init_buffers(c)) {
      cleanup_shaders(c);
      cleanup_pipe_state(c);
      return false;
   }

   vl_compositor_clear_layers(c);

   vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, csc_matrix);
   vl_compositor_set_csc_matrix(c, csc_matrix);

   c->clear_color.f[0] = c->clear_color.f[1] = 0.0f;
   c->clear_color.f[2] = c->clear_color.f[3] = 0.0f;
   vl_compositor_reset_dirty_area(c);

   return true;
}

bool
vl_compositor_init_video(struct vl_compositor *c, struct pipe_context *pipe,
                         enum pipe_video_chroma_format chroma, unsigned w, unsigned h)
{
   struct pipe_resource templ;
   int i;
   if (!vl_compositor_init(c, pipe))
      return false;
   c->video_w = w;
   c->video_h = h;
   c->chroma = chroma;

   c->fs_video_buffer[0] = create_frag_shader_video_buffer(c, 2);
   c->fs_video_buffer[1] = create_frag_shader_bicubic(c, 2);
   c->fs_video_buffer[2] = create_frag_shader_video_buffer(c, 3);
   c->fs_video_buffer[3] = create_frag_shader_bicubic(c, 3);

   for (i = 0; i < Elements(c->fs_video_buffer); ++i) {
      if (!c->fs_video_buffer[i]) {
         debug_printf("Unable to create YCbCr-to-RGB fragment shader %i.\n", i);
         goto fail;
      }
   }
   c->fs_weave[0] = create_frag_shader_weave(c, 1, 0, 1);
   c->fs_weave[1] = create_frag_shader_weave(c, 0, 1, 2); // CbCr woven interlaced
   c->fs_weave[2] = create_frag_shader_weave(c, 0, 1, 1); // Cb, Cr separate interlaced
   c->fs_weave[3] = create_frag_shader_weave(c, 0, 0, 2); // CbCr woven progressive
   c->fs_weave[4] = create_frag_shader_weave(c, 0, 0, 1); // Cb, Cr separate progressive
   if (DEBUG_CONTOUR)
      c->fs_weave[5] = create_frag_shader_sobel(c);
   for (i = 0; i < Elements(c->fs_weave); ++i) {
      if (!DEBUG_CONTOUR && i == 5) continue;
      if (!c->fs_weave[i]) {
         debug_printf("Unable to create weave fragment shaders [%i].\n", i);
         goto fail;
      }
   }

   memset(&templ, 0, sizeof(templ));
   templ.target = PIPE_TEXTURE_2D;
   templ.format = PIPE_FORMAT_R8_UNORM;
   templ.width0 = w;
   templ.height0 = h;
   templ.depth0 = 1;
   templ.array_size = 1;
   templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
   templ.usage = PIPE_USAGE_STATIC;
   c->video_res[0] = pipe->screen->resource_create(pipe->screen, &templ);
   if (!c->video_res[0]) {
      debug_printf("Could not create weave temp frame for luma\n");
      goto fail;
   }

   if (chroma == PIPE_VIDEO_CHROMA_FORMAT_420)
      templ.width0 /= 2;
   templ.format = PIPE_FORMAT_R8G8_UNORM;
   c->video_res[1] = pipe->screen->resource_create(pipe->screen, &templ);
   if (!c->video_res[1]) {
      debug_printf("Could not create interlaced temp frame for chroma\n");
      goto fail;
   }

   if (chroma != PIPE_VIDEO_CHROMA_FORMAT_444)
      templ.height0 = h / 2;
   c->video_res[2] = pipe->screen->resource_create(pipe->screen, &templ);
   if (!c->video_res[2]) {
      debug_printf("Could not create deinterlaced temp frame for chroma\n");
      goto fail;
   }

   if (DEBUG_CONTOUR) {
      c->video_res[3] = pipe->screen->resource_create(pipe->screen, c->video_res[0]);
      if (!c->video_res[3]) {
         debug_printf("Could not create sobel temp frame for luma\n");
         goto fail;
      }
   }

   memset(&templ, 0, sizeof(templ));
   templ.target = PIPE_TEXTURE_1D;
   templ.format = PIPE_FORMAT_R16G16B16A16_UNORM;
   templ.width0 = 256;
   templ.height0 = 1;
   templ.depth0 = 1;
   templ.array_size = 1;
   templ.bind = PIPE_BIND_SAMPLER_VIEW;
   templ.usage = PIPE_USAGE_STATIC;
   c->video_res[4] = pipe->screen->resource_create(pipe->screen, &templ);
   if (!c->video_res[4]) {
      debug_printf("Could not generate lookup texture\n");
      goto fail;
   } else {
      struct pipe_transfer *buf_transfer;
      unsigned short *map = pipe_buffer_map(c->pipe, c->video_res[4],
                                            PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
                                            &buf_transfer);

      for (i = 0; i < templ.width0; ++i, map += 4) {
         float weight[4], h0, h1, g0;
         float alpha = (float)i / (float)templ.width0;
         float alpha2 = alpha * alpha;
         float alpha3 = alpha2 * alpha;
         float mul = (float)((sizeof(*map)<<8)-1);

         weight[0] = (-alpha3 + 3.f * alpha2 - 3.f * alpha + 1.f) / 6.f;
         weight[1] = (3.f * alpha3 - 6.f * alpha2 + 4.f) / 6.f;
         weight[2] = (-3.f * alpha3 + 3.f * alpha2 + 3.f * alpha + 1.f) / 6.f;
         weight[3] = alpha3 / 6.f;
         h0 = 1.f + alpha - weight[1] / (weight[0] + weight[1]);
         h1 = 1.f - alpha + weight[3] / (weight[2] + weight[3]);
         g0 = weight[0] + weight[1];
         map[0] = h0 * mul;
         map[1] = h1 * mul;
         map[2] = g0 * mul;
         map[3] = 0;
      }
      pipe_buffer_unmap(c->pipe, buf_transfer);
   }

   for (i = 0; i < Elements(c->video_res); ++i) {
      struct pipe_sampler_view sv_templ;
      struct pipe_surface surf_templ;
      if (!c->video_res[i]) continue;

      memset(&sv_templ, 0, sizeof(sv_templ));
      u_sampler_view_default_template(&sv_templ, c->video_res[i], c->video_res[i]->format);
      if (c->video_res[i]->format == PIPE_FORMAT_R8_UNORM)
         sv_templ.swizzle_a = sv_templ.swizzle_b = sv_templ.swizzle_g = sv_templ.swizzle_r;
      else if (c->video_res[i]->format == PIPE_FORMAT_R8G8_UNORM) {
         sv_templ.swizzle_b = PIPE_SWIZZLE_GREEN;
         sv_templ.swizzle_g = PIPE_SWIZZLE_RED;
      }
      c->video_sv[i] = pipe->create_sampler_view(pipe, c->video_res[i], &sv_templ);

      if (!c->video_sv[i]) {
         debug_printf("Could not create temp video sampler views\n");
         goto fail;
      }

      memset(&surf_templ, 0, sizeof(surf_templ));
      surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
      surf_templ.format = c->video_res[i]->format;
      c->video_surf[i] = pipe->create_surface(pipe, c->video_res[i], &surf_templ);
      if (!c->video_surf[i]) {
         debug_printf("Could not create temp video surface\n");
         goto fail;
      }
   }

   return true;

fail:
   vl_compositor_cleanup(c);
   return false;
}