Changes.
[cairo/gpu.git] / src / gpu / cairo-gpu-impl-programs.h
blobf9f37af9e604d4f1e3e42b0c5a931477f789d695
/*
 * Growable byte buffer used to assemble GPU program text.
 *
 *   buf  - heap storage; NULL until the first allocation
 *   len  - number of bytes currently in use
 *   size - allocated capacity of buf in bytes (0 while buf is NULL)
 *
 * A zero-initialised builder is a valid empty builder.
 */
typedef struct
{
    char* buf;
    size_t len;
    size_t size;
} cairo_gpu_string_builder_t;

/*
 * Grow the buffer so that at least `len` more bytes fit after the current
 * contents.  Capacity starts at 16 bytes and doubles until it is sufficient.
 *
 * On arithmetic overflow or allocation failure the builder is left untouched:
 * the old buffer, length and size all stay valid.  Because this function
 * returns void, callers must re-check the capacity before writing.
 */
static inline void
_cairo_gpu_string_builder_enlarge(cairo_gpu_string_builder_t* b, size_t len)
{
    const size_t max = (size_t) -1;
    size_t size = b->size ? b->size : 16;
    char* buf;

    /* b->len + len must be representable */
    if(len > max - b->len)
        return;

    while((b->len + len) > size)
    {
        /* doubling would wrap to 0 and never terminate */
        if(size > max / 2)
            return;
        size <<= 1;
    }

    /* go through a temporary so the old buffer is not lost on failure */
    buf = realloc(b->buf, size);
    if(!buf)
        return;

    b->buf = buf;
    b->size = size;
}

/*
 * Append `len` raw bytes from `x`.  No terminator is added; pass a length
 * that includes the NUL byte if one is wanted in the buffer.
 *
 * A non-positive `len` is a no-op.  If the buffer cannot be grown, the write
 * is dropped and the builder is left unchanged.
 */
static void
_cairo_gpu_string_builder_write(cairo_gpu_string_builder_t* b, const char* x, int len)
{
    size_t n;

    if(len <= 0)
        return;
    n = (size_t) len;

    if((b->len + n) > b->size)
    {
        _cairo_gpu_string_builder_enlarge(b, n);
        if((b->len + n) > b->size)
            return; /* growth failed */
    }

    memcpy(b->buf + b->len, x, n);
    b->len += n;
}

/*
 * Append printf-style formatted text.  The terminating NUL that vsnprintf
 * writes is stored after the text but is not counted in b->len, so the next
 * write overwrites it.
 *
 * If formatting fails (negative vsnprintf result) or the buffer cannot be
 * grown, nothing is appended.
 */
static __attribute__ ((__format__ (__printf__, 2, 3))) void
_cairo_gpu_string_builder_writef(cairo_gpu_string_builder_t* b, const char* fmt, ...)
{
    int n;

    for(;;)
    {
        size_t avail = b->buf ? b->size - b->len : 0;
        va_list ap;

        /* with no room available this is only a length measurement */
        va_start(ap, fmt);
        n = vsnprintf(avail ? b->buf + b->len : NULL, avail, fmt, ap);
        va_end(ap);

        if(n < 0)
            return; /* encoding/format error: do not touch len */

        /* text plus its NUL fit into the free space */
        if((size_t) n < avail)
            break;

        _cairo_gpu_string_builder_enlarge(b, (size_t) n + 1);
        if(!b->buf || (b->size - b->len) <= (size_t) n)
            return; /* growth failed: give up instead of spinning */
    }

    b->len += (size_t) n;
}
/*
 * Emission helpers.  Each one expects a local variable named `builder` of
 * type cairo_gpu_string_builder_t* to be in scope where it is used.
 *
 *   OUT_(x)        - append the string x verbatim (x is evaluated twice)
 *   OUT(x)         - append the string literal x followed by L_TERMINATOR and
 *                    a newline; x must be a literal because it is joined by
 *                    literal concatenation
 *   OUTF(x, ...)   - same as OUT, but x is a printf format for the arguments
 */
#define OUT_(x) _cairo_gpu_string_builder_write(builder, x, (int) strlen(x))
#define OUT(x) OUT_(x L_TERMINATOR "\n")
#define OUTF(x, args...) _cairo_gpu_string_builder_writef(builder, x L_TERMINATOR "\n", ## args)
68 typedef struct
70 cairo_gpu_string_builder_t main;
71 cairo_gpu_string_builder_t body;
73 const char* in_position;
74 const char* in_color;
75 const char* in_texcoord[2];
77 const char* out_position;
78 const char* out_color;
79 const char* out_texcoord[2];
81 int has_01_swizzles;
82 int div_uses;
83 int dp2a_uses;
84 int result_color_uses;
85 int tmp_uses;
87 const char* color;
88 const char* color_swizzle;
89 const char* color_alpha_swizzle;
90 int result_color;
91 int result_color_a;
92 size_t len;
93 size_t size;
95 char color_buf[64];
96 } cairo_gpu_program_builder_t;
98 static inline void
99 _cairo_gpu_program_builder_color_mod_insn(cairo_gpu_program_builder_t* p, unsigned tex, int component, const char* insn, const char* args)
101 cairo_gpu_string_builder_t* builder = &p->body;
102 char wmask_buf[4];
103 const char* wmask;
104 const char* tmp;
105 const char* swizzle = "";
106 const char* alpha_swizzle = ".wwww";
108 const char* swizzles[] = {".111w", ".111x", ".111y", ".111z"};
109 const char* alpha_swizzles[] = {".wwww", ".xxxx", ".yyyy", ".zzzz"};
111 if(tex == FRAG_TEX_COLOR_111C)
113 wmask_buf[0] = '.';
114 wmask_buf[1] = "wxyz"[component];
115 wmask_buf[2] = 0;
116 wmask = wmask_buf;
118 else if(tex == FRAG_TEX_COLOR_111CA)
120 wmask_buf[0] = '.';
121 wmask_buf[1] = "wxyz"[component];
122 wmask_buf[2] = 'w';
123 wmask_buf[3] = 0;
124 wmask = wmask_buf;
126 else if(tex == FRAG_TEX_COLOR_RGBA)
127 wmask = "";
128 else
129 wmask = ".w";
131 if(p->color)
133 tmp = L_TMP;
134 ++p->tmp_uses;
136 else
138 tmp = L_RESULT_COLOR_TEMP;
139 ++p->result_color_uses;
140 if(tex == FRAG_TEX_COLOR_RGBA)
141 p->result_color = p->result_color_a = builder->len + strlen(insn) + 1;
144 OUTF("%s %s%s, %s", insn, tmp, wmask, args);
146 if(tex == FRAG_TEX_COLOR_111CA || tex == FRAG_TEX_COLOR_111C)
148 if(tex == FRAG_TEX_COLOR_111CA)
150 p->result_color = p->result_color_a = builder->len + 4;
151 OUTF("MUL %s.w, %s.wwww, %s%s", tmp, tmp, tmp, alpha_swizzles[component]);
153 if(!p->has_01_swizzles)
155 p->result_color = p->result_color_a = builder->len + 4;
156 OUTF("SWZ %s, %s, 1, 1, 1, %c", tmp, tmp, "wxyz"[component]);
158 else
160 swizzle = swizzles[component];
161 alpha_swizzle = alpha_swizzles[component];
164 else if(tex == FRAG_TEX_COLOR_111A)
166 p->result_color = p->result_color_a = builder->len + 4;
167 if(!p->has_01_swizzles)
169 p->result_color = p->result_color_a = builder->len + 4;
170 OUTF("SWZ %s, %s, 1, 1, 1, w", tmp, tmp);
172 else
174 swizzle = ".111w";
177 else if(tex == FRAG_TEX_COLOR_AAAA)
178 swizzle = ".wwww";
180 if(p->color)
182 p->result_color = p->result_color_a = builder->len + 4;
183 OUTF("MUL " L_RESULT_COLOR_TEMP ", %s%s, %s%s", p->color, p->color_swizzle, tmp, swizzle);
184 ++p->result_color_uses;
186 p->color_swizzle = "";
187 p->color_alpha_swizzle = ".wwww";
189 else
191 p->color_swizzle = swizzle;
192 p->color_alpha_swizzle = alpha_swizzle;
194 p->color = L_RESULT_COLOR_TEMP;
197 static inline void
198 _cairo_gpu_program_builder_color_mod(cairo_gpu_program_builder_t* p, const char* v)
200 cairo_gpu_string_builder_t* builder = &p->body;
201 if(p->color)
203 p->result_color = p->result_color_a = builder->len + 4;
204 OUTF("MUL " L_RESULT_COLOR_TEMP ", %s%s, %s", p->color, p->color_swizzle, v);
205 ++p->result_color_uses;
206 p->color = L_RESULT_COLOR_TEMP;
207 p->color_swizzle = "";
208 p->color_alpha_swizzle = ".wwww";
210 else
212 strcpy(p->color_buf, v);
213 p->color = p->color_buf;
214 p->color_swizzle = "";
215 p->color_alpha_swizzle = ".wwww";
219 static inline void
220 _cairo_gpu_program_builder_color_op(cairo_gpu_program_builder_t* p, unsigned op)
222 cairo_gpu_string_builder_t* builder = &p->body;
223 if(p->color)
225 if(op & OP_DIV_ALPHA)
227 if(!strcmp(p->color_swizzle, ".wwww"))
229 if(op == OP_DIV_ALPHA_RGBA)
231 assert(0); // where was the optimizer?!
232 p->result_color = p->result_color_a = builder->len + 4;
233 OUTF("MOV " L_RESULT_COLOR_TEMP ", " L_1);
235 else
237 if(!p->has_01_swizzles)
239 p->result_color = p->result_color_a = builder->len + 4;
240 OUTF("SWZ " L_RESULT_COLOR_TEMP ", %s, 1, 1, 1, w", p->color);
242 else
244 p->color_swizzle = ".111w";
248 else
250 if(p->div_uses >= 0)
252 p->result_color = builder->len + 4;
253 OUTF("DIV " L_RESULT_COLOR_TEMP ".xyz, %s%s, %s%s", p->color, p->color_swizzle, p->color, p->color_alpha_swizzle);
254 ++p->div_uses;
256 else
258 // TODO: what happens if " L_TMP ".w is zero? Does OpenGL guarantee that 0 * (1 / 0) = 0? Or should we add an epsilon value before RCP?
259 OUTF("RCP " L_TMP ".w, %s%s", p->color, p->color_alpha_swizzle);
260 p->result_color = builder->len + 4;
261 OUTF("MUL " L_RESULT_COLOR_TEMP ".xyz, %s%s, " L_TMP, p->color, p->color_swizzle);
262 ++p->tmp_uses;
264 ++p->result_color_uses;
266 if(op == OP_DIV_ALPHA_RGBA)
268 p->result_color_a = builder->len + 4;
269 ++p->result_color_uses;
270 OUT("MOV " L_RESULT_COLOR_TEMP ".w, " L_1);
272 else if(strcmp(p->color, L_RESULT_COLOR_TEMP))
274 p->result_color_a = builder->len + 4;
275 ++p->result_color_uses;
276 OUTF("MOV " L_RESULT_COLOR_TEMP ".w, %s%s", p->color, p->color_alpha_swizzle);
278 else
279 p->result_color = p->result_color_a = -1;
281 p->color = L_RESULT_COLOR_TEMP;
282 p->color_swizzle = "";
283 p->color_alpha_swizzle = ".wwww";
285 else if(op == OP_MUL_ALPHA)
287 p->result_color = builder->len + 4;
288 ++p->result_color_uses;
289 OUTF("MUL " L_RESULT_COLOR_TEMP ".xyz, %s%s, %s.wwww", p->color, p->color_swizzle, p->color);
291 if(strcmp(p->color, L_RESULT_COLOR_TEMP))
293 p->result_color_a = builder->len + 4;
294 ++p->result_color_uses;
295 OUTF("MOV " L_RESULT_COLOR_TEMP ".w, %s.w", p->color);
297 else
298 p->result_color = p->result_color_a = 0;
299 p->color = L_RESULT_COLOR_TEMP;
300 p->color_swizzle = "";
301 p->color_alpha_swizzle = ".wwww";
306 static inline void
307 _cairo_gpu_program_builder_color_write(cairo_gpu_program_builder_t* p)
309 cairo_gpu_string_builder_t* builder = &p->body;
310 if(!p->color)
312 p->color = L_1;
313 p->color_swizzle = "";
314 p->color_alpha_swizzle = ".wwww";
316 else
318 if(p->result_color > 0)
320 L_SET_RESULT_COLOR(&builder->buf[p->result_color]);
321 --p->result_color_uses;
323 if(p->result_color_a > 0 && p->result_color_a != p->result_color)
325 L_SET_RESULT_COLOR(&builder->buf[p->result_color_a]);
326 --p->result_color_uses;
330 if(!p->result_color)
331 OUTF("MOV %s, %s%s", p->out_color, p->color, p->color_swizzle);
334 static inline char*
335 _cairo_gpu_program_builder_finish(cairo_gpu_program_builder_t* p)
337 cairo_gpu_string_builder_t* builder = &p->main;
338 if(p->result_color_uses)
339 OUT(L_TEMP_RESULT_COLOR);
340 if(p->tmp_uses)
341 OUT(L_TEMP_TMP);
343 if(p->body.buf)
345 _cairo_gpu_string_builder_write(builder, p->body.buf, p->body.len);
346 free(p->body.buf);
349 _cairo_gpu_string_builder_write(builder, "END\n", 5); // add null terminator as well
351 //printf("%s\n", builder->buf);
353 return builder->buf;
356 static inline void
357 _cairo_gpu_write_vert_position(cairo_gpu_program_builder_t* p, unsigned vert)
359 cairo_gpu_string_builder_t* builder = &p->body;
360 if(p->has_01_swizzles)
361 OUTF("MAD %s, %s, " L_PROGRAM_ENV "[0].xy00, " L_PROGRAM_ENV "[0].zw11", p->out_position, p->in_position);
362 else
364 OUTF("MAD %s.xy, %s, " L_PROGRAM_ENV "[0], " L_PROGRAM_ENV "[0].zwxy", p->out_position, p->in_position);
365 OUTF("MOV %s.zw, " L_1, p->out_position);
369 static inline void
370 _cairo_gpu_write_vert(cairo_gpu_program_builder_t* p, unsigned vert)
372 cairo_gpu_string_builder_t* builder = &p->body;
373 int i;
374 int pos;
375 int passthru_tex = vert & (VERT_PASSTHRU_PREOP * 3) ? !!(vert & VERT_PASSTHRU_TEX1) : -1;
377 for(i = 0; i < MAX_OPERANDS; ++i)
379 int k = (vert >> (VERT_TEX_SHIFT + i * VERT_TEX_BITS)) & VERT_TEX_MASK;
380 if((k & VERT_TEX_GEN) && i != passthru_tex)
382 const char* input;
384 if(k == VERT_TEX_GEN)
385 input = p->in_position;
386 else
387 input = p->in_texcoord[k - 1];
389 ++p->tmp_uses;
390 OUTF("MAD " L_TMP ", %s.xxxx, " L_PROGRAM_ENV "[%i], " L_PROGRAM_ENV "[%i]", input, VERTENV_TEX_MATRIX_X(i), VERTENV_TEX_MATRIX_W(i));
391 OUTF("MAD %s, %s.yyyy, " L_PROGRAM_ENV "[%i], " L_TMP, p->out_texcoord[i], input, VERTENV_TEX_MATRIX_Y(i));
395 for(pos = 0;; ++pos)
397 if(vert & (VERT_PASSTHRU_PREOP << pos))
399 int k;
400 const char* input;
401 const char* tmp;
402 k = (vert >> (VERT_TEX_SHIFT + passthru_tex * VERT_TEX_BITS)) & VERT_TEX_MASK;
404 if(k == VERT_TEX_GEN)
405 input = p->in_position;
406 else
407 input = p->in_texcoord[k - 1];
409 if(!p->color)
411 tmp = L_RESULT_COLOR_TEMP;
412 p->result_color_uses += 3;
414 else
416 tmp = L_TMP;
417 p->tmp_uses += 3;
420 OUTF("MUL %s, %s.xxxx, " L_PROGRAM_ENV "[0]", tmp, input);
421 OUTF("MAD %s, %s.yyyy, " L_PROGRAM_ENV "[1], %s", tmp, input, tmp);
422 if(!p->color)
423 p->result_color = builder->len + 4;
424 OUTF("ADD %s, %s, " L_PROGRAM_ENV "[2]", tmp, tmp);
425 _cairo_gpu_program_builder_color_mod(p, tmp);
428 if(vert & (VERT_COLOR_PREOP << pos))
429 _cairo_gpu_program_builder_color_mod(p, p->in_color);
431 if(pos == 1)
432 break;
434 _cairo_gpu_program_builder_color_op(p, (vert & VERT_OP_MASK) >> VERT_OP_SHIFT);
437 if(p->color)
438 _cairo_gpu_program_builder_color_write(p);
441 static inline void
442 _cairo_gpu_write_frag(cairo_gpu_program_builder_t* p, unsigned frag)
444 cairo_gpu_string_builder_t* builder = &p->body;
445 int i;
447 for(i = 0; i < MAX_OPERANDS; ++i)
449 unsigned tex = frag >> (FRAG_TEX_SHIFT + i * FRAG_TEX_BITS);
450 if(tex & FRAG_TEX_MASK)
452 char params_buf[64];
453 const char* coord;
455 if(tex & FRAG_TEX_RADIAL)
457 // x = (((x, y, 1) . p1) + sqrt(((x, y, 1) . p1)^2 + ((x^2, y^2, 1) . p2))) * p3.x + p3.z
458 // y = undef * 0 + p3.w
460 OUTF("MUL " L_TMP ".xyw, fragment.texcoord[%i], fragment.texcoord[%i]", i, i);
461 if(GLEW_NV_fragment_program2)
463 OUTF("DP2A " L_TMP ".x, " L_TMP ", " L_PROGRAM_ENV "[%i], " L_PROGRAM_ENV "[%i].w", FRAGENV_TEX_RADIAL_MAC(i), FRAGENV_TEX_RADIAL_MAC(i));
464 OUTF("DP2A " L_TMP ".w, fragment.texcoord[%i], " L_PROGRAM_ENV "[%i], " L_PROGRAM_ENV "[%i].z", i, FRAGENV_TEX_RADIAL_MBD2(i), FRAGENV_TEX_RADIAL_MBD2(i));
465 ++nv_fragment_program2_uses;
467 else
469 OUTF("DP3 " L_TMP ".x, " L_TMP ".xyww, " L_PROGRAM_ENV "[%i].xywz", FRAGENV_TEX_RADIAL_MAC(i));
470 OUTF("DP3 " L_TMP ".w, fragment.texcoord[%i].xyww, " L_PROGRAM_ENV "[%i].xyzw", i, FRAGENV_TEX_RADIAL_MBD2(i));
472 OUT("MAD " L_TMP ".x, " L_TMP ".w, " L_TMP ".w, " L_TMP ".x");
475 // x = (x, y, 1) . p1 + sqrt((x, y, (x, y, 1) . p1)^2 .H p2) * p3.x + p3.z
476 // y = undef * 0 + p3.w
478 OUTF("MUL " L_TMP ", %s, %s", p->in_texcoord[i], p->in_texcoord[i]);
479 OUTF("DPH " L_TMP ".x, " L_TMP ", " L_PROGRAM_ENV "[%i]", FRAGENV_TEX_RADIAL_MAC(i));
480 OUT("RSQ " L_TMP ".x, " L_TMP L_SCALAR_X);
481 OUT("RCP " L_TMP ".x, " L_TMP L_SCALAR_X);
482 // We could save an instruction here by spending an interpolated attrib and complicating the code. Avoid for now
483 OUTF("ADD " L_TMP ".x, " L_TMP ", %s.zzzz", p->in_texcoord[i]);
484 OUTF("MAD " L_TMP ".xy, " L_TMP ".xyxy, " L_PROGRAM_ENV "[%i].xyzw, " L_PROGRAM_ENV "[%i].zwxy", FRAGENV_TEX_RADIAL_SO(i), FRAGENV_TEX_RADIAL_SO(i));
485 coord = L_TMP;
486 ++p->tmp_uses;
488 else
490 coord = p->in_texcoord[i];
493 if(tex & FRAG_TEX_DISCONTINUOUS)
495 // TODO: this is somewhat broken, because it results in "sharp edges" and sometimes 2x2 artifacts at discontinuities
496 // This is especially visible in radial gradients, where an aliased circle will be visible
497 // Note however that this matches the behavior of pixman, so we consider it OK for now.
498 // Furthermore, it may be considered a feature for linear gradients, and radial gradients are seldom used.
499 // To implement smoothing, we need to sample twice considering partial derivatives available (easy even without DDX/DDY)
501 // TODO: on ATI FLR uses 2 instructions. Maybe we can do better.
502 OUTF("FLR " L_TMP ".zw, %s.xyxy", coord);
503 OUTF("ADD " L_TMP ".xy, %s.xyxy, " L_TMP ".zwzw", coord);
504 OUTF("MAD " L_TMP ".x, " L_TMP ", " L_PROGRAM_ENV "[%i], " L_PROGRAM_ENV "[%i].yyyy", FRAGENV_TEX_DISCONTINUOUS_SO(i), FRAGENV_TEX_DISCONTINUOUS_SO(i));
505 coord = "" L_TMP "";
506 ++p->tmp_uses;
509 sprintf(params_buf, "%s, " L_TEXTURE "[%i], %s", coord, i, (tex & FRAG_TEX_RECTANGLE) ? "RECT" : "2D");
511 _cairo_gpu_program_builder_color_mod_insn(p, tex & FRAG_TEX_COLOR_MASK, (int)((tex >> FRAG_COMPONENT_SHIFT) & 3), "TEX", params_buf);
515 unsigned op = frag & FRAG_OP_MASK;
516 if(op && (!i || (frag & FRAG_OPPOS_TEX1)))
517 _cairo_gpu_program_builder_color_op(p, op >> FRAG_OP_SHIFT);
521 if(frag & FRAG_PRIMARY)
522 _cairo_gpu_program_builder_color_mod(p, p->in_color);
524 if(frag & FRAG_CONSTANT)
525 _cairo_gpu_program_builder_color_mod(p, L_PROGRAM_ENV "[0]");
527 _cairo_gpu_program_builder_color_write(p);