/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Helper functions for swizzling/shuffling.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */


#include <string.h>

#include "util/u_debug.h"

#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"

46 lp_build_broadcast(struct gallivm_state
*gallivm
,
50 const unsigned n
= LLVMGetVectorSize(vec_type
);
54 res
= LLVMGetUndef(vec_type
);
55 for(i
= 0; i
< n
; ++i
) {
56 LLVMValueRef index
= lp_build_const_int32(gallivm
, i
);
57 res
= LLVMBuildInsertElement(gallivm
->builder
, res
, scalar
, index
, "");
68 lp_build_broadcast_scalar(struct lp_build_context
*bld
,
71 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
72 const struct lp_type type
= bld
->type
;
74 assert(lp_check_elem_type(type
, LLVMTypeOf(scalar
)));
76 if (type
.length
== 1) {
82 #if HAVE_LLVM >= 0x207
83 /* The shuffle vector is always made of int32 elements */
84 struct lp_type i32_vec_type
= lp_type_int_vec(32);
85 i32_vec_type
.length
= type
.length
;
87 res
= LLVMBuildInsertElement(builder
, bld
->undef
, scalar
,
88 lp_build_const_int32(bld
->gallivm
, 0), "");
89 res
= LLVMBuildShuffleVector(builder
, res
, bld
->undef
,
90 lp_build_const_int_vec(bld
->gallivm
, i32_vec_type
, 0), "");
92 /* XXX: The above path provokes a bug in LLVM 2.6 */
95 for(i
= 0; i
< type
.length
; ++i
) {
96 LLVMValueRef index
= lp_build_const_int32(bld
->gallivm
, i
);
97 res
= LLVMBuildInsertElement(builder
, res
, scalar
, index
, "");
106 * Combined extract and broadcast (or a mere shuffle when the two types match)
109 lp_build_extract_broadcast(struct gallivm_state
*gallivm
,
110 struct lp_type src_type
,
111 struct lp_type dst_type
,
115 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
118 assert(src_type
.floating
== dst_type
.floating
);
119 assert(src_type
.width
== dst_type
.width
);
121 assert(lp_check_value(src_type
, vector
));
122 assert(LLVMTypeOf(index
) == i32t
);
124 if (src_type
.length
== 1) {
125 if (dst_type
.length
== 1) {
127 * Trivial scalar -> scalar.
134 * Broadcast scalar -> vector.
137 res
= lp_build_broadcast(gallivm
,
138 lp_build_vec_type(gallivm
, dst_type
),
143 if (dst_type
.length
== src_type
.length
) {
145 * Special shuffle of the same size.
148 LLVMValueRef shuffle
;
149 shuffle
= lp_build_broadcast(gallivm
,
150 LLVMVectorType(i32t
, dst_type
.length
),
152 res
= LLVMBuildShuffleVector(gallivm
->builder
, vector
,
153 LLVMGetUndef(lp_build_vec_type(gallivm
, dst_type
)),
158 scalar
= LLVMBuildExtractElement(gallivm
->builder
, vector
, index
, "");
159 if (dst_type
.length
== 1) {
161 * Trivial extract scalar from vector.
168 * General case of different sized vectors.
171 res
= lp_build_broadcast(gallivm
,
172 lp_build_vec_type(gallivm
, dst_type
),
183 * Swizzle one channel into all other three channels.
186 lp_build_swizzle_scalar_aos(struct lp_build_context
*bld
,
190 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
191 const struct lp_type type
= bld
->type
;
192 const unsigned n
= type
.length
;
195 if(a
== bld
->undef
|| a
== bld
->zero
|| a
== bld
->one
)
198 /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
199 * using shuffles here actually causes worst results. More investigation is
201 if (type
.width
>= 16) {
205 LLVMTypeRef elem_type
= LLVMInt32TypeInContext(bld
->gallivm
->context
);
206 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
208 for(j
= 0; j
< n
; j
+= 4)
209 for(i
= 0; i
< 4; ++i
)
210 shuffles
[j
+ i
] = LLVMConstInt(elem_type
, j
+ channel
, 0);
212 return LLVMBuildShuffleVector(builder
, a
, bld
->undef
, LLVMConstVector(shuffles
, n
), "");
216 * Bit mask and recursive shifts
218 * XYZW XYZW .... XYZW <= input
219 * 0Y00 0Y00 .... 0Y00
220 * YY00 YY00 .... YY00
221 * YYYY YYYY .... YYYY <= output
223 struct lp_type type4
;
224 const char shifts
[4][2] = {
232 a
= LLVMBuildAnd(builder
, a
,
233 lp_build_const_mask_aos(bld
->gallivm
,
234 type
, 1 << channel
), "");
237 * Build a type where each element is an integer that cover the four
242 type4
.floating
= FALSE
;
246 a
= LLVMBuildBitCast(builder
, a
, lp_build_vec_type(bld
->gallivm
, type4
), "");
248 for(i
= 0; i
< 2; ++i
) {
249 LLVMValueRef tmp
= NULL
;
250 int shift
= shifts
[channel
][i
];
252 #ifdef PIPE_ARCH_LITTLE_ENDIAN
257 tmp
= LLVMBuildLShr(builder
, a
, lp_build_const_int_vec(bld
->gallivm
, type4
, shift
*type
.width
), "");
259 tmp
= LLVMBuildShl(builder
, a
, lp_build_const_int_vec(bld
->gallivm
, type4
, -shift
*type
.width
), "");
263 a
= LLVMBuildOr(builder
, a
, tmp
, "");
266 return LLVMBuildBitCast(builder
, a
, lp_build_vec_type(bld
->gallivm
, type
), "");
272 lp_build_swizzle_aos(struct lp_build_context
*bld
,
274 const unsigned char swizzles
[4])
276 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
277 const struct lp_type type
= bld
->type
;
278 const unsigned n
= type
.length
;
281 if (swizzles
[0] == PIPE_SWIZZLE_RED
&&
282 swizzles
[1] == PIPE_SWIZZLE_GREEN
&&
283 swizzles
[2] == PIPE_SWIZZLE_BLUE
&&
284 swizzles
[3] == PIPE_SWIZZLE_ALPHA
) {
288 if (swizzles
[0] == swizzles
[1] &&
289 swizzles
[1] == swizzles
[2] &&
290 swizzles
[2] == swizzles
[3]) {
291 switch (swizzles
[0]) {
292 case PIPE_SWIZZLE_RED
:
293 case PIPE_SWIZZLE_GREEN
:
294 case PIPE_SWIZZLE_BLUE
:
295 case PIPE_SWIZZLE_ALPHA
:
296 return lp_build_swizzle_scalar_aos(bld
, a
, swizzles
[0]);
297 case PIPE_SWIZZLE_ZERO
:
299 case PIPE_SWIZZLE_ONE
:
307 if (type
.width
>= 16) {
311 LLVMValueRef undef
= LLVMGetUndef(lp_build_elem_type(bld
->gallivm
, type
));
312 LLVMTypeRef i32t
= LLVMInt32TypeInContext(bld
->gallivm
->context
);
313 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
314 LLVMValueRef aux
[LP_MAX_VECTOR_LENGTH
];
316 memset(aux
, 0, sizeof aux
);
318 for(j
= 0; j
< n
; j
+= 4) {
319 for(i
= 0; i
< 4; ++i
) {
321 switch (swizzles
[i
]) {
325 case PIPE_SWIZZLE_RED
:
326 case PIPE_SWIZZLE_GREEN
:
327 case PIPE_SWIZZLE_BLUE
:
328 case PIPE_SWIZZLE_ALPHA
:
329 shuffle
= j
+ swizzles
[i
];
331 case PIPE_SWIZZLE_ZERO
:
332 shuffle
= type
.length
+ 0;
334 aux
[0] = lp_build_const_elem(bld
->gallivm
, type
, 0.0);
337 case PIPE_SWIZZLE_ONE
:
338 shuffle
= type
.length
+ 1;
340 aux
[1] = lp_build_const_elem(bld
->gallivm
, type
, 1.0);
344 shuffles
[j
+ i
] = LLVMConstInt(i32t
, shuffle
, 0);
348 for (i
= 0; i
< n
; ++i
) {
354 return LLVMBuildShuffleVector(builder
, a
,
355 LLVMConstVector(aux
, n
),
356 LLVMConstVector(shuffles
, n
), "");
359 * Bit mask and shifts.
361 * For example, this will convert BGRA to RGBA by doing
363 * rgba = (bgra & 0x00ff0000) >> 16
364 * | (bgra & 0xff00ff00)
365 * | (bgra & 0x000000ff) << 16
367 * This is necessary not only for faster cause, but because X86 backend
368 * will refuse shuffles of <4 x i8> vectors
371 struct lp_type type4
;
377 * Start with a mixture of 1 and 0.
379 for (chan
= 0; chan
< 4; ++chan
) {
380 if (swizzles
[chan
] == PIPE_SWIZZLE_ONE
) {
384 res
= lp_build_select_aos(bld
, cond
, bld
->one
, bld
->zero
);
387 * Build a type where each element is an integer that cover the four
391 type4
.floating
= FALSE
;
395 a
= LLVMBuildBitCast(builder
, a
, lp_build_vec_type(bld
->gallivm
, type4
), "");
396 res
= LLVMBuildBitCast(builder
, res
, lp_build_vec_type(bld
->gallivm
, type4
), "");
399 * Mask and shift the channels, trying to group as many channels in the
400 * same shift as possible
402 for (shift
= -3; shift
<= 3; ++shift
) {
403 unsigned long long mask
= 0;
405 assert(type4
.width
<= sizeof(mask
)*8);
407 for (chan
= 0; chan
< 4; ++chan
) {
408 /* FIXME: big endian */
409 if (swizzles
[chan
] < 4 &&
410 chan
- swizzles
[chan
] == shift
) {
411 mask
|= ((1ULL << type
.width
) - 1) << (swizzles
[chan
] * type
.width
);
417 LLVMValueRef shifted
;
420 debug_printf("shift = %i, mask = 0x%08llx\n", shift
, mask
);
422 masked
= LLVMBuildAnd(builder
, a
,
423 lp_build_const_int_vec(bld
->gallivm
, type4
, mask
), "");
425 shifted
= LLVMBuildShl(builder
, masked
,
426 lp_build_const_int_vec(bld
->gallivm
, type4
, shift
*type
.width
), "");
427 } else if (shift
< 0) {
428 shifted
= LLVMBuildLShr(builder
, masked
,
429 lp_build_const_int_vec(bld
->gallivm
, type4
, -shift
*type
.width
), "");
434 res
= LLVMBuildOr(builder
, res
, shifted
, "");
438 return LLVMBuildBitCast(builder
, res
,
439 lp_build_vec_type(bld
->gallivm
, type
), "");
445 * Extended swizzle of a single channel of a SoA vector.
447 * @param bld building context
448 * @param unswizzled array with the 4 unswizzled values
449 * @param swizzle one of the PIPE_SWIZZLE_*
451 * @return the swizzled value.
454 lp_build_swizzle_soa_channel(struct lp_build_context
*bld
,
455 const LLVMValueRef
*unswizzled
,
459 case PIPE_SWIZZLE_RED
:
460 case PIPE_SWIZZLE_GREEN
:
461 case PIPE_SWIZZLE_BLUE
:
462 case PIPE_SWIZZLE_ALPHA
:
463 return unswizzled
[swizzle
];
464 case PIPE_SWIZZLE_ZERO
:
466 case PIPE_SWIZZLE_ONE
:
476 * Extended swizzle of a SoA vector.
478 * @param bld building context
479 * @param unswizzled array with the 4 unswizzled values
480 * @param swizzles array of PIPE_SWIZZLE_*
481 * @param swizzled output swizzled values
484 lp_build_swizzle_soa(struct lp_build_context
*bld
,
485 const LLVMValueRef
*unswizzled
,
486 const unsigned char swizzles
[4],
487 LLVMValueRef
*swizzled
)
491 for (chan
= 0; chan
< 4; ++chan
) {
492 swizzled
[chan
] = lp_build_swizzle_soa_channel(bld
, unswizzled
,
499 * Do an extended swizzle of a SoA vector inplace.
501 * @param bld building context
502 * @param values intput/output array with the 4 values
503 * @param swizzles array of PIPE_SWIZZLE_*
506 lp_build_swizzle_soa_inplace(struct lp_build_context
*bld
,
507 LLVMValueRef
*values
,
508 const unsigned char swizzles
[4])
510 LLVMValueRef unswizzled
[4];
513 for (chan
= 0; chan
< 4; ++chan
) {
514 unswizzled
[chan
] = values
[chan
];
517 lp_build_swizzle_soa(bld
, unswizzled
, swizzles
, values
);