//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Define several functions to decode x86 specific shuffle semantics into a
// generic vector mask.
//
//===----------------------------------------------------------------------===//

#include "X86ShuffleDecode.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/MathExtras.h"

//===----------------------------------------------------------------------===//
// Vector Mask Decoding
//===----------------------------------------------------------------------===//

void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask,
                        bool SrcIsMem) {
  // Default to copying the dest value.
  ShuffleMask.push_back(0);
  ShuffleMask.push_back(1);
  ShuffleMask.push_back(2);
  ShuffleMask.push_back(3);

  // Decode the immediate.
  unsigned ZMask = Imm & 15;
  unsigned CountD = (Imm >> 4) & 3;
  unsigned CountS = SrcIsMem ? 0 : (Imm >> 6) & 3;

  // CountS selects which input element to use.
  unsigned InVal = 4 + CountS;
  // CountD specifies which element of destination to update.
  ShuffleMask[CountD] = InVal;
  // ZMask zaps values, potentially overriding the CountD elt.
  if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
  if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
  if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
  if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
}
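
// e.g. DecodeINSERTPSMask(0x4A, Mask, /*SrcIsMem=*/false): CountS = 1,
// CountD = 0, ZMask = 0b1010, giving <5, SM_SentinelZero, 2, SM_SentinelZero>.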

void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len,
                             SmallVectorImpl<int> &ShuffleMask) {
  assert((Idx + Len) <= NumElts && "Insertion out of range");

  for (unsigned i = 0; i != NumElts; ++i)
    ShuffleMask.push_back(i);
  for (unsigned i = 0; i != Len; ++i)
    ShuffleMask[Idx + i] = NumElts + i;
}

void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned i = NElts / 2; i != NElts; ++i)
    ShuffleMask.push_back(NElts + i);

  for (unsigned i = NElts / 2; i != NElts; ++i)
    ShuffleMask.push_back(i);
}

void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned i = 0; i != NElts / 2; ++i)
    ShuffleMask.push_back(i);

  for (unsigned i = 0; i != NElts / 2; ++i)
    ShuffleMask.push_back(NElts + i);
}
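
// e.g. for 4 elements, DecodeMOVHLPSMask gives <6, 7, 2, 3> and
// DecodeMOVLHPSMask gives <0, 1, 4, 5> (indices >= 4 select the second
// source).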

void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
  for (int i = 0, e = NumElts / 2; i < e; ++i) {
    ShuffleMask.push_back(2 * i);
    ShuffleMask.push_back(2 * i);
  }
}

void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
  for (int i = 0, e = NumElts / 2; i < e; ++i) {
    ShuffleMask.push_back(2 * i + 1);
    ShuffleMask.push_back(2 * i + 1);
  }
}
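
// e.g. for v4f32, DecodeMOVSLDUPMask gives <0, 0, 2, 2> (duplicate the even
// elements) and DecodeMOVSHDUPMask gives <1, 1, 3, 3> (duplicate the odd
// elements).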

void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
  const unsigned NumLaneElts = 2;

  for (unsigned l = 0; l < NumElts; l += NumLaneElts)
    for (unsigned i = 0; i < NumLaneElts; ++i)
      ShuffleMask.push_back(l);
}

void DecodePSLLDQMask(unsigned NumElts, unsigned Imm,
                      SmallVectorImpl<int> &ShuffleMask) {
  const unsigned NumLaneElts = 16;

  for (unsigned l = 0; l < NumElts; l += NumLaneElts)
    for (unsigned i = 0; i < NumLaneElts; ++i) {
      int M = SM_SentinelZero;
      if (i >= Imm) M = i - Imm + l;
      ShuffleMask.push_back(M);
    }
}
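
// e.g. DecodePSLLDQMask(16, 4, Mask) shifts each 16-byte lane left by 4
// bytes: <Z, Z, Z, Z, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11>, where Z is
// SM_SentinelZero.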

void DecodePSRLDQMask(unsigned NumElts, unsigned Imm,
                      SmallVectorImpl<int> &ShuffleMask) {
  const unsigned NumLaneElts = 16;

  for (unsigned l = 0; l < NumElts; l += NumLaneElts)
    for (unsigned i = 0; i < NumLaneElts; ++i) {
      unsigned Base = i + Imm;
      int M = Base + l;
      if (Base >= NumLaneElts) M = SM_SentinelZero;
      ShuffleMask.push_back(M);
    }
}
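
// e.g. DecodePSRLDQMask(16, 4, Mask) shifts each 16-byte lane right by 4
// bytes: <4, 5, ..., 15, Z, Z, Z, Z>.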

void DecodePALIGNRMask(unsigned NumElts, unsigned Imm,
                       SmallVectorImpl<int> &ShuffleMask) {
  const unsigned NumLaneElts = 16;

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Imm;
      // If i+imm is out of this lane then we actually need the other source.
      if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
      ShuffleMask.push_back(Base + l);
    }
  }
}

void DecodeVALIGNMask(unsigned NumElts, unsigned Imm,
                      SmallVectorImpl<int> &ShuffleMask) {
  // Not all bits of the immediate are used so mask it.
  assert(isPowerOf2_32(NumElts) && "NumElts should be power of 2");
  Imm = Imm & (NumElts - 1);
  for (unsigned i = 0; i != NumElts; ++i)
    ShuffleMask.push_back(i + Imm);
}
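
// e.g. DecodeVALIGNMask(8, 3, Mask) rotates the concatenation of the two
// sources right by 3 elements: <3, 4, 5, 6, 7, 8, 9, 10>, where indices >= 8
// select the second source.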

void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm,
                     SmallVectorImpl<int> &ShuffleMask) {
  unsigned Size = NumElts * ScalarBits;
  unsigned NumLanes = Size / 128;
  if (NumLanes == 0) NumLanes = 1; // Handle MMX
  unsigned NumLaneElts = NumElts / NumLanes;

  uint32_t SplatImm = (Imm & 0xff) * 0x01010101;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      ShuffleMask.push_back(SplatImm % NumLaneElts + l);
      SplatImm /= NumLaneElts;
    }
  }
}
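
// e.g. PSHUFD with Imm = 0x1B reverses a 4-element lane:
// DecodePSHUFMask(4, 32, 0x1B, Mask) gives <3, 2, 1, 0>.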

void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm,
                       SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned l = 0; l != NumElts; l += 8) {
    unsigned NewImm = Imm;
    for (unsigned i = 0, e = 4; i != e; ++i) {
      ShuffleMask.push_back(l + i);
    }
    for (unsigned i = 4, e = 8; i != e; ++i) {
      ShuffleMask.push_back(l + 4 + (NewImm & 3));
      NewImm >>= 2;
    }
  }
}

void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm,
                       SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned l = 0; l != NumElts; l += 8) {
    unsigned NewImm = Imm;
    for (unsigned i = 0, e = 4; i != e; ++i) {
      ShuffleMask.push_back(l + (NewImm & 3));
      NewImm >>= 2;
    }
    for (unsigned i = 4, e = 8; i != e; ++i) {
      ShuffleMask.push_back(l + i);
    }
  }
}
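
// e.g. with Imm = 0x1B on v8i16, DecodePSHUFLWMask gives
// <3, 2, 1, 0, 4, 5, 6, 7> and DecodePSHUFHWMask gives
// <0, 1, 2, 3, 7, 6, 5, 4>.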

void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
  unsigned NumHalfElts = NumElts / 2;

  for (unsigned l = 0; l != NumHalfElts; ++l)
    ShuffleMask.push_back(l + NumHalfElts);
  for (unsigned h = 0; h != NumHalfElts; ++h)
    ShuffleMask.push_back(h);
}
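
// e.g. for 3DNow! PSWAPD, DecodePSWAPMask(2, Mask) gives <1, 0>.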

void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits,
                     unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
  unsigned NumLaneElts = 128 / ScalarBits;

  unsigned NewImm = Imm;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    // Each half of a lane comes from a different source.
    for (unsigned s = 0; s != NumElts * 2; s += NumElts) {
      for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
        ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
        NewImm /= NumLaneElts;
      }
    }
    if (NumLaneElts == 4) NewImm = Imm; // reload imm
  }
}
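
// e.g. SHUFPS with Imm = 0x44 takes elements 0 and 1 from each source:
// DecodeSHUFPMask(4, 32, 0x44, Mask) gives <0, 1, 4, 5>.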

void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits,
                      SmallVectorImpl<int> &ShuffleMask) {
  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
  // independently on 128-bit lanes.
  unsigned NumLanes = (NumElts * ScalarBits) / 128;
  if (NumLanes == 0) NumLanes = 1; // Handle MMX
  unsigned NumLaneElts = NumElts / NumLanes;

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) {
      ShuffleMask.push_back(i);           // Reads from dest/src1
      ShuffleMask.push_back(i + NumElts); // Reads from src/src2
    }
  }
}

void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits,
                      SmallVectorImpl<int> &ShuffleMask) {
  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
  // independently on 128-bit lanes.
  unsigned NumLanes = (NumElts * ScalarBits) / 128;
  if (NumLanes == 0) NumLanes = 1; // Handle MMX
  unsigned NumLaneElts = NumElts / NumLanes;

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
      ShuffleMask.push_back(i);           // Reads from dest/src1
      ShuffleMask.push_back(i + NumElts); // Reads from src/src2
    }
  }
}
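
// e.g. for v4i32, DecodeUNPCKLMask(4, 32, Mask) gives <0, 4, 1, 5> and
// DecodeUNPCKHMask(4, 32, Mask) gives <2, 6, 3, 7>.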

void DecodeVectorBroadcast(unsigned NumElts,
                           SmallVectorImpl<int> &ShuffleMask) {
  ShuffleMask.append(NumElts, 0);
}

void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts,
                              SmallVectorImpl<int> &ShuffleMask) {
  unsigned Scale = DstNumElts / SrcNumElts;

  for (unsigned i = 0; i != Scale; ++i)
    for (unsigned j = 0; j != SrcNumElts; ++j)
      ShuffleMask.push_back(j);
}

void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize,
                               unsigned Imm,
                               SmallVectorImpl<int> &ShuffleMask) {
  unsigned NumElementsInLane = 128 / ScalarSize;
  unsigned NumLanes = NumElts / NumElementsInLane;

  for (unsigned l = 0; l != NumElts; l += NumElementsInLane) {
    unsigned Index = (Imm % NumLanes) * NumElementsInLane;
    Imm /= NumLanes; // Discard the bits we just used.
    // We actually need the other source.
    if (l >= (NumElts / 2))
      Index += NumElts;
    for (unsigned i = 0; i != NumElementsInLane; ++i)
      ShuffleMask.push_back(Index + i);
  }
}

void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
                          SmallVectorImpl<int> &ShuffleMask) {
  unsigned HalfSize = NumElts / 2;

  for (unsigned l = 0; l != 2; ++l) {
    unsigned HalfMask = Imm >> (l * 4);
    unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
    for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
      ShuffleMask.push_back((HalfMask & 8) ? SM_SentinelZero : (int)i);
  }
}
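
// e.g. VPERM2F128 with Imm = 0x21 selects the upper half of src1 and the
// lower half of src2: DecodeVPERM2X128Mask(8, 0x21, Mask) gives
// <4, 5, 6, 7, 8, 9, 10, 11>.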

void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                      SmallVectorImpl<int> &ShuffleMask) {
  for (int i = 0, e = RawMask.size(); i < e; ++i) {
    uint64_t M = RawMask[i];
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }
    // For 256/512-bit vectors the base of the shuffle is the 128-bit
    // subvector we're inside.
    int Base = (i / 16) * 16;
    // If the high bit (7) of the byte is set, the element is zeroed.
    if (M & (1 << 7))
      ShuffleMask.push_back(SM_SentinelZero);
    else {
      // Only the least significant 4 bits of the byte are used.
      int Index = Base + (M & 0xf);
      ShuffleMask.push_back(Index);
    }
  }
}

void DecodeBLENDMask(unsigned NumElts, unsigned Imm,
                     SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned i = 0; i < NumElts; ++i) {
    // If there are more than 8 elements in the vector, then any immediate
    // blend mask wraps around.
    unsigned Bit = i % 8;
    ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElts + i : i);
  }
}
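
// e.g. DecodeBLENDMask(4, 0x5, Mask) takes lanes 0 and 2 from the second
// source: <4, 1, 6, 3>.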

void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                      SmallVectorImpl<int> &ShuffleMask) {
  assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size");

  // VPPERM Operation
  // Bits[4:0] - Byte Index (0 - 31)
  // Bits[7:5] - Permute Operation
  //
  // Permute Operation:
  // 0 - Source byte (no logical operation).
  // 1 - Invert source byte.
  // 2 - Bit reverse of source byte.
  // 3 - Bit reverse of inverted source byte.
  // 4 - 00h (zero - fill).
  // 5 - FFh (ones - fill).
  // 6 - Most significant bit of source byte replicated in all bit positions.
  // 7 - Invert most significant bit of source byte and replicate in all bit
  //     positions.
  for (int i = 0, e = RawMask.size(); i < e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    uint64_t M = RawMask[i];
    uint64_t PermuteOp = (M >> 5) & 0x7;
    if (PermuteOp == 4) {
      // Zero-fill maps to a zero sentinel.
      ShuffleMask.push_back(SM_SentinelZero);
      continue;
    }
    if (PermuteOp != 0) {
      // The remaining operations are not plain shuffles; give up.
      ShuffleMask.clear();
      return;
    }

    uint64_t Index = M & 0x1F;
    ShuffleMask.push_back((int)Index);
  }
}

void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
                     SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned l = 0; l != NumElts; l += 4)
    for (unsigned i = 0; i != 4; ++i)
      ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3));
}
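
// e.g. VPERMQ with Imm = 0x1B reverses the four 64-bit elements:
// DecodeVPERMMask(4, 0x1B, Mask) gives <3, 2, 1, 0>.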

void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
                          unsigned NumDstElts, bool IsAnyExtend,
                          SmallVectorImpl<int> &ShuffleMask) {
  unsigned Scale = DstScalarBits / SrcScalarBits;
  assert(SrcScalarBits < DstScalarBits &&
         "Expected zero extension mask to increase scalar size");

  int Sentinel = IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero;
  for (unsigned i = 0; i != NumDstElts; i++) {
    ShuffleMask.push_back(i);
    ShuffleMask.append(Scale - 1, Sentinel);
  }
}
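
// e.g. PMOVZXBW from 8 x i8 to 8 x i16:
// DecodeZeroExtendMask(8, 16, 8, false, Mask) gives
// <0, Z, 1, Z, 2, Z, 3, Z, 4, Z, 5, Z, 6, Z, 7, Z>.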

void DecodeZeroMoveLowMask(unsigned NumElts,
                           SmallVectorImpl<int> &ShuffleMask) {
  ShuffleMask.push_back(0);
  ShuffleMask.append(NumElts - 1, SM_SentinelZero);
}

void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad,
                          SmallVectorImpl<int> &ShuffleMask) {
  // First element comes from the first element of second source.
  // Remaining elements: Load zero extends / Move copies from first source.
  ShuffleMask.push_back(NumElts);
  for (unsigned i = 1; i < NumElts; i++)
    ShuffleMask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
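
// e.g. for v4f32 MOVSS, the register form gives <4, 1, 2, 3> and the load
// form gives <4, Z, Z, Z>.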

void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
                      SmallVectorImpl<int> &ShuffleMask) {
  unsigned HalfElts = NumElts / 2;

  // Only the bottom 6 bits are valid for each immediate.
  Len &= 0x3F;
  Idx &= 0x3F;

  // We can only decode this bit extraction instruction as a shuffle if both
  // the length and index work with whole elements.
  if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
    return;

  // A length of zero is equivalent to a bit length of 64.
  if (Len == 0)
    Len = 64;

  // If the length + index exceeds the bottom 64 bits the result is undefined.
  if ((Len + Idx) > 64) {
    ShuffleMask.append(NumElts, SM_SentinelUndef);
    return;
  }

  // Convert the length and index to work with elements.
  Len /= EltSize;
  Idx /= EltSize;

  // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining
  // elements of the lower 64-bits. The upper 64-bits are undefined.
  for (int i = 0; i != Len; ++i)
    ShuffleMask.push_back(i + Idx);
  for (int i = Len; i != (int)HalfElts; ++i)
    ShuffleMask.push_back(SM_SentinelZero);
  for (int i = HalfElts; i != (int)NumElts; ++i)
    ShuffleMask.push_back(SM_SentinelUndef);
}
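
// e.g. extracting 16 bits from bit offset 8 of a v16i8:
// DecodeEXTRQIMask(16, 8, 16, 8, Mask) gives <1, 2, Z, Z, Z, Z, Z, Z> for
// the lower half, with the upper 8 elements undefined.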

void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
                        SmallVectorImpl<int> &ShuffleMask) {
  unsigned HalfElts = NumElts / 2;

  // Only the bottom 6 bits are valid for each immediate.
  Len &= 0x3F;
  Idx &= 0x3F;

  // We can only decode this bit insertion instruction as a shuffle if both
  // the length and index work with whole elements.
  if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
    return;

  // A length of zero is equivalent to a bit length of 64.
  if (Len == 0)
    Len = 64;

  // If the length + index exceeds the bottom 64 bits the result is undefined.
  if ((Len + Idx) > 64) {
    ShuffleMask.append(NumElts, SM_SentinelUndef);
    return;
  }

  // Convert the length and index to work with elements.
  Len /= EltSize;
  Idx /= EltSize;

  // INSERTQ: Extract lowest Len elements from lower half of second source and
  // insert over first source starting at Idx element. The upper 64-bits are
  // undefined.
  for (int i = 0; i != Idx; ++i)
    ShuffleMask.push_back(i);
  for (int i = 0; i != Len; ++i)
    ShuffleMask.push_back(i + NumElts);
  for (int i = Idx + Len; i != (int)HalfElts; ++i)
    ShuffleMask.push_back(i);
  for (int i = HalfElts; i != (int)NumElts; ++i)
    ShuffleMask.push_back(SM_SentinelUndef);
}

void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
                        ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                        SmallVectorImpl<int> &ShuffleMask) {
  unsigned VecSize = NumElts * ScalarBits;
  unsigned NumLanes = VecSize / 128;
  unsigned NumEltsPerLane = NumElts / NumLanes;
  assert((VecSize == 128 || VecSize == 256 || VecSize == 512) &&
         "Unexpected vector size");
  assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");

  for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }
    uint64_t M = RawMask[i];
    M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3));
    unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
    ShuffleMask.push_back((int)(LaneOffset + M));
  }
}
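
// e.g. VPERMILPS on v4f32 with RawMask = {3, 2, 1, 0} and no undefs decodes
// to <3, 2, 1, 0>.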

void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
                         ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                         SmallVectorImpl<int> &ShuffleMask) {
  unsigned VecSize = NumElts * ScalarBits;
  unsigned NumLanes = VecSize / 128;
  unsigned NumEltsPerLane = NumElts / NumLanes;
  assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size");
  assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
  assert((NumElts == RawMask.size()) && "Unexpected mask size");

  for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    // VPERMIL2 Operation.
    // Bits[3] - Match Bit.
    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
    uint64_t Selector = RawMask[i];
    unsigned MatchBit = (Selector >> 3) & 0x1;

    // M2Z[0:1]     MatchBit
    //   0Xb           X        Source selected by Selector index.
    //   10b           0        Source selected by Selector index.
    //   10b           1        Zero.
    //   11b           0        Zero.
    //   11b           1        Source selected by Selector index.
    if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) {
      ShuffleMask.push_back(SM_SentinelZero);
      continue;
    }

    int Index = i & ~(NumEltsPerLane - 1);
    if (ScalarBits == 64)
      Index += (Selector >> 1) & 0x1;
    else
      Index += Selector & 0x3;

    int Src = (Selector >> 2) & 0x1;
    Index += Src * NumElts;
    ShuffleMask.push_back(Index);
  }
}
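
// e.g. with M2Z = 0 on v4f32, a selector of 6 (0b0110) decodes to index 6:
// (6 & 0x3) = 2 within the lane, plus NumElts since Src = (6 >> 2) & 1 = 1.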

void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                      SmallVectorImpl<int> &ShuffleMask) {
  uint64_t EltMaskSize = RawMask.size() - 1;
  for (int i = 0, e = RawMask.size(); i != e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }
    uint64_t M = RawMask[i];
    M &= EltMaskSize;
    ShuffleMask.push_back((int)M);
  }
}

void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                       SmallVectorImpl<int> &ShuffleMask) {
  uint64_t EltMaskSize = (RawMask.size() * 2) - 1;
  for (int i = 0, e = RawMask.size(); i != e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }
    uint64_t M = RawMask[i];
    M &= EltMaskSize;
    ShuffleMask.push_back((int)M);
  }
}
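
// e.g. for an 8-element VPERMV3 mask (such as VPERMT2D), the raw index is
// masked to 4 bits, so a raw value of 12 selects element 4 of the second
// source.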