//===-- TargetLowering.cpp - Implement the TargetLowering class ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//
14 #include "llvm/Target/TargetLowering.h"
15 #include "llvm/MC/MCAsmInfo.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/Target/TargetData.h"
18 #include "llvm/Target/TargetLoweringObjectFile.h"
19 #include "llvm/Target/TargetMachine.h"
20 #include "llvm/Target/TargetRegisterInfo.h"
21 #include "llvm/GlobalVariable.h"
22 #include "llvm/DerivedTypes.h"
23 #include "llvm/CodeGen/Analysis.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineJumpTableInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/SelectionDAG.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/MathExtras.h"
TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
  bool isLocal = GV->hasLocalLinkage();
  bool isDeclaration = GV->isDeclaration();
  // FIXME: what should we do for protected and internal visibility?
  // For variables, is internal different from hidden?
  bool isHidden = GV->hasHiddenVisibility();

  if (reloc == Reloc::PIC_) {
    if (isLocal || isHidden)
      return TLSModel::LocalDynamic;
    return TLSModel::GeneralDynamic;
  }
  if (!isDeclaration || isHidden)
    return TLSModel::LocalExec;
  return TLSModel::InitialExec;
}
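
// Illustrative behavior of the selection above (not exhaustive): under
// Reloc::PIC_, a global with local linkage or hidden visibility gets the
// LocalDynamic model and anything else gets GeneralDynamic; without PIC, a
// global defined in this module (or hidden) gets LocalExec, while an
// external declaration gets InitialExec.
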
/// InitLibcallNames - Set default libcall names.
static void InitLibcallNames(const char **Names) {
  Names[RTLIB::SHL_I16] = "__ashlhi3";
  Names[RTLIB::SHL_I32] = "__ashlsi3";
  Names[RTLIB::SHL_I64] = "__ashldi3";
  Names[RTLIB::SHL_I128] = "__ashlti3";
  Names[RTLIB::SRL_I16] = "__lshrhi3";
  Names[RTLIB::SRL_I32] = "__lshrsi3";
  Names[RTLIB::SRL_I64] = "__lshrdi3";
  Names[RTLIB::SRL_I128] = "__lshrti3";
  Names[RTLIB::SRA_I16] = "__ashrhi3";
  Names[RTLIB::SRA_I32] = "__ashrsi3";
  Names[RTLIB::SRA_I64] = "__ashrdi3";
  Names[RTLIB::SRA_I128] = "__ashrti3";
  Names[RTLIB::MUL_I8] = "__mulqi3";
  Names[RTLIB::MUL_I16] = "__mulhi3";
  Names[RTLIB::MUL_I32] = "__mulsi3";
  Names[RTLIB::MUL_I64] = "__muldi3";
  Names[RTLIB::MUL_I128] = "__multi3";
  Names[RTLIB::SDIV_I8] = "__divqi3";
  Names[RTLIB::SDIV_I16] = "__divhi3";
  Names[RTLIB::SDIV_I32] = "__divsi3";
  Names[RTLIB::SDIV_I64] = "__divdi3";
  Names[RTLIB::SDIV_I128] = "__divti3";
  Names[RTLIB::UDIV_I8] = "__udivqi3";
  Names[RTLIB::UDIV_I16] = "__udivhi3";
  Names[RTLIB::UDIV_I32] = "__udivsi3";
  Names[RTLIB::UDIV_I64] = "__udivdi3";
  Names[RTLIB::UDIV_I128] = "__udivti3";
  Names[RTLIB::SREM_I8] = "__modqi3";
  Names[RTLIB::SREM_I16] = "__modhi3";
  Names[RTLIB::SREM_I32] = "__modsi3";
  Names[RTLIB::SREM_I64] = "__moddi3";
  Names[RTLIB::SREM_I128] = "__modti3";
  Names[RTLIB::UREM_I8] = "__umodqi3";
  Names[RTLIB::UREM_I16] = "__umodhi3";
  Names[RTLIB::UREM_I32] = "__umodsi3";
  Names[RTLIB::UREM_I64] = "__umoddi3";
  Names[RTLIB::UREM_I128] = "__umodti3";

  // These are generally not available.
  Names[RTLIB::SDIVREM_I8] = 0;
  Names[RTLIB::SDIVREM_I16] = 0;
  Names[RTLIB::SDIVREM_I32] = 0;
  Names[RTLIB::SDIVREM_I64] = 0;
  Names[RTLIB::SDIVREM_I128] = 0;
  Names[RTLIB::UDIVREM_I8] = 0;
  Names[RTLIB::UDIVREM_I16] = 0;
  Names[RTLIB::UDIVREM_I32] = 0;
  Names[RTLIB::UDIVREM_I64] = 0;
  Names[RTLIB::UDIVREM_I128] = 0;

  Names[RTLIB::NEG_I32] = "__negsi2";
  Names[RTLIB::NEG_I64] = "__negdi2";
  Names[RTLIB::ADD_F32] = "__addsf3";
  Names[RTLIB::ADD_F64] = "__adddf3";
  Names[RTLIB::ADD_F80] = "__addxf3";
  Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
  Names[RTLIB::SUB_F32] = "__subsf3";
  Names[RTLIB::SUB_F64] = "__subdf3";
  Names[RTLIB::SUB_F80] = "__subxf3";
  Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
  Names[RTLIB::MUL_F32] = "__mulsf3";
  Names[RTLIB::MUL_F64] = "__muldf3";
  Names[RTLIB::MUL_F80] = "__mulxf3";
  Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
  Names[RTLIB::DIV_F32] = "__divsf3";
  Names[RTLIB::DIV_F64] = "__divdf3";
  Names[RTLIB::DIV_F80] = "__divxf3";
  Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
  Names[RTLIB::REM_F32] = "fmodf";
  Names[RTLIB::REM_F64] = "fmod";
  Names[RTLIB::REM_F80] = "fmodl";
  Names[RTLIB::REM_PPCF128] = "fmodl";
  Names[RTLIB::POWI_F32] = "__powisf2";
  Names[RTLIB::POWI_F64] = "__powidf2";
  Names[RTLIB::POWI_F80] = "__powixf2";
  Names[RTLIB::POWI_PPCF128] = "__powitf2";
  Names[RTLIB::SQRT_F32] = "sqrtf";
  Names[RTLIB::SQRT_F64] = "sqrt";
  Names[RTLIB::SQRT_F80] = "sqrtl";
  Names[RTLIB::SQRT_PPCF128] = "sqrtl";
  Names[RTLIB::LOG_F32] = "logf";
  Names[RTLIB::LOG_F64] = "log";
  Names[RTLIB::LOG_F80] = "logl";
  Names[RTLIB::LOG_PPCF128] = "logl";
  Names[RTLIB::LOG2_F32] = "log2f";
  Names[RTLIB::LOG2_F64] = "log2";
  Names[RTLIB::LOG2_F80] = "log2l";
  Names[RTLIB::LOG2_PPCF128] = "log2l";
  Names[RTLIB::LOG10_F32] = "log10f";
  Names[RTLIB::LOG10_F64] = "log10";
  Names[RTLIB::LOG10_F80] = "log10l";
  Names[RTLIB::LOG10_PPCF128] = "log10l";
  Names[RTLIB::EXP_F32] = "expf";
  Names[RTLIB::EXP_F64] = "exp";
  Names[RTLIB::EXP_F80] = "expl";
  Names[RTLIB::EXP_PPCF128] = "expl";
  Names[RTLIB::EXP2_F32] = "exp2f";
  Names[RTLIB::EXP2_F64] = "exp2";
  Names[RTLIB::EXP2_F80] = "exp2l";
  Names[RTLIB::EXP2_PPCF128] = "exp2l";
  Names[RTLIB::SIN_F32] = "sinf";
  Names[RTLIB::SIN_F64] = "sin";
  Names[RTLIB::SIN_F80] = "sinl";
  Names[RTLIB::SIN_PPCF128] = "sinl";
  Names[RTLIB::COS_F32] = "cosf";
  Names[RTLIB::COS_F64] = "cos";
  Names[RTLIB::COS_F80] = "cosl";
  Names[RTLIB::COS_PPCF128] = "cosl";
  Names[RTLIB::POW_F32] = "powf";
  Names[RTLIB::POW_F64] = "pow";
  Names[RTLIB::POW_F80] = "powl";
  Names[RTLIB::POW_PPCF128] = "powl";
  Names[RTLIB::CEIL_F32] = "ceilf";
  Names[RTLIB::CEIL_F64] = "ceil";
  Names[RTLIB::CEIL_F80] = "ceill";
  Names[RTLIB::CEIL_PPCF128] = "ceill";
  Names[RTLIB::TRUNC_F32] = "truncf";
  Names[RTLIB::TRUNC_F64] = "trunc";
  Names[RTLIB::TRUNC_F80] = "truncl";
  Names[RTLIB::TRUNC_PPCF128] = "truncl";
  Names[RTLIB::RINT_F32] = "rintf";
  Names[RTLIB::RINT_F64] = "rint";
  Names[RTLIB::RINT_F80] = "rintl";
  Names[RTLIB::RINT_PPCF128] = "rintl";
  Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
  Names[RTLIB::NEARBYINT_F64] = "nearbyint";
  Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
  Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
  Names[RTLIB::FLOOR_F32] = "floorf";
  Names[RTLIB::FLOOR_F64] = "floor";
  Names[RTLIB::FLOOR_F80] = "floorl";
  Names[RTLIB::FLOOR_PPCF128] = "floorl";
  Names[RTLIB::COPYSIGN_F32] = "copysignf";
  Names[RTLIB::COPYSIGN_F64] = "copysign";
  Names[RTLIB::COPYSIGN_F80] = "copysignl";
  Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
  Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
  Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
  Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
  Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
  Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
  Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
  Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
  Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
  Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
  Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
  Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
  Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
  Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
  Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
  Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
  Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
  Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
  Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
  Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
  Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
  Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
  Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
  Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
  Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
  Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
  Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
  Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
  Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
  Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
  Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
  Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
  Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
  Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
  Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
  Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
  Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
  Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
  Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
  Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
  Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
  Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
  Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
  Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
  Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
  Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
  Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
  Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
  Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
  Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
  Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
  Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
  Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
  Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
  Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
  Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
  Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
  Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
  Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
  Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
  Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
  Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
  Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
  Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
  Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
  Names[RTLIB::OEQ_F32] = "__eqsf2";
  Names[RTLIB::OEQ_F64] = "__eqdf2";
  Names[RTLIB::UNE_F32] = "__nesf2";
  Names[RTLIB::UNE_F64] = "__nedf2";
  Names[RTLIB::OGE_F32] = "__gesf2";
  Names[RTLIB::OGE_F64] = "__gedf2";
  Names[RTLIB::OLT_F32] = "__ltsf2";
  Names[RTLIB::OLT_F64] = "__ltdf2";
  Names[RTLIB::OLE_F32] = "__lesf2";
  Names[RTLIB::OLE_F64] = "__ledf2";
  Names[RTLIB::OGT_F32] = "__gtsf2";
  Names[RTLIB::OGT_F64] = "__gtdf2";
  Names[RTLIB::UO_F32] = "__unordsf2";
  Names[RTLIB::UO_F64] = "__unorddf2";
  Names[RTLIB::O_F32] = "__unordsf2";
  Names[RTLIB::O_F64] = "__unorddf2";
  Names[RTLIB::MEMCPY] = "memcpy";
  Names[RTLIB::MEMMOVE] = "memmove";
  Names[RTLIB::MEMSET] = "memset";
  Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
  Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
  Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
  Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
  Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
  Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
  Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
  Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
  Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
  Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
  Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
  Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
  Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
  Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
  Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
  Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
  Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
  Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
  Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
  Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
  Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
  Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
  Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
  Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
  Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
}
/// InitLibcallCallingConvs - Set default libcall CallingConvs.
static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
  for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
    CCs[i] = CallingConv::C;
  }
}
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f32) {
    if (RetVT == MVT::f64)
      return FPEXT_F32_F64;
  }

  return UNKNOWN_LIBCALL;
}
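
// Example: when legalizing an f32 -> f64 fp_extend, a caller would use
// getFPEXT(MVT::f32, MVT::f64), receive FPEXT_F32_F64, and emit a call to
// its registered name (by default "__extendsfdf2" above). Any other type
// pair yields UNKNOWN_LIBCALL.
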
/// getFPROUND - Return the FPROUND_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
  if (RetVT == MVT::f32) {
    if (OpVT == MVT::f64)
      return FPROUND_F64_F32;
    if (OpVT == MVT::f80)
      return FPROUND_F80_F32;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F32;
  } else if (RetVT == MVT::f64) {
    if (OpVT == MVT::f80)
      return FPROUND_F80_F64;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F64;
  }

  return UNKNOWN_LIBCALL;
}
/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f32) {
    if (RetVT == MVT::i8)
      return FPTOSINT_F32_I8;
    if (RetVT == MVT::i16)
      return FPTOSINT_F32_I16;
    if (RetVT == MVT::i32)
      return FPTOSINT_F32_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F32_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F32_I128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::i8)
      return FPTOSINT_F64_I8;
    if (RetVT == MVT::i16)
      return FPTOSINT_F64_I16;
    if (RetVT == MVT::i32)
      return FPTOSINT_F64_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F64_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F64_I128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F80_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F80_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F80_I128;
  } else if (OpVT == MVT::ppcf128) {
    if (RetVT == MVT::i32)
      return FPTOSINT_PPCF128_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_PPCF128_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_PPCF128_I128;
  }

  return UNKNOWN_LIBCALL;
}
/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f32) {
    if (RetVT == MVT::i8)
      return FPTOUINT_F32_I8;
    if (RetVT == MVT::i16)
      return FPTOUINT_F32_I16;
    if (RetVT == MVT::i32)
      return FPTOUINT_F32_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F32_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F32_I128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::i8)
      return FPTOUINT_F64_I8;
    if (RetVT == MVT::i16)
      return FPTOUINT_F64_I16;
    if (RetVT == MVT::i32)
      return FPTOUINT_F64_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F64_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F64_I128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F80_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F80_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F80_I128;
  } else if (OpVT == MVT::ppcf128) {
    if (RetVT == MVT::i32)
      return FPTOUINT_PPCF128_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_PPCF128_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_PPCF128_I128;
  }

  return UNKNOWN_LIBCALL;
}
/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::i32) {
    if (RetVT == MVT::f32)
      return SINTTOFP_I32_F32;
    else if (RetVT == MVT::f64)
      return SINTTOFP_I32_F64;
    else if (RetVT == MVT::f80)
      return SINTTOFP_I32_F80;
    else if (RetVT == MVT::ppcf128)
      return SINTTOFP_I32_PPCF128;
  } else if (OpVT == MVT::i64) {
    if (RetVT == MVT::f32)
      return SINTTOFP_I64_F32;
    else if (RetVT == MVT::f64)
      return SINTTOFP_I64_F64;
    else if (RetVT == MVT::f80)
      return SINTTOFP_I64_F80;
    else if (RetVT == MVT::ppcf128)
      return SINTTOFP_I64_PPCF128;
  } else if (OpVT == MVT::i128) {
    if (RetVT == MVT::f32)
      return SINTTOFP_I128_F32;
    else if (RetVT == MVT::f64)
      return SINTTOFP_I128_F64;
    else if (RetVT == MVT::f80)
      return SINTTOFP_I128_F80;
    else if (RetVT == MVT::ppcf128)
      return SINTTOFP_I128_PPCF128;
  }

  return UNKNOWN_LIBCALL;
}
/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::i32) {
    if (RetVT == MVT::f32)
      return UINTTOFP_I32_F32;
    else if (RetVT == MVT::f64)
      return UINTTOFP_I32_F64;
    else if (RetVT == MVT::f80)
      return UINTTOFP_I32_F80;
    else if (RetVT == MVT::ppcf128)
      return UINTTOFP_I32_PPCF128;
  } else if (OpVT == MVT::i64) {
    if (RetVT == MVT::f32)
      return UINTTOFP_I64_F32;
    else if (RetVT == MVT::f64)
      return UINTTOFP_I64_F64;
    else if (RetVT == MVT::f80)
      return UINTTOFP_I64_F80;
    else if (RetVT == MVT::ppcf128)
      return UINTTOFP_I64_PPCF128;
  } else if (OpVT == MVT::i128) {
    if (RetVT == MVT::f32)
      return UINTTOFP_I128_F32;
    else if (RetVT == MVT::f64)
      return UINTTOFP_I128_F64;
    else if (RetVT == MVT::f80)
      return UINTTOFP_I128_F80;
    else if (RetVT == MVT::ppcf128)
      return UINTTOFP_I128_PPCF128;
  }

  return UNKNOWN_LIBCALL;
}
/// InitCmpLibcallCCs - Set default comparison libcall CC.
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
  memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
  CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
  CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
  CCs[RTLIB::UNE_F32] = ISD::SETNE;
  CCs[RTLIB::UNE_F64] = ISD::SETNE;
  CCs[RTLIB::OGE_F32] = ISD::SETGE;
  CCs[RTLIB::OGE_F64] = ISD::SETGE;
  CCs[RTLIB::OLT_F32] = ISD::SETLT;
  CCs[RTLIB::OLT_F64] = ISD::SETLT;
  CCs[RTLIB::OLE_F32] = ISD::SETLE;
  CCs[RTLIB::OLE_F64] = ISD::SETLE;
  CCs[RTLIB::OGT_F32] = ISD::SETGT;
  CCs[RTLIB::OGT_F64] = ISD::SETGT;
  CCs[RTLIB::UO_F32] = ISD::SETNE;
  CCs[RTLIB::UO_F64] = ISD::SETNE;
  CCs[RTLIB::O_F32] = ISD::SETEQ;
  CCs[RTLIB::O_F64] = ISD::SETEQ;
}
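
// These condition codes encode the libgcc soft-float convention: each
// comparison routine returns an int that is then compared against zero.
// For example, __eqsf2 returns 0 iff its operands compare equal, so
// OEQ_F32 tests the call result with SETEQ; __unordsf2 returns nonzero iff
// either operand is NaN, so UO_F32 tests with SETNE while O_F32 reuses the
// same routine with the inverted SETEQ test.
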
/// NOTE: The constructor takes ownership of TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm,
                               const TargetLoweringObjectFile *tlof)
  : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
  // All operations default to being supported.
  memset(OpActions, 0, sizeof(OpActions));
  memset(LoadExtActions, 0, sizeof(LoadExtActions));
  memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
  memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
  memset(CondCodeActions, 0, sizeof(CondCodeActions));

  // Set default actions for various operations.
  for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
    // Default all indexed load / store to expand.
    for (unsigned IM = (unsigned)ISD::PRE_INC;
         IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
      setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
      setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
    }

    // These operations default to expand.
    setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
  }

  // Most targets ignore the @llvm.prefetch intrinsic.
  setOperationAction(ISD::PREFETCH, MVT::Other, Expand);

  // ConstantFP nodes default to expand.  Targets can either change this to
  // Legal, in which case all fp constants are legal, or use isFPImmLegal()
  // to optimize expansions for certain constants.
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f80, Expand);

  // These library functions default to expand.
  setOperationAction(ISD::FLOG,   MVT::f64, Expand);
  setOperationAction(ISD::FLOG2,  MVT::f64, Expand);
  setOperationAction(ISD::FLOG10, MVT::f64, Expand);
  setOperationAction(ISD::FEXP,   MVT::f64, Expand);
  setOperationAction(ISD::FEXP2,  MVT::f64, Expand);
  setOperationAction(ISD::FLOG,   MVT::f32, Expand);
  setOperationAction(ISD::FLOG2,  MVT::f32, Expand);
  setOperationAction(ISD::FLOG10, MVT::f32, Expand);
  setOperationAction(ISD::FEXP,   MVT::f32, Expand);
  setOperationAction(ISD::FEXP2,  MVT::f32, Expand);

  // Default ISD::TRAP to expand (which turns it into abort).
  setOperationAction(ISD::TRAP, MVT::Other, Expand);

  IsLittleEndian = TD->isLittleEndian();
  PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
  memset(RegClassForVT, 0, MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
  memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
  maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
  maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize
    = maxStoresPerMemmoveOptSize = 4;
  benefitFromCodePlacementOpt = false;
  UseUnderscoreSetJmp = false;
  UseUnderscoreLongJmp = false;
  SelectIsExpensive = false;
  IntDivIsCheap = false;
  Pow2DivIsCheap = false;
  JumpIsExpensive = false;
  StackPointerRegisterToSaveRestore = 0;
  ExceptionPointerRegister = 0;
  ExceptionSelectorRegister = 0;
  BooleanContents = UndefinedBooleanContent;
  SchedPreferenceInfo = Sched::Latency;
  JumpBufAlignment = 0;
  PrefLoopAlignment = 0;
  MinStackArgumentAlignment = 1;
  ShouldFoldAtomicFences = false;

  InitLibcallNames(LibcallRoutineNames);
  InitCmpLibcallCCs(CmpLibcallCCs);
  InitLibcallCallingConvs(LibcallCallingConvs);
}
TargetLowering::~TargetLowering() {
  delete &TLOF;
}
MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const {
  return MVT::getIntegerVT(8*TD->getPointerSize());
}
/// canOpTrap - Returns true if the operation can trap for the value type.
/// VT must be a legal type.
bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
  assert(isTypeLegal(VT));
  switch (Op) {
  default:
    return false;
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
    return true;
  }
}
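
// Rationale (sketch): integer divide and remainder can trap on a zero
// divisor, and floating-point divide/remainder can trap where FP exceptions
// are unmasked, so those are the opcodes conservatively reported as able to
// trap; ordinary arithmetic and logical operations cannot.
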
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
                                          unsigned &NumIntermediates,
                                          EVT &RegisterVT,
                                          TargetLowering *TLI) {
  // Figure out the right, legal destination reg to copy into.
  unsigned NumElts = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
  // could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(NumElts)) {
    NumVectorRegs = NumElts;
    NumElts = 1;
  }

  // Divide the input until we get to a supported size.  This will always
  // end with a scalar if the target doesn't support vectors.
  while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
  if (!TLI->isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  EVT DestVT = TLI->getRegisterType(NewVT);
  RegisterVT = DestVT;
  if (EVT(DestVT).bitsLT(NewVT))    // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}
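
// Worked example (hypothetical target whose widest legal vector is v4f32):
// breaking down MVT::v8f32 halves the element count once, leaving
// NumIntermediates == 2 with IntermediateVT == v4f32; v4f32 is also the
// register type, so the function reports 2 registers.
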
/// isLegalRC - Return true if the value types that can be represented by the
/// specified register class are all legal.
bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const {
  for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
       I != E; ++I) {
    if (isTypeLegal(*I))
      return true;
  }
  return false;
}
/// hasLegalSuperRegRegClasses - Return true if the specified register class
/// has one or more super-reg register classes that are legal.
bool
TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{
  if (*RC->superregclasses_begin() == 0)
    return false;
  for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
         E = RC->superregclasses_end(); I != E; ++I) {
    const TargetRegisterClass *RRC = *I;
    if (RRC->isASubClass() || !isLegalRC(RRC))
      continue;
    return true;
  }
  return false;
}
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
std::pair<const TargetRegisterClass*, uint8_t>
TargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
  if (!RC)
    return std::make_pair(RC, 0);
  const TargetRegisterClass *BestRC = RC;
  for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
         E = RC->superregclasses_end(); I != E; ++I) {
    const TargetRegisterClass *RRC = *I;
    if (RRC->isASubClass() || !isLegalRC(RRC))
      continue;
    if (!hasLegalSuperRegRegClasses(RRC))
      return std::make_pair(RRC, 1);
    BestRC = RRC;
  }
  return std::make_pair(BestRC, 1);
}
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLowering::computeRegisterProperties() {
  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
         "Too many value types for ValueTypeActions to hold!");

  // Everything defaults to needing one register.
  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
    NumRegistersForVT[i] = 1;
    RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
  }
  // ...except isVoid, which doesn't need any registers.
  NumRegistersForVT[MVT::isVoid] = 0;

  // Find the largest integer register class.
  unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
  for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");

  // Every integer value type larger than this largest register takes twice as
  // many registers to represent as the previous ValueType.
  for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
    EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg;
    if (!ExpandedVT.isInteger())
      break;
    NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
    RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
    TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
    ValueTypeActions.setTypeAction(ExpandedVT, Expand);
  }

  // Inspect all of the ValueTypes smaller than the largest integer
  // register to see which ones need promotion.
  unsigned LegalIntReg = LargestIntReg;
  for (unsigned IntReg = LargestIntReg - 1;
       IntReg >= (unsigned)MVT::i1; --IntReg) {
    EVT IVT = (MVT::SimpleValueType)IntReg;
    if (isTypeLegal(IVT)) {
      LegalIntReg = IntReg;
    } else {
      RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
        (MVT::SimpleValueType)LegalIntReg;
      ValueTypeActions.setTypeAction(IVT, Promote);
    }
  }

  // ppcf128 type is really two f64's.
  if (!isTypeLegal(MVT::ppcf128)) {
    NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
    RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
    TransformToType[MVT::ppcf128] = MVT::f64;
    ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
  }

  // Decide how to handle f64. If the target does not have native f64 support,
  // expand it to i64 and we will be generating soft float library calls.
  if (!isTypeLegal(MVT::f64)) {
    NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
    RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
    TransformToType[MVT::f64] = MVT::i64;
    ValueTypeActions.setTypeAction(MVT::f64, Expand);
  }

  // Decide how to handle f32. If the target does not have native support for
  // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
  if (!isTypeLegal(MVT::f32)) {
    if (isTypeLegal(MVT::f64)) {
      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
      TransformToType[MVT::f32] = MVT::f64;
      ValueTypeActions.setTypeAction(MVT::f32, Promote);
    } else {
      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
      TransformToType[MVT::f32] = MVT::i32;
      ValueTypeActions.setTypeAction(MVT::f32, Expand);
    }
  }

  // Loop over all of the vector value types to see which need transformations.
  for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;
    if (isTypeLegal(VT)) continue;

    // Determine if there is a legal wider type.  If so, we should promote to
    // that wider vector type.
    EVT EltVT = VT.getVectorElementType();
    unsigned NElts = VT.getVectorNumElements();

    bool IsLegalWiderType = false;
    for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
      EVT SVT = (MVT::SimpleValueType)nVT;
      if (SVT.getVectorElementType() == EltVT &&
          SVT.getVectorNumElements() > NElts &&
          isTypeLegal(SVT)) {
        TransformToType[i] = SVT;
        RegisterTypeForVT[i] = SVT;
        NumRegistersForVT[i] = 1;
        ValueTypeActions.setTypeAction(VT, Promote);
        IsLegalWiderType = true;
        break;
      }
    }
    if (IsLegalWiderType) continue;

    MVT IntermediateVT;
    EVT RegisterVT;
    unsigned NumIntermediates;
    NumRegistersForVT[i] =
      getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
                                RegisterVT, this);
    RegisterTypeForVT[i] = RegisterVT;

    EVT NVT = VT.getPow2VectorType();
    if (NVT == VT) {
      // Type is already a power of 2.  The default action is to split.
      TransformToType[i] = MVT::Other;
      ValueTypeActions.setTypeAction(VT, Expand);
    } else {
      TransformToType[i] = NVT;
      ValueTypeActions.setTypeAction(VT, Promote);
    }
  }

  // Determine the 'representative' register class for each value type.
  // A representative register class is the largest (meaning one which is
  // not a sub-register class / subreg register class) legal register class for
  // a group of value types. For example, on i386, i8, i16, and i32
  // representative would be GR32; while on x86_64 it's GR64.
  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
    const TargetRegisterClass* RRC;
    uint8_t Cost;
    tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
    RepRegClassForVT[i] = RRC;
    RepRegClassCostForVT[i] = Cost;
  }
}
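
// Worked example (hypothetical 32-bit target whose largest integer register
// is i32): i64 is marked Expand and reported as two i32 registers, i8 and
// i16 are marked Promote with i32 as their register/transform type, and if
// f64 is not natively supported it is expanded to i64 for soft-float calls.
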
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return NULL;
}
MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const {
  return PointerTy.SimpleTy;
}
MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
  return MVT::i32; // return the default value
}
/// getVectorTypeBreakdown - Vector types are broken down into some number of
/// legal first class types.  For example, MVT::v8f32 maps to 2 MVT::v4f32
/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
///
/// This method returns the number of registers needed, and the VT for each
/// register.  It also returns the VT and quantity of the intermediate values
/// before they are promoted/expanded.
unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                EVT &RegisterVT) const {
  unsigned NumElts = VT.getVectorNumElements();

  // If there is a wider vector type with the same element type as this one,
  // we should widen to that legal vector type.  This handles things like
  // <2 x float> -> <4 x float>.
  if (NumElts != 1 && getTypeAction(VT) == Promote) {
    RegisterVT = getTypeToTransformTo(Context, VT);
    if (isTypeLegal(RegisterVT)) {
      IntermediateVT = RegisterVT;
      NumIntermediates = 1;
      return 1;
    }
  }

  // Figure out the right, legal destination reg to copy into.
  EVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
  // could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(NumElts)) {
    NumVectorRegs = NumElts;
    NumElts = 1;
  }

  // Divide the input until we get to a supported size.  This will always
  // end with a scalar if the target doesn't support vectors.
  while (NumElts > 1 && !isTypeLegal(
                                   EVT::getVectorVT(Context, EltTy, NumElts))) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
  if (!isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  EVT DestVT = getRegisterType(Context, NewVT);
  RegisterVT = DestVT;
  if (DestVT.bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}
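
// Worked example for the non-power-of-2 path (hypothetical target where f32
// is legal but v1f32 is not): v3f32 is split element-wise, giving
// NumIntermediates == 3 with IntermediateVT == f32, and 3 registers are
// reported.
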
/// Get the EVTs and ArgFlags collections that represent the legalized return
/// type of the given function.  This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr,
                         SmallVectorImpl<ISD::OutputArg> &Outs,
                         const TargetLowering &TLI,
                         SmallVectorImpl<uint64_t> *Offsets) {
  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(TLI, ReturnType, ValueVTs);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0) return;
  unsigned Offset = 0;

  for (unsigned j = 0, f = NumValues; j != f; ++j) {
    EVT VT = ValueVTs[j];
    ISD::NodeType ExtendKind = ISD::ANY_EXTEND;

    if (attr & Attribute::SExt)
      ExtendKind = ISD::SIGN_EXTEND;
    else if (attr & Attribute::ZExt)
      ExtendKind = ISD::ZERO_EXTEND;

    // FIXME: C calling convention requires the return type to be promoted to
    // at least 32-bit. But this is not necessary for non-C calling
    // conventions. The frontend should mark functions whose return values
    // require promoting with signext or zeroext attributes.
    if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
      EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
      if (VT.bitsLT(MinVT))
        VT = MinVT;
    }

    unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
    EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
    unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
                        PartVT.getTypeForEVT(ReturnType->getContext()));

    // 'inreg' on function refers to return value
    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
    if (attr & Attribute::InReg)
      Flags.setInReg();

    // Propagate extension type if any
    if (attr & Attribute::SExt)
      Flags.setSExt();
    else if (attr & Attribute::ZExt)
      Flags.setZExt();

    for (unsigned i = 0; i < NumParts; ++i) {
      Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true));
      if (Offsets) {
        Offsets->push_back(Offset);
        Offset += PartSize;
      }
    }
  }
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.  This is the actual
/// alignment, not its logarithm.
unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
  return TD->getCallFrameTypeAlignment(Ty);
}
/// getJumpTableEncoding - Return the entry encoding for a jump table in the
/// current function.  The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress)
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
  return Table;
}
/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
/// MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,MCContext &Ctx) const{
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // Assume that everything is safe in static mode.
  if (getTargetMachine().getRelocationModel() == Reloc::Static)
    return true;

  // In dynamic-no-pic mode, assume that known defined values are safe.
  if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
      GA &&
      !GA->getGlobal()->isDeclaration() &&
      !GA->getGlobal()->isWeakForLinker())
    return true;

  // Otherwise assume nothing is safe.
  return false;
}
//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//

/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer.  If so, check to see if there
/// are any bits set in the constant that are not demanded.  If so, shrink the
/// constant and return true.
bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
                                                        const APInt &Demanded) {
  DebugLoc dl = Op.getDebugLoc();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Op.getOpcode()) {
  default: break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!C) return false;

    if (Op.getOpcode() == ISD::XOR &&
        (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
      return false;

    // if we can expand it to have all bits set, do it
    if (C->getAPIntValue().intersects(~Demanded)) {
      EVT VT = Op.getValueType();
      SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
                                DAG.getConstant(Demanded &
                                                C->getAPIntValue(),
                                                VT));
      return CombineTo(Op, New);
    }

    break;
  }
  }

  return false;
}
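
// Worked example: for (x | 0xFF0F) where only the low byte is demanded
// (Demanded == 0xFF), the constant intersects ~Demanded, so the node is
// rebuilt as (x | 0x0F) using Demanded & C; the high constant bits were
// never observable and are simply dropped.
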
/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
/// casts are free.  This uses isZExtFree and ZERO_EXTEND for the widening
/// cast, but it could be generalized for targets with other types of
/// implicit widening casts.
bool
TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
                                                    unsigned BitWidth,
                                                    const APInt &Demanded,
                                                    DebugLoc dl) {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
                                          Op.getNode()->getOperand(0)),
                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
                                          Op.getNode()->getOperand(1)));
      SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
      return CombineTo(Op, Z);
    }
  }
  return false;
}
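
// Worked example: if only the low 16 bits of an i32 ADD are demanded, and
// the target reports i32->i16 truncation and i16->i32 zero-extension as
// free, the node is rebuilt as (zext (add (trunc x), (trunc y))), letting
// later stages select the narrower operation.
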
/// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
/// DemandedMask bits of the result of Op are ever used downstream.  If we can
/// use this information to simplify Op, create a new simplified DAG node and
/// return true, returning the original and new nodes in Old and New. Otherwise,
/// analyze the expression and return a mask of KnownOne and KnownZero bits for
/// the expression (used to simplify the caller).  The KnownZero/One bits may
/// only be accurate for those bits in the DemandedMask.
bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                                          const APInt &DemandedMask,
                                          APInt &KnownZero,
                                          APInt &KnownOne,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth) const {
  unsigned BitWidth = DemandedMask.getBitWidth();
  assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");
  APInt NewMask = DemandedMask;
  DebugLoc dl = Op.getDebugLoc();

  // Don't know anything.
  KnownZero = KnownOne = APInt(BitWidth, 0);

  // Other users may use these bits.
  if (!Op.getNode()->hasOneUse()) {
    if (Depth != 0) {
      // If not at the root, Just compute the KnownZero/KnownOne bits to
      // simplify things downstream.
      TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
      return false;
    }
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the NewMask to all bits.
    NewMask = APInt::getAllOnesValue(BitWidth);
  } else if (DemandedMask == 0) {
    // Not demanding any bits from Op.
    if (Op.getOpcode() != ISD::UNDEF)
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
    return false;
  } else if (Depth == 6) {        // Limit search depth.
    return false;
  }

  APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
  switch (Op.getOpcode()) {
  case ISD::Constant:
    // We know all of the bits for a constant!
    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
    KnownZero = ~KnownOne & NewMask;
    return false;   // Don't fall through, will infinitely loop.
  case ISD::AND:
    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS.  Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      APInt LHSZero, LHSOne;
      // Do not increment Depth here; that can cause an infinite loop.
      TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
                                LHSZero, LHSOne, Depth);
      // If the LHS already has zeros where RHSC does, this and is dead.
      if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
        return TLO.CombineTo(Op, Op.getOperand(0));
      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
        return true;
    }

    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
                             KnownZero2, KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
    // If the RHS is a constant, see if we can simplify it.
    if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
      return true;

    // Output known-1 bits are only known if set in both the LHS & RHS.
    KnownOne &= KnownOne2;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    KnownZero |= KnownZero2;
    break;
  case ISD::OR:
    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
                             KnownZero2, KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If all of the potentially set bits on one side are known to be set on
    // the other side, just use the 'other' side.
    if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If the RHS is a constant, see if we can simplify it.
    if (TLO.ShrinkDemandedConstant(Op, NewMask))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
      return true;

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    KnownZero &= KnownZero2;
    // Output known-1 are known to be set if set in either the LHS | RHS.
    KnownOne |= KnownOne2;
    break;
  case ISD::XOR:
    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if ((KnownZero & NewMask) == NewMask)
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((KnownZero2 & NewMask) == NewMask)
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If the operation can be done in a smaller type, do so.
    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // (but not both) turn this into an *inclusive* or.
    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
                                               Op.getOperand(0),
                                               Op.getOperand(1)));

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);

    // If all of the demanded bits on one side are known, and all of the set
    // bits on that side are also known to be set on the other side, turn this
    // into an AND, as we know the bits will be cleared.
    //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
      if ((KnownOne & KnownOne2) == KnownOne) {
        EVT VT = Op.getValueType();
        SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
                                                 Op.getOperand(0), ANDC));
      }
    }

    // If the RHS is a constant, see if we can simplify it.
    // for XOR, we prefer to force bits to 1 if they will make a -1.
    // if we can't force bits, try to shrink constant
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      APInt Expanded = C->getAPIntValue() | (~NewMask);
      // if we can expand it to have all bits set, do it
      if (Expanded.isAllOnesValue()) {
        if (Expanded != C->getAPIntValue()) {
          EVT VT = Op.getValueType();
          SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
                                        TLO.DAG.getConstant(Expanded, VT));
          return TLO.CombineTo(Op, New);
        }
        // if it already has all the bits set, nothing to change
        // but don't shrink either!
      } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
        return true;
      }
    }

    KnownZero = KnownZeroOut;
    KnownOne  = KnownOneOut;
    break;
  case ISD::SELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (TLO.ShrinkDemandedConstant(Op, NewMask))
      return true;

    // Only known if known in both the LHS and RHS.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  case ISD::SELECT_CC:
    if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (TLO.ShrinkDemandedConstant(Op, NewMask))
      return true;

    // Only known if known in both the LHS and RHS.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  case ISD::SHL:
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      unsigned ShAmt = SA->getZExtValue();
      SDValue InOp = Op.getOperand(0);

      // If the shift count is an invalid immediate, don't do anything.
      if (ShAmt >= BitWidth)
        break;

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift.  We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      if (InOp.getOpcode() == ISD::SRL &&
          isa<ConstantSDNode>(InOp.getOperand(1))) {
        if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
          unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
          unsigned Opc = ISD::SHL;
          int Diff = ShAmt-C1;
          if (Diff < 0) {
            Diff = -Diff;
            Opc = ISD::SRL;
          }

          SDValue NewSA =
            TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
          EVT VT = Op.getValueType();
          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
                                                   InOp.getOperand(0), NewSA));
        }
      }

      if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
                               KnownZero, KnownOne, TLO, Depth+1))
        return true;

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = InOp.getNode()->getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        if ((APInt::getHighBitsSet(BitWidth,
                                   BitWidth - InnerVT.getSizeInBits()) &
               DemandedMask) == 0 &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          EVT ShTy = getShiftAmountTy(InnerVT);
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
            ShTy = InnerVT;
          SDValue NarrowShl =
            TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
                            TLO.DAG.getConstant(ShAmt, ShTy));
          return
            TLO.CombineTo(Op,
                          TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
                                          NarrowShl));
        }
      }

      KnownZero <<= SA->getZExtValue();
      KnownOne  <<= SA->getZExtValue();
      // low bits known zero.
      KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
    }
    break;
  case ISD::SRL:
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      EVT VT = Op.getValueType();
      unsigned ShAmt = SA->getZExtValue();
      unsigned VTSize = VT.getSizeInBits();
      SDValue InOp = Op.getOperand(0);

      // If the shift count is an invalid immediate, don't do anything.
      if (ShAmt >= BitWidth)
        break;

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift.  We can do this if the top bits (which are shifted out)
      // are never demanded.
      if (InOp.getOpcode() == ISD::SHL &&
          isa<ConstantSDNode>(InOp.getOperand(1))) {
        if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
          unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
          unsigned Opc = ISD::SRL;
          int Diff = ShAmt-C1;
          if (Diff < 0) {
            Diff = -Diff;
            Opc = ISD::SHL;
          }

          SDValue NewSA =
            TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
                                                   InOp.getOperand(0), NewSA));
        }
      }

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
                               KnownZero, KnownOne, TLO, Depth+1))
        return true;
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
      KnownZero = KnownZero.lshr(ShAmt);
      KnownOne  = KnownOne.lshr(ShAmt);

      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
      KnownZero |= HighBits;  // High bits known zero.
    }
    break;
  case ISD::SRA:
    // If this is an arithmetic shift right and only the low bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable.  The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedMask == 1)
      return TLO.CombineTo(Op,
                           TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
                                           Op.getOperand(0), Op.getOperand(1)));

    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      EVT VT = Op.getValueType();
      unsigned ShAmt = SA->getZExtValue();

      // If the shift count is an invalid immediate, don't do anything.
      if (ShAmt >= BitWidth)
        break;

      APInt InDemandedMask = (NewMask << ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
      if (HighBits.intersects(NewMask))
        InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());

      if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
                               KnownZero, KnownOne, TLO, Depth+1))
        return true;
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
      KnownZero = KnownZero.lshr(ShAmt);
      KnownOne  = KnownOne.lshr(ShAmt);

      // Handle the sign bit, adjusted to where it is now in the mask.
      APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
                                                 Op.getOperand(0),
                                                 Op.getOperand(1)));
      } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
        KnownOne |= HighBits;
      }
    }
    break;
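  // A concrete instance of the sra -> srl conversion above, assuming only
  // bit 0 is demanded:
  //   (sra x, c)  -->  (srl x, c)
  // The low bits of the two shifts can only differ when c >= BitWidth,
  // which is already undefined for the shift itself.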
  case ISD::SIGN_EXTEND_INREG: {
    EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

    // Sign extension.  Compute the demanded bits in the result that are not
    // present in the input.
    APInt NewBits =
      APInt::getHighBitsSet(BitWidth,
                            BitWidth - EVT.getScalarType().getSizeInBits());

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if ((NewBits & NewMask) == 0)
      return TLO.CombineTo(Op, Op.getOperand(0));

    APInt InSignBit =
      APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
    APInt InputDemandedBits =
      APInt::getLowBitsSet(BitWidth,
                           EVT.getScalarType().getSizeInBits()) &
      NewMask;

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits |= InSignBit;

    if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (KnownZero.intersects(InSignBit))
      return TLO.CombineTo(Op,
                           TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));

    if (KnownOne.intersects(InSignBit)) {    // Input sign bit known set
      KnownOne |= NewBits;
      KnownZero &= ~NewBits;
    } else {                       // Input sign bit unknown
      KnownZero &= ~NewBits;
      KnownOne &= ~NewBits;
    }
    break;
  }
  case ISD::ZERO_EXTEND: {
    unsigned OperandBitWidth =
      Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
    APInt InMask = NewMask.trunc(OperandBitWidth);

    // If none of the top bits are demanded, convert this into an any_extend.
    APInt NewBits =
      APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
    if (!NewBits.intersects(NewMask))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
                                               Op.getValueType(),
                                               Op.getOperand(0)));

    if (SimplifyDemandedBits(Op.getOperand(0), InMask,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    KnownZero = KnownZero.zext(BitWidth);
    KnownOne = KnownOne.zext(BitWidth);
    KnownZero |= NewBits;
    break;
  }
  case ISD::SIGN_EXTEND: {
    EVT InVT = Op.getOperand(0).getValueType();
    unsigned InBits = InVT.getScalarType().getSizeInBits();
    APInt InMask    = APInt::getLowBitsSet(BitWidth, InBits);
    APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
    APInt NewBits   = ~InMask & NewMask;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (NewBits == 0)
      return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
                                              Op.getValueType(),
                                              Op.getOperand(0)));

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    APInt InDemandedBits = InMask & NewMask;
    InDemandedBits |= InSignBit;
    InDemandedBits = InDemandedBits.trunc(InBits);

    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    KnownZero = KnownZero.zext(BitWidth);
    KnownOne = KnownOne.zext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (KnownZero.intersects(InSignBit))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
                                               Op.getValueType(),
                                               Op.getOperand(0)));

    // If the sign bit is known one, the top bits match.
    if (KnownOne.intersects(InSignBit)) {
      KnownOne |= NewBits;
      KnownZero &= ~NewBits;
    } else {   // Otherwise, top bits aren't known.
      KnownOne &= ~NewBits;
      KnownZero &= ~NewBits;
    }
    break;
  }
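  // For instance, assuming x is i8 and only the low 8 bits of the i32
  // result are demanded:
  //   (sign_extend x)  -->  (any_extend x)
  // and if the i8 sign bit is additionally known zero, the case above
  // produces (zero_extend x) instead.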
  case ISD::ANY_EXTEND: {
    unsigned OperandBitWidth =
      Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
    APInt InMask = NewMask.trunc(OperandBitWidth);
    if (SimplifyDemandedBits(Op.getOperand(0), InMask,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    KnownZero = KnownZero.zext(BitWidth);
    KnownOne = KnownOne.zext(BitWidth);
    break;
  }
  case ISD::TRUNCATE: {
    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    unsigned OperandBitWidth =
      Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
    APInt TruncMask = NewMask.zext(OperandBitWidth);
    if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    KnownZero = KnownZero.trunc(BitWidth);
    KnownOne = KnownOne.trunc(BitWidth);

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    if (Op.getOperand(0).getNode()->hasOneUse()) {
      SDValue In = Op.getOperand(0);
      switch (In.getOpcode()) {
      default: break;
      case ISD::SRL:
        // Shrink SRL by a constant if none of the high bits shifted in are
        // demanded.
        if (TLO.LegalTypes() &&
            !isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
          // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
          // undesirable.
          break;
        ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
        if (!ShAmt)
          break;
        APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
                                               OperandBitWidth - BitWidth);
        HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);

        if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
          // None of the shifted in bits are needed.  Add a truncate of the
          // shift input, then shift it.
          SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
                                             Op.getValueType(),
                                             In.getOperand(0));
          return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
                                                   Op.getValueType(),
                                                   NewTrunc,
                                                   In.getOperand(1)));
        }
        break;
      }
    }

    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    break;
  }
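  // A worked instance of the SRL shrink above, assuming x is i64 being
  // truncated to i32 and bits 24-31 of the result are not demanded:
  //   (trunc (srl x, 8))  -->  (srl (trunc x), 8)
  // The bits shifted in from above bit 31 were just shown to be
  // undemanded (HighBits & NewMask is zero), so narrowing is safe.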
  case ISD::AssertZext: {
    // Demand all the bits of the input that are demanded in the output.
    // The low bits are obvious; the high bits are demanded because we're
    // asserting that they're zero here.
    if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");

    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    APInt InMask = APInt::getLowBitsSet(BitWidth,
                                        VT.getSizeInBits());
    KnownZero |= ~InMask & NewMask;
    break;
  }
  case ISD::BITCAST:
#if 0
    // If this is an FP->Int bitcast and if the sign bit is the only thing that
    // is demanded, turn this into a FGETSIGN.
    if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) &&
        MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
        !MVT::isVector(Op.getOperand(0).getValueType())) {
      // Only do this xform if FGETSIGN is valid or if before legalize.
      if (!TLO.AfterLegalize ||
          isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place.  We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
                                       Op.getOperand(0));
        unsigned ShVal = Op.getValueType().getSizeInBits()-1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
                                                 Sign, ShAmt));
      }
    }
#endif
    break;
  case ISD::ADD:
  case ISD::MUL:
  case ISD::SUB: {
    // Add, Sub, and Mul don't demand any bits in positions beyond that
    // of the highest bit demanded of them.
    APInt LoMask = APInt::getLowBitsSet(BitWidth,
                                        BitWidth - NewMask.countLeadingZeros());
    if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    // See if the operation should be performed at a smaller bit width.
    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
      return true;
  }
  // FALL THROUGH
  default:
    // Just use ComputeMaskedBits to compute output bits.
    TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
    break;
  }

  // If we know the value of all of the demanded bits, return this as a
  // constant.
  if ((NewMask & (KnownZero|KnownOne)) == NewMask)
    return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));

  return false;
}
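// A small worked example of the final constant fold above: if
// NewMask = 0xFF and the analysis proved KnownOne = 0x21 with
// KnownZero = ~0x21, then every demanded bit is known and the whole
// node is replaced by the constant 0x21.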
/// computeMaskedBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                    const APInt &Mask,
                                                    APInt &KnownZero,
                                                    APInt &KnownOne,
                                                    const SelectionDAG &DAG,
                                                    unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");
  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
}
/// ComputeNumSignBitsForTargetNode - This method can be implemented by
/// targets that want to expose additional information about sign bits to the
/// DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                         unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use ComputeNumSignBits if you don't know whether Op"
         " is a target node!");
  return 1;
}
/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
/// one bit set.  This differs from ComputeMaskedBits in that it doesn't need
/// to determine which bit is set.
///
static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
  // A left-shift of a constant one will have exactly one bit set, because
  // shifting the bit off the end is undefined.
  if (Val.getOpcode() == ISD::SHL)
    if (ConstantSDNode *C =
         dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
      if (C->getAPIntValue() == 1)
        return true;

  // Similarly, a right-shift of a constant sign-bit will have exactly
  // one bit set.
  if (Val.getOpcode() == ISD::SRL)
    if (ConstantSDNode *C =
         dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
      if (C->getAPIntValue().isSignBit())
        return true;

  // More could be done here, though the above checks are enough
  // to handle some common cases.

  // Fall back to ComputeMaskedBits to catch other known cases.
  EVT OpVT = Val.getValueType();
  unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
  APInt Mask = APInt::getAllOnesValue(BitWidth);
  APInt KnownZero, KnownOne;
  DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
  return (KnownZero.countPopulation() == BitWidth - 1) &&
         (KnownOne.countPopulation() == 1);
}
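// Examples: (shl 1, n) always has exactly one bit set, as does
// (srl 0x80000000, n) for an i32 value.  Something like (and x, 1) is
// correctly rejected by the fallback: its KnownZero covers the top 31
// bits, but the remaining bit is unknown rather than known one.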
/// SimplifySetCC - Try to simplify a setcc built with the specified operands
/// and cc.  If it is unable to simplify it, return a null SDValue.
SDValue
TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                              ISD::CondCode Cond, bool foldBooleans,
                              DAGCombinerInfo &DCI, DebugLoc dl) const {
  SelectionDAG &DAG = DCI.DAG;
  LLVMContext &Context = *DAG.getContext();

  // These setcc operations always fold.
  switch (Cond) {
  default: break;
  case ISD::SETFALSE:
  case ISD::SETFALSE2: return DAG.getConstant(0, VT);
  case ISD::SETTRUE:
  case ISD::SETTRUE2:  return DAG.getConstant(1, VT);
  }
  if (isa<ConstantSDNode>(N0.getNode())) {
    // Ensure that the constant occurs on the RHS, and fold constant
    // comparisons.
    return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
  }

  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const APInt &C1 = N1C->getAPIntValue();

    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
    // equality comparison, then we're just comparing whether X itself is
    // zero.
    if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
        N0.getOperand(0).getOpcode() == ISD::CTLZ &&
        N0.getOperand(1).getOpcode() == ISD::Constant) {
      const APInt &ShAmt
        = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
          ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
        if ((C1 == 0) == (Cond == ISD::SETEQ)) {
          // (srl (ctlz x), 5) == 0  -> X != 0
          // (srl (ctlz x), 5) != 1  -> X != 0
          Cond = ISD::SETNE;
        } else {
          // (srl (ctlz x), 5) != 0  -> X == 0
          // (srl (ctlz x), 5) == 1  -> X == 0
          Cond = ISD::SETEQ;
        }
        SDValue Zero = DAG.getConstant(0, N0.getValueType());
        return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
                            Zero, Cond);
      }
    }
    SDValue CTPOP = N0;
    // Look through truncs that don't change the value of a ctpop.
    if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
      CTPOP = N0.getOperand(0);

    if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
        (N0 == CTPOP || N0.getValueType().getSizeInBits() >
                        Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
      EVT CTVT = CTPOP.getValueType();
      SDValue CTOp = CTPOP.getOperand(0);

      // (ctpop x) u< 2 -> (x & x-1) == 0
      // (ctpop x) u> 1 -> (x & x-1) != 0
      if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
        SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
                                  DAG.getConstant(1, CTVT));
        SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
        ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
        return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
      }

      // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
    }
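    // The u< 2 / u> 1 fold above is the classic power-of-two test:
    // clearing the lowest set bit with x & (x-1) leaves zero exactly when
    // x has at most one bit set, so no popcount instruction is needed.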
    // If the LHS is '(and load, const)', the RHS is 0,
    // the test is for equality or unsigned, and all 1 bits of the const are
    // in the same partial word, see if we can shorten the load.
    if (DCI.isBeforeLegalize() &&
        N0.getOpcode() == ISD::AND && C1 == 0 &&
        N0.getNode()->hasOneUse() &&
        isa<LoadSDNode>(N0.getOperand(0)) &&
        N0.getOperand(0).getNode()->hasOneUse() &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
      APInt bestMask;
      unsigned bestWidth = 0, bestOffset = 0;
      if (!Lod->isVolatile() && Lod->isUnindexed()) {
        unsigned origWidth = N0.getValueType().getSizeInBits();
        unsigned maskWidth = origWidth;
        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
        // 8 bits, but have to be careful...
        if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
          origWidth = Lod->getMemoryVT().getSizeInBits();
        const APInt &Mask =
          cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        for (unsigned width = origWidth / 2; width>=8; width /= 2) {
          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
          for (unsigned offset=0; offset<origWidth/width; offset++) {
            if ((newMask & Mask) == Mask) {
              if (!TD->isLittleEndian())
                bestOffset = (origWidth/width - offset - 1) * (width/8);
              else
                bestOffset = (uint64_t)offset * (width/8);
              bestMask = Mask.lshr(offset * (width/8) * 8);
              bestWidth = width;
              break;
            }
            newMask = newMask << width;
          }
        }
      }
      if (bestWidth) {
        EVT newVT = EVT::getIntegerVT(Context, bestWidth);
        if (newVT.isRound()) {
          EVT PtrType = Lod->getOperand(1).getValueType();
          SDValue Ptr = Lod->getBasePtr();
          if (bestOffset != 0)
            Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
                              DAG.getConstant(bestOffset, PtrType));
          unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
          SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
                                Lod->getPointerInfo().getWithOffset(bestOffset),
                                        false, false, NewAlign);
          return DAG.getSetCC(dl, VT,
                              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
                                          DAG.getConstant(bestMask.trunc(bestWidth),
                                                          newVT)),
                              DAG.getConstant(0LL, newVT), Cond);
        }
      }
    }
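    // Illustrative case, assuming a little-endian target:
    //   (seteq (and (load i32 p), 0x00FF0000), 0)
    // keeps all mask bits inside one byte, so it can be narrowed to
    //   (seteq (and (load i8 p+2), 0xFF), 0)
    // with the alignment reduced via MinAlign accordingly.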
    // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
      unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();

      // If the comparison constant has bits in the upper part, the
      // zero-extended value could never match.
      if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
                                              C1.getBitWidth() - InSize))) {
        switch (Cond) {
        case ISD::SETUGT:
        case ISD::SETUGE:
        case ISD::SETEQ: return DAG.getConstant(0, VT);
        case ISD::SETULT:
        case ISD::SETULE:
        case ISD::SETNE: return DAG.getConstant(1, VT);
        case ISD::SETGT:
        case ISD::SETGE:
          // True if the sign bit of C1 is set.
          return DAG.getConstant(C1.isNegative(), VT);
        case ISD::SETLT:
        case ISD::SETLE:
          // True if the sign bit of C1 isn't set.
          return DAG.getConstant(C1.isNonNegative(), VT);
        default:
          break;
        }
      }

      // Otherwise, we can perform the comparison with the low bits.
      switch (Cond) {
      case ISD::SETEQ:
      case ISD::SETNE:
      case ISD::SETUGT:
      case ISD::SETUGE:
      case ISD::SETULT:
      case ISD::SETULE: {
        EVT newVT = N0.getOperand(0).getValueType();
        if (DCI.isBeforeLegalizeOps() ||
            (isOperationLegal(ISD::SETCC, newVT) &&
             getCondCodeAction(Cond, newVT)==Legal))
          return DAG.getSetCC(dl, VT, N0.getOperand(0),
                              DAG.getConstant(C1.trunc(InSize), newVT),
                              Cond);
        break;
      }
      default:
        break;   // todo, be more careful with signed comparisons
      }
    } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
      unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
      EVT ExtDstTy = N0.getValueType();
      unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();

      // If the constant doesn't fit into the number of bits for the source of
      // the sign extension, it is impossible for both sides to be equal.
      if (C1.getMinSignedBits() > ExtSrcTyBits)
        return DAG.getConstant(Cond == ISD::SETNE, VT);

      SDValue ZextOp;
      EVT Op0Ty = N0.getOperand(0).getValueType();
      if (Op0Ty == ExtSrcTy) {
        ZextOp = N0.getOperand(0);
      } else {
        APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
        ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
                             DAG.getConstant(Imm, Op0Ty));
      }
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(ZextOp.getNode());
      // Otherwise, make this a use of a zext.
      return DAG.getSetCC(dl, VT, ZextOp,
                          DAG.getConstant(C1 & APInt::getLowBitsSet(
                                                              ExtDstTyBits,
                                                              ExtSrcTyBits),
                                          ExtDstTy),
                          Cond);
    } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
      if (N0.getOpcode() == ISD::SETCC &&
          isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
        if (TrueWhenTrue)
          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
        // Invert the condition.
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        CC = ISD::getSetCCInverse(CC,
                                  N0.getOperand(0).getValueType().isInteger());
        return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
      }
      if ((N0.getOpcode() == ISD::XOR ||
           (N0.getOpcode() == ISD::AND &&
            N0.getOperand(0).getOpcode() == ISD::XOR &&
            N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
          isa<ConstantSDNode>(N0.getOperand(1)) &&
          cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
        // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
        // can only do this if the top bits are known zero.
        unsigned BitWidth = N0.getValueSizeInBits();
        if (DAG.MaskedValueIsZero(N0,
                                  APInt::getHighBitsSet(BitWidth,
                                                        BitWidth-1))) {
          // Okay, get the un-inverted input value.
          SDValue Val;
          if (N0.getOpcode() == ISD::XOR)
            Val = N0.getOperand(0);
          else {
            assert(N0.getOpcode() == ISD::AND &&
                   N0.getOperand(0).getOpcode() == ISD::XOR);
            // ((X^1)&1)^1 -> X & 1
            Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
                              N0.getOperand(0).getOperand(0),
                              N0.getOperand(1));
          }

          return DAG.getSetCC(dl, VT, Val, N1,
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
      } else if (N1C->getAPIntValue() == 1 &&
                 (VT == MVT::i1 ||
                  getBooleanContents() == ZeroOrOneBooleanContent)) {
        SDValue Op0 = N0;
        if (Op0.getOpcode() == ISD::TRUNCATE)
          Op0 = Op0.getOperand(0);

        if ((Op0.getOpcode() == ISD::XOR) &&
            Op0.getOperand(0).getOpcode() == ISD::SETCC &&
            Op0.getOperand(1).getOpcode() == ISD::SETCC) {
          // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
          Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
          return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
                              Cond);
        } else if (Op0.getOpcode() == ISD::AND &&
                   isa<ConstantSDNode>(Op0.getOperand(1)) &&
                   cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
          // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
          if (Op0.getValueType().bitsGT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                          DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
                              DAG.getConstant(1, VT));
          else if (Op0.getValueType().bitsLT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                        DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
                              DAG.getConstant(1, VT));

          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
      }
    }
    APInt MinVal, MaxVal;
    unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
    if (ISD::isSignedIntSetCC(Cond)) {
      MinVal = APInt::getSignedMinValue(OperandBitSize);
      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
    } else {
      MinVal = APInt::getMinValue(OperandBitSize);
      MaxVal = APInt::getMaxValue(OperandBitSize);
    }

    // Canonicalize GE/LE comparisons to use GT/LT comparisons.
    if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
      if (C1 == MinVal) return DAG.getConstant(1, VT);   // X >= MIN --> true
      // X >= C0 --> X > (C0-1)
      return DAG.getSetCC(dl, VT, N0,
                          DAG.getConstant(C1-1, N1.getValueType()),
                          (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
    }

    if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
      if (C1 == MaxVal) return DAG.getConstant(1, VT);   // X <= MAX --> true
      // X <= C0 --> X < (C0+1)
      return DAG.getSetCC(dl, VT, N0,
                          DAG.getConstant(C1+1, N1.getValueType()),
                          (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
    }

    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
      return DAG.getConstant(0, VT);      // X < MIN --> false
    if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
      return DAG.getConstant(1, VT);      // X >= MIN --> true
    if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
      return DAG.getConstant(0, VT);      // X > MAX --> false
    if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
      return DAG.getConstant(1, VT);      // X <= MAX --> true

    // Canonicalize setgt X, Min --> setne X, Min
    if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
      return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
    // Canonicalize setlt X, Max --> setne X, Max
    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
      return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

    // If we have setult X, 1, turn it into seteq X, 0
    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
      return DAG.getSetCC(dl, VT, N0,
                          DAG.getConstant(MinVal, N0.getValueType()),
                          ISD::SETEQ);
    // If we have setugt X, Max-1, turn it into seteq X, Max
    else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
      return DAG.getSetCC(dl, VT, N0,
                          DAG.getConstant(MaxVal, N0.getValueType()),
                          ISD::SETEQ);

    // If we have "setcc X, C0", check to see if we can shrink the immediate
    // by changing cc.

    // SETUGT X, SINTMAX  -> SETLT X, 0
    if (Cond == ISD::SETUGT &&
        C1 == APInt::getSignedMaxValue(OperandBitSize))
      return DAG.getSetCC(dl, VT, N0,
                          DAG.getConstant(0, N1.getValueType()),
                          ISD::SETLT);

    // SETULT X, SINTMIN  -> SETGT X, -1
    if (Cond == ISD::SETULT &&
        C1 == APInt::getSignedMinValue(OperandBitSize)) {
      SDValue ConstMinusOne =
        DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
                        N1.getValueType());
      return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
    }
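    // Worked example for i32, where SINTMAX is 0x7FFFFFFF:
    //   (setugt X, 0x7FFFFFFF)  -->  (setlt X, 0)
    // "unsigned X > signed-max" holds exactly when X's sign bit is set.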
    // Fold bit comparisons when we can.
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        (VT == N0.getValueType() ||
         (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
        N0.getOpcode() == ISD::AND)
      if (ConstantSDNode *AndRHS =
                  dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        EVT ShiftTy = DCI.isBeforeLegalize() ?
          getPointerTy() : getShiftAmountTy(N0.getValueType());
        if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
          // Perform the xform if the AND RHS is a single bit.
          if (AndRHS->getAPIntValue().isPowerOf2()) {
            return DAG.getNode(ISD::TRUNCATE, dl, VT,
                      DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
                 DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy)));
          }
        } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
          // (X & 8) == 8  -->  (X & 8) >> 3
          // Perform the xform if C1 is a single bit.
          if (C1.isPowerOf2()) {
            return DAG.getNode(ISD::TRUNCATE, dl, VT,
                      DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
                                  DAG.getConstant(C1.logBase2(), ShiftTy)));
          }
        }
      }
  }
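  // A concrete instance of the bit-test xform above, for an i32 X:
  //   (setne (and X, 8), 0)  -->  (trunc (srl (and X, 8), 3))
  // since logBase2(8) == 3 moves the tested bit down into bit 0.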
  if (isa<ConstantFPSDNode>(N0.getNode())) {
    // Constant fold or commute setcc.
    SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
    if (O.getNode()) return O;
  } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
    // If the RHS of an FP comparison is a constant, simplify it away in
    // some cases.
    if (CFP->getValueAPF().isNaN()) {
      // If an operand is known to be a nan, we can fold it.
      switch (ISD::getUnorderedFlavor(Cond)) {
      default: llvm_unreachable("Unknown flavor!");
      case 0:  // Known false.
        return DAG.getConstant(0, VT);
      case 1:  // Known true.
        return DAG.getConstant(1, VT);
      case 2:  // Undefined.
        return DAG.getUNDEF(VT);
      }
    }

    // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
    // constant if knowing that the operand is non-nan is enough.  We prefer to
    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
    // materialize 0.0.
    if (Cond == ISD::SETO || Cond == ISD::SETUO)
      return DAG.getSetCC(dl, VT, N0, N0, Cond);
    // If the condition is not legal, see if we can find an equivalent one
    // which is legal.
    if (!isCondCodeLegal(Cond, N0.getValueType())) {
      // If the comparison was an awkward floating-point == or != and one of
      // the comparison operands is infinity or negative infinity, convert the
      // condition to a less-awkward <= or >=.
      if (CFP->getValueAPF().isInfinity()) {
        if (CFP->getValueAPF().isNegative()) {
          if (Cond == ISD::SETOEQ &&
              isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
          if (Cond == ISD::SETUEQ &&
              isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
          if (Cond == ISD::SETUNE &&
              isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
          if (Cond == ISD::SETONE &&
              isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
        } else {
          if (Cond == ISD::SETOEQ &&
              isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
          if (Cond == ISD::SETUEQ &&
              isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
          if (Cond == ISD::SETUNE &&
              isCondCodeLegal(ISD::SETULT, N0.getValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
          if (Cond == ISD::SETONE &&
              isCondCodeLegal(ISD::SETULT, N0.getValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
        }
      }
    }
  }
  if (N0 == N1) {
    // We can always fold X == X for integer setcc's.
    if (N0.getValueType().isInteger())
      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
    unsigned UOF = ISD::getUnorderedFlavor(Cond);
    if (UOF == 2)   // FP operators that are undefined on NaNs.
      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
    if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
      return DAG.getConstant(UOF, VT);
    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
    // if it is not already.
    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
    if (NewCond != Cond)
      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
  }
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
      N0.getValueType().isInteger()) {
    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
        N0.getOpcode() == ISD::XOR) {
      // Simplify (X+Y) == (X+Z) -->  Y == Z
      if (N0.getOpcode() == N1.getOpcode()) {
        if (N0.getOperand(0) == N1.getOperand(0))
          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
        if (N0.getOperand(1) == N1.getOperand(1))
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
        if (DAG.isCommutativeBinOp(N0.getOpcode())) {
          // If X op Y == Y op X, try other combinations.
          if (N0.getOperand(0) == N1.getOperand(1))
            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
                                Cond);
          if (N0.getOperand(1) == N1.getOperand(0))
            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
                                Cond);
        }
      }

      if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
        if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          // Turn (X+C1) == C2 --> X == C2-C1
          if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
            return DAG.getSetCC(dl, VT, N0.getOperand(0),
                                DAG.getConstant(RHSC->getAPIntValue()-
                                                LHSR->getAPIntValue(),
                                N0.getValueType()), Cond);
          }

          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
          if (N0.getOpcode() == ISD::XOR)
            // If we know that all of the inverted bits are zero, don't bother
            // performing the inversion.
            if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
              return
                DAG.getSetCC(dl, VT, N0.getOperand(0),
                             DAG.getConstant(LHSR->getAPIntValue() ^
                                               RHSC->getAPIntValue(),
                                             N0.getValueType()),
                             Cond);
        }

        // Turn (C1-X) == C2 --> X == C1-C2
        if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
          if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
            return
              DAG.getSetCC(dl, VT, N0.getOperand(1),
                           DAG.getConstant(SUBC->getAPIntValue() -
                                             RHSC->getAPIntValue(),
                                           N0.getValueType()),
                           Cond);
          }
        }
      }

      // Simplify (X+Z) == X -->  Z == 0
      if (N0.getOperand(0) == N1)
        return DAG.getSetCC(dl, VT, N0.getOperand(1),
                        DAG.getConstant(0, N0.getValueType()), Cond);
      if (N0.getOperand(1) == N1) {
        if (DAG.isCommutativeBinOp(N0.getOpcode()))
          return DAG.getSetCC(dl, VT, N0.getOperand(0),
                          DAG.getConstant(0, N0.getValueType()), Cond);
        else if (N0.getNode()->hasOneUse()) {
          assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
          // (Z-X) == X  --> Z == X<<1
          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
                                   N1,
                       DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
          if (!DCI.isCalledByLegalizer())
            DCI.AddToWorklist(SH.getNode());
          return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
        }
      }
    }
    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
        N1.getOpcode() == ISD::XOR) {
      // Simplify  X == (X+Z) -->  Z == 0
      if (N1.getOperand(0) == N0) {
        return DAG.getSetCC(dl, VT, N1.getOperand(1),
                        DAG.getConstant(0, N1.getValueType()), Cond);
      } else if (N1.getOperand(1) == N0) {
        if (DAG.isCommutativeBinOp(N1.getOpcode())) {
          return DAG.getSetCC(dl, VT, N1.getOperand(0),
                          DAG.getConstant(0, N1.getValueType()), Cond);
        } else if (N1.getNode()->hasOneUse()) {
          assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
          // X == (Z-X)  --> X<<1 == Z
          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
                       DAG.getConstant(1, getShiftAmountTy(N0.getValueType())));
          if (!DCI.isCalledByLegalizer())
            DCI.AddToWorklist(SH.getNode());
          return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
        }
      }
    }
    // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
    // Note that where y is variable and is known to have at most
    // one bit set (for example, if it is z&1) we cannot do this;
    // the expressions are not equivalent when y==0.
    if (N0.getOpcode() == ISD::AND)
      if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
        if (ValueHasExactlyOneBitSet(N1, DAG)) {
          Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
          SDValue Zero = DAG.getConstant(0, N1.getValueType());
          return DAG.getSetCC(dl, VT, N0, Zero, Cond);
        }
      }
    if (N1.getOpcode() == ISD::AND)
      if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
        if (ValueHasExactlyOneBitSet(N0, DAG)) {
          Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
          SDValue Zero = DAG.getConstant(0, N0.getValueType());
          return DAG.getSetCC(dl, VT, N1, Zero, Cond);
        }
      }
  }
  // Fold away ALL boolean setcc's.
  SDValue Temp;
  if (N0.getValueType() == MVT::i1 && foldBooleans) {
    switch (Cond) {
    default: llvm_unreachable("Unknown integer setcc!");
    case ISD::SETEQ:  // X == Y  -> ~(X^Y)
      Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
      N0 = DAG.getNOT(dl, Temp, MVT::i1);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETNE:  // X != Y   -->  (X^Y)
      N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
      break;
    case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
    case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
      Temp = DAG.getNOT(dl, N0, MVT::i1);
      N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
    case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
      Temp = DAG.getNOT(dl, N1, MVT::i1);
      N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
    case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
      Temp = DAG.getNOT(dl, N0, MVT::i1);
      N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
    case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
      Temp = DAG.getNOT(dl, N1, MVT::i1);
      N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
      break;
    }
    if (VT != MVT::i1) {
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(N0.getNode());
      // FIXME: If running after legalize, we probably can't do this.
      N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
    }
    return N0;
  }

  // Could not fold it.
  return SDValue();
}
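// Sanity check of one row in the boolean table above: for i1 values,
// X <u Y holds only for X == 0, Y == 1, which is exactly ~X & Y.  The
// signed X >s Y row produces the same AND because the i1 value "1" is
// signed -1, making 0 >s -1 the only true case.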
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
                                    int64_t &Offset) const {
  if (isa<GlobalAddressSDNode>(N)) {
    GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  if (N->getOpcode() == ISD::ADD) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}
SDValue TargetLowering::
PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//
TargetLowering::ConstraintType
TargetLowering::getConstraintType(const std::string &Constraint) const {
  // FIXME: lots more standard ones to handle.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'r': return C_RegisterClass;
    case 'm':    // memory
    case 'o':    // offsetable
    case 'V':    // not offsetable
      return C_Memory;
    case 'i':    // Simple Integer or Relocatable Constant
    case 'n':    // Simple Integer
    case 'E':    // Floating Point Constant
    case 'F':    // Floating Point Constant
    case 's':    // Relocatable Constant
    case 'p':    // Address.
    case 'X':    // Allow ANY value.
    case 'I':    // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case '<':
    case '>':
      return C_Other;
    }
  }

  if (Constraint.size() > 1 && Constraint[0] == '{' &&
      Constraint[Constraint.size()-1] == '}')
    return C_Register;
  return C_Unknown;
}
/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
  if (ConstraintVT.isInteger())
    return "r";
  if (ConstraintVT.isFloatingPoint())
    return "f";      // works for many targets
  return 0;
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  char ConstraintLetter,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {
  switch (ConstraintLetter) {
  default: break;
  case 'X':     // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock) {
      Ops.push_back(Op);
      return;
    }
    // fall through
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant
    // These operands are interested in values of the form (GV+C), where C may
    // be folded in as an offset of GV, or it may be explicitly added.  Also, it
    // is possible and fine if either GV or C are missing.
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);

    // If we have "(add GV, C)", pull out GV/C
    if (Op.getOpcode() == ISD::ADD) {
      C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C == 0 || GA == 0) {
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
      }
      if (C == 0 || GA == 0)
        C = 0, GA = 0;
    }

    // If we find a valid operand, map to the TargetXXX version so that the
    // value itself doesn't get selected.
    if (GA) {   // Either &GV or &GV+C
      if (ConstraintLetter != 'n') {
        int64_t Offs = GA->getOffset();
        if (C) Offs += C->getZExtValue();
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
                                                 C ? C->getDebugLoc() : DebugLoc(),
                                                 Op.getValueType(), Offs));
        return;
      }
    }
    if (C) {   // just C, no GV.
      // Simple constants are not allowed for 's'.
      if (ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
                                            MVT::i64));
        return;
      }
    }
    break;
  }
  }
}
std::vector<unsigned> TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  EVT VT) const {
  return std::vector<unsigned>();
}
std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint,
                             EVT VT) const {
  if (Constraint[0] != '{')
    return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
  assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");

  // Remove the braces from around the name.
  StringRef RegName(Constraint.data()+1, Constraint.size()-2);

  // Figure out which register class contains this reg.
  const TargetRegisterInfo *RI = TM.getRegisterInfo();
  for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
       E = RI->regclass_end(); RCI != E; ++RCI) {
    const TargetRegisterClass *RC = *RCI;

    // If none of the value types for this register class are valid, we
    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
    bool isLegal = false;
    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
         I != E; ++I) {
      if (isTypeLegal(*I)) {
        isLegal = true;
        break;
      }
    }

    if (!isLegal) continue;

    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
         I != E; ++I) {
      if (RegName.equals_lower(RI->getName(*I)))
        return std::make_pair(*I, RC);
    }
  }

  return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
}
//===----------------------------------------------------------------------===//
// Constraint Selection.

/// isMatchingInputConstraint - Return true if this is an input operand that is
/// a matching constraint like "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return isdigit(ConstraintCode[0]);
}

/// getMatchedOperand - If this is an input matching constraint, this method
/// returns the output operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return atoi(ConstraintCode.c_str());
}
/// ParseConstraints - Split up the constraint string from the inline
/// assembly value into the specific constraints and their prefixes,
/// and also tie in the associated operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
    ImmutableCallSite CS) const {
  /// ConstraintOperands - Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  InlineAsm::ConstraintInfoVector
    ConstraintInfos = IA->ParseConstraints();

  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0;   // ResNo - The result number of the next output.

  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
    ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!CS.getType()->isVoidTy() &&
             "Bad inline asm!");
      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
        OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT = getValueType(CS.getType());
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      const llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = TD->getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
            EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (dyn_cast<PointerType>(OpTy)) {
        OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
      } else {
        OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
      }
    }
  }
  // If we have multiple alternative constraints, select the best alternative.
  if (ConstraintInfos.size()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
            cIndex != eIndex; ++cIndex) {
          AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1;  // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }

        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
          cIndex != eIndex; ++cIndex) {
        AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
        if (cInfo.Type == InlineAsm::isClobber)
          continue;
        cInfo.selectAlternative(bestMAIndex);
      }
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
      cIndex != eIndex; ++cIndex) {
    AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (OpInfo.ConstraintVT.getSizeInBits() !=
             Input.ConstraintVT.getSizeInBits())) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
/// getConstraintGenerality - Return an integer indicating how general CT
/// is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
  switch (CT) {
  default: llvm_unreachable("Unknown constraint type!");
  case TargetLowering::C_Other:
  case TargetLowering::C_Unknown:
    return 0;
  case TargetLowering::C_Register:
    return 1;
  case TargetLowering::C_RegisterClass:
    return 2;
  case TargetLowering::C_Memory:
    return 3;
  }
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
  TargetLowering::getMultipleConstraintMatchWeight(
    AsmOperandInfo &info, int maIndex) const {
  InlineAsm::ConstraintCodeVector *rCodes;
  if (maIndex >= (int)info.multipleAlternatives.size())
    rCodes = &info.Codes;
  else
    rCodes = &info.multipleAlternatives[maIndex].Codes;
  ConstraintWeight BestWeight = CW_Invalid;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
    ConstraintWeight weight =
      getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
    if (weight > BestWeight)
      BestWeight = weight;
  }

  return BestWeight;
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
  TargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (CallOperandVal == NULL)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  case 'i': // immediate integer.
  case 'n': // immediate integer with a known value.
    if (isa<ConstantInt>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 's': // non-explicit integral immediate.
    if (isa<GlobalValue>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 'E': // immediate float if host format.
  case 'F': // immediate float.
    if (isa<ConstantFP>(CallOperandVal))
      weight = CW_Constant;
    break;
  case '<': // memory operand with autodecrement.
  case '>': // memory operand with autoincrement.
  case 'm': // memory operand.
  case 'o': // offsettable memory operand
  case 'V': // non-offsettable memory operand
    weight = CW_Memory;
    break;
  case 'r': // general register.
  case 'g': // general register, memory operand or immediate integer.
            // note: Clang converts "g" to "imr".
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;
  case 'X': // any operand.
  default:
    weight = CW_Default;
    break;
  }
  return weight;
}
/// ChooseConstraint - If there are multiple different constraints that we
/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
///    Other         -> immediates and magic values
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it.  The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory.  Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it.  This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present.  This prefers
///     'm' over 'r', for example.
///
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
                             const TargetLowering &TLI,
                             SDValue Op, SelectionDAG *DAG) {
  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
  unsigned BestIdx = 0;
  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
  int BestGenerality = -1;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
    TargetLowering::ConstraintType CType =
      TLI.getConstraintType(OpInfo.Codes[i]);

    // If this is an 'other' constraint, see if the operand is valid for it.
    // For example, on X86 we might have an 'rI' constraint.  If the operand
    // is an integer in the range [0..31] we want to use I (saving a load
    // of a register), otherwise we must use 'r'.
    if (CType == TargetLowering::C_Other && Op.getNode()) {
      assert(OpInfo.Codes[i].size() == 1 &&
             "Unhandled multi-letter 'other' constraint");
      std::vector<SDValue> ResultOps;
      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0],
                                       ResultOps, *DAG);
      if (!ResultOps.empty()) {
        BestType = CType;
        BestIdx = i;
        break;
      }
    }

    // Things with matching constraints can only be registers, per gcc
    // documentation.  This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    // This constraint letter is more general than the previous one, use it.
    int Generality = getConstraintGenerality(CType);
    if (Generality > BestGenerality) {
      BestType = CType;
      BestIdx = i;
      BestGenerality = Generality;
    }
  }

  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
  OpInfo.ConstraintType = BestType;
}
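// Example of the heuristic: for "imr" with an operand that is the
// constant 42, rule 1 selects 'i' right away.  For a non-constant value
// 'i' lowers to nothing, so rule 2 compares generality and picks 'm'
// (generality 3) over 'r' (generality 2).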
/// ComputeConstraintToUse - Determines the constraint code and constraint
/// type to use for the specific AsmOperandInfo, setting
/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    ChooseConstraint(OpInfo, *this, Op, DAG);
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Labels and constants are handled elsewhere ('X' is the only thing
    // that matches labels).  For Functions, the type here is the type of
    // the result, which is not what we want to look at; leave them alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
      OpInfo.CallOperandVal = v;
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
//===----------------------------------------------------------------------===//
//  Loop Strength Reduction hooks
//===----------------------------------------------------------------------===//

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                           const Type *Ty) const {
  // The default implementation of this implements a conservative RISCy
  // r+r and r+i addressing mode.

  // Allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Only support r+r.
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:  // Don't allow n*r with any other scale.
    return false;
  }

  return true;
}
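// Example outcomes of the default implementation above (expository comment):
//   reg + 4         -> legal   (Scale = 0, BaseOffs = 4)
//   reg + reg       -> legal   (Scale = 1, HasBaseReg, BaseOffs = 0)
//   2*reg           -> legal   (Scale = 2, selectable as reg + reg)
//   reg + reg + 4   -> illegal (Scale = 1 with a nonzero BaseOffs)
//   globalvar + 4   -> illegal (no global is allowed as a base)
//   reg + 0x20000   -> illegal (offset exceeds the signed 16-bit field)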
/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  std::vector<SDNode*> *Created) const {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
  APInt::ms magics = d.magic();

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  SDValue Q;
  if (isOperationLegalOrCustom(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
                    DAG.getConstant(magics.m, VT));
  else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
    Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
                            N->getOperand(0),
                            DAG.getConstant(magics.m, VT)).getNode(), 1);
  else
    return SDValue();       // No mulhs or equivalent.

  // If d > 0 and m < 0, add the numerator.
  if (d.isStrictlyPositive() && magics.m.isNegative()) {
    Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
    if (Created)
      Created->push_back(Q.getNode());
  }
  // If d < 0 and m > 0, subtract the numerator.
  if (d.isNegative() && magics.m.isStrictlyPositive()) {
    Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
    if (Created)
      Created->push_back(Q.getNode());
  }
  // Shift right algebraic if shift value is nonzero.
  if (magics.s > 0) {
    Q = DAG.getNode(ISD::SRA, dl, VT, Q,
                    DAG.getConstant(magics.s,
                                    getShiftAmountTy(Q.getValueType())));
    if (Created)
      Created->push_back(Q.getNode());
  }
  // Extract the sign bit and add it to the quotient.
  SDValue T =
    DAG.getNode(ISD::SRL, dl, VT, Q,
                DAG.getConstant(VT.getSizeInBits()-1,
                                getShiftAmountTy(Q.getValueType())));
  if (Created)
    Created->push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
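// Worked example (expository comment): for an i32 sdiv by 5, d.magic() yields
// m = 0x66666667 and s = 1, so the emitted sequence is
//   Q = MULHS(x, 0x66666667)   // high 32 bits of the 64-bit product
//   Q = SRA(Q, 1)              // shift by magics.s
//   T = SRL(Q, 31)             // extract the sign bit
//   result = ADD(Q, T)         // rounds the quotient toward zero
// Neither fixup add/sub fires here, since both d and m are positive.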
/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  std::vector<SDNode*> *Created) const {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // FIXME: We should use a narrower constant when the upper
  // bits are known to be zero.
  const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
  APInt::mu magics = N1C.magicu();

  SDValue Q = N->getOperand(0);

  // If the divisor is even, we can avoid using the expensive fixup by shifting
  // the divided value upfront.
  if (magics.a != 0 && !N1C[0]) {
    unsigned Shift = N1C.countTrailingZeros();
    Q = DAG.getNode(ISD::SRL, dl, VT, Q,
                    DAG.getConstant(Shift,
                                    getShiftAmountTy(Q.getValueType())));
    if (Created)
      Created->push_back(Q.getNode());

    // Get magic number for the shifted divisor.
    magics = N1C.lshr(Shift).magicu(Shift);
    assert(magics.a == 0 && "Should use cheap fixup now");
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  if (isOperationLegalOrCustom(ISD::MULHU, VT))
    Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
  else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
    Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
                            DAG.getConstant(magics.m, VT)).getNode(), 1);
  else
    return SDValue();       // No mulhu or equivalent.

  if (Created)
    Created->push_back(Q.getNode());

  if (magics.a == 0) {
    assert(magics.s < N1C.getBitWidth() &&
           "We shouldn't generate an undefined shift!");
    return DAG.getNode(ISD::SRL, dl, VT, Q,
                       DAG.getConstant(magics.s,
                                       getShiftAmountTy(Q.getValueType())));
  } else {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
    if (Created)
      Created->push_back(NPQ.getNode());
    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
                      DAG.getConstant(1,
                                      getShiftAmountTy(NPQ.getValueType())));
    if (Created)
      Created->push_back(NPQ.getNode());
    NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    if (Created)
      Created->push_back(NPQ.getNode());
    return DAG.getNode(ISD::SRL, dl, VT, NPQ,
                       DAG.getConstant(magics.s-1,
                                       getShiftAmountTy(NPQ.getValueType())));
  }
}
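// Worked example (expository comment): for an i32 udiv by 7, magicu() yields
// m = 0x24924925 with a = 1 and s = 3, so the expensive-fixup path runs:
//   Q   = MULHU(x, 0x24924925)
//   NPQ = SRL(SUB(x, Q), 1)
//   NPQ = ADD(NPQ, Q)
//   result = SRL(NPQ, 2)        // shift by magics.s - 1
// For an even divisor such as 14, the pre-shift above first divides x by 2
// and recomputes the magic so that the cheap (a == 0) path applies instead.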