//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License.  See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
namespace llvm {
TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
  bool isLocal = GV->hasLocalLinkage();
  bool isDeclaration = GV->isDeclaration();
  // FIXME: what should we do for protected and internal visibility?
  // For variables, is internal different from hidden?
  bool isHidden = GV->hasHiddenVisibility();

  if (reloc == Reloc::PIC_) {
    if (isLocal || isHidden)
      return TLSModel::LocalDynamic;
    else
      return TLSModel::GeneralDynamic;
  } else {
    if (!isDeclaration || isHidden)
      return TLSModel::LocalExec;
    else
      return TLSModel::InitialExec;
  }
}
}
/// InitLibcallNames - Set default libcall names.
static void InitLibcallNames(const char **Names) {
  Names[RTLIB::SHL_I16] = "__ashlhi3";
  Names[RTLIB::SHL_I32] = "__ashlsi3";
  Names[RTLIB::SHL_I64] = "__ashldi3";
  Names[RTLIB::SHL_I128] = "__ashlti3";
  Names[RTLIB::SRL_I16] = "__lshrhi3";
  Names[RTLIB::SRL_I32] = "__lshrsi3";
  Names[RTLIB::SRL_I64] = "__lshrdi3";
  Names[RTLIB::SRL_I128] = "__lshrti3";
  Names[RTLIB::SRA_I16] = "__ashrhi3";
  Names[RTLIB::SRA_I32] = "__ashrsi3";
  Names[RTLIB::SRA_I64] = "__ashrdi3";
  Names[RTLIB::SRA_I128] = "__ashrti3";
  Names[RTLIB::MUL_I16] = "__mulhi3";
  Names[RTLIB::MUL_I32] = "__mulsi3";
  Names[RTLIB::MUL_I64] = "__muldi3";
  Names[RTLIB::MUL_I128] = "__multi3";
  Names[RTLIB::SDIV_I16] = "__divhi3";
  Names[RTLIB::SDIV_I32] = "__divsi3";
  Names[RTLIB::SDIV_I64] = "__divdi3";
  Names[RTLIB::SDIV_I128] = "__divti3";
  Names[RTLIB::UDIV_I16] = "__udivhi3";
  Names[RTLIB::UDIV_I32] = "__udivsi3";
  Names[RTLIB::UDIV_I64] = "__udivdi3";
  Names[RTLIB::UDIV_I128] = "__udivti3";
  Names[RTLIB::SREM_I16] = "__modhi3";
  Names[RTLIB::SREM_I32] = "__modsi3";
  Names[RTLIB::SREM_I64] = "__moddi3";
  Names[RTLIB::SREM_I128] = "__modti3";
  Names[RTLIB::UREM_I16] = "__umodhi3";
  Names[RTLIB::UREM_I32] = "__umodsi3";
  Names[RTLIB::UREM_I64] = "__umoddi3";
  Names[RTLIB::UREM_I128] = "__umodti3";
  Names[RTLIB::NEG_I32] = "__negsi2";
  Names[RTLIB::NEG_I64] = "__negdi2";
  Names[RTLIB::ADD_F32] = "__addsf3";
  Names[RTLIB::ADD_F64] = "__adddf3";
  Names[RTLIB::ADD_F80] = "__addxf3";
  Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
  Names[RTLIB::SUB_F32] = "__subsf3";
  Names[RTLIB::SUB_F64] = "__subdf3";
  Names[RTLIB::SUB_F80] = "__subxf3";
  Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
  Names[RTLIB::MUL_F32] = "__mulsf3";
  Names[RTLIB::MUL_F64] = "__muldf3";
  Names[RTLIB::MUL_F80] = "__mulxf3";
  Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
  Names[RTLIB::DIV_F32] = "__divsf3";
  Names[RTLIB::DIV_F64] = "__divdf3";
  Names[RTLIB::DIV_F80] = "__divxf3";
  Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
  Names[RTLIB::REM_F32] = "fmodf";
  Names[RTLIB::REM_F64] = "fmod";
  Names[RTLIB::REM_F80] = "fmodl";
  Names[RTLIB::REM_PPCF128] = "fmodl";
  Names[RTLIB::POWI_F32] = "__powisf2";
  Names[RTLIB::POWI_F64] = "__powidf2";
  Names[RTLIB::POWI_F80] = "__powixf2";
  Names[RTLIB::POWI_PPCF128] = "__powitf2";
  Names[RTLIB::SQRT_F32] = "sqrtf";
  Names[RTLIB::SQRT_F64] = "sqrt";
  Names[RTLIB::SQRT_F80] = "sqrtl";
  Names[RTLIB::SQRT_PPCF128] = "sqrtl";
  Names[RTLIB::LOG_F32] = "logf";
  Names[RTLIB::LOG_F64] = "log";
  Names[RTLIB::LOG_F80] = "logl";
  Names[RTLIB::LOG_PPCF128] = "logl";
  Names[RTLIB::LOG2_F32] = "log2f";
  Names[RTLIB::LOG2_F64] = "log2";
  Names[RTLIB::LOG2_F80] = "log2l";
  Names[RTLIB::LOG2_PPCF128] = "log2l";
  Names[RTLIB::LOG10_F32] = "log10f";
  Names[RTLIB::LOG10_F64] = "log10";
  Names[RTLIB::LOG10_F80] = "log10l";
  Names[RTLIB::LOG10_PPCF128] = "log10l";
  Names[RTLIB::EXP_F32] = "expf";
  Names[RTLIB::EXP_F64] = "exp";
  Names[RTLIB::EXP_F80] = "expl";
  Names[RTLIB::EXP_PPCF128] = "expl";
  Names[RTLIB::EXP2_F32] = "exp2f";
  Names[RTLIB::EXP2_F64] = "exp2";
  Names[RTLIB::EXP2_F80] = "exp2l";
  Names[RTLIB::EXP2_PPCF128] = "exp2l";
  Names[RTLIB::SIN_F32] = "sinf";
  Names[RTLIB::SIN_F64] = "sin";
  Names[RTLIB::SIN_F80] = "sinl";
  Names[RTLIB::SIN_PPCF128] = "sinl";
  Names[RTLIB::COS_F32] = "cosf";
  Names[RTLIB::COS_F64] = "cos";
  Names[RTLIB::COS_F80] = "cosl";
  Names[RTLIB::COS_PPCF128] = "cosl";
  Names[RTLIB::POW_F32] = "powf";
  Names[RTLIB::POW_F64] = "pow";
  Names[RTLIB::POW_F80] = "powl";
  Names[RTLIB::POW_PPCF128] = "powl";
  Names[RTLIB::CEIL_F32] = "ceilf";
  Names[RTLIB::CEIL_F64] = "ceil";
  Names[RTLIB::CEIL_F80] = "ceill";
  Names[RTLIB::CEIL_PPCF128] = "ceill";
  Names[RTLIB::TRUNC_F32] = "truncf";
  Names[RTLIB::TRUNC_F64] = "trunc";
  Names[RTLIB::TRUNC_F80] = "truncl";
  Names[RTLIB::TRUNC_PPCF128] = "truncl";
  Names[RTLIB::RINT_F32] = "rintf";
  Names[RTLIB::RINT_F64] = "rint";
  Names[RTLIB::RINT_F80] = "rintl";
  Names[RTLIB::RINT_PPCF128] = "rintl";
  Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
  Names[RTLIB::NEARBYINT_F64] = "nearbyint";
  Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
  Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
  Names[RTLIB::FLOOR_F32] = "floorf";
  Names[RTLIB::FLOOR_F64] = "floor";
  Names[RTLIB::FLOOR_F80] = "floorl";
  Names[RTLIB::FLOOR_PPCF128] = "floorl";
  Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
  Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
  Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
  Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
  Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
  Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
  Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfi8";
  Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfi16";
  Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
  Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
  Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
  Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
  Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
  Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
  Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
  Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
  Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
  Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
  Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
  Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
  Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfi8";
  Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfi16";
  Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
  Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
  Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
  Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
  Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
  Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
  Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
  Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
  Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
  Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
  Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
  Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
  Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
  Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
  Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
  Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
  Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
  Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
  Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
  Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
  Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
  Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
  Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
  Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
  Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
  Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
  Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
  Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
  Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
  Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
  Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
  Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
  Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
  Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
  Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
  Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
  Names[RTLIB::OEQ_F32] = "__eqsf2";
  Names[RTLIB::OEQ_F64] = "__eqdf2";
  Names[RTLIB::UNE_F32] = "__nesf2";
  Names[RTLIB::UNE_F64] = "__nedf2";
  Names[RTLIB::OGE_F32] = "__gesf2";
  Names[RTLIB::OGE_F64] = "__gedf2";
  Names[RTLIB::OLT_F32] = "__ltsf2";
  Names[RTLIB::OLT_F64] = "__ltdf2";
  Names[RTLIB::OLE_F32] = "__lesf2";
  Names[RTLIB::OLE_F64] = "__ledf2";
  Names[RTLIB::OGT_F32] = "__gtsf2";
  Names[RTLIB::OGT_F64] = "__gtdf2";
  Names[RTLIB::UO_F32] = "__unordsf2";
  Names[RTLIB::UO_F64] = "__unorddf2";
  Names[RTLIB::O_F32] = "__unordsf2";
  Names[RTLIB::O_F64] = "__unorddf2";
  Names[RTLIB::MEMCPY] = "memcpy";
  Names[RTLIB::MEMMOVE] = "memmove";
  Names[RTLIB::MEMSET] = "memset";
  Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
}
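// The integer routines above follow the libgcc naming scheme __<op><mode>N:
// "hi" is i16, "si" is i32, "di" is i64, and "ti" is i128 (e.g. __ashlti3 is
// the 128-bit shift-left). The floating-point suffixes are "sf" (f32),
// "df" (f64), "xf" (f80), and "tf" (the 128-bit ppcf128 here).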
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) {
  if (OpVT == MVT::f32) {
    if (RetVT == MVT::f64)
      return FPEXT_F32_F64;
  }
  return UNKNOWN_LIBCALL;
}
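// For example, getFPEXT(MVT::f32, MVT::f64) returns FPEXT_F32_F64, which
// InitLibcallNames above maps to the libgcc routine "__extendsfdf2".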
/// getFPROUND - Return the FPROUND_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) {
  if (RetVT == MVT::f32) {
    if (OpVT == MVT::f64)
      return FPROUND_F64_F32;
    if (OpVT == MVT::f80)
      return FPROUND_F80_F32;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F32;
  } else if (RetVT == MVT::f64) {
    if (OpVT == MVT::f80)
      return FPROUND_F80_F64;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F64;
  }
  return UNKNOWN_LIBCALL;
}
/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) {
  if (OpVT == MVT::f32) {
    if (RetVT == MVT::i8)
      return FPTOSINT_F32_I8;
    if (RetVT == MVT::i16)
      return FPTOSINT_F32_I16;
    if (RetVT == MVT::i32)
      return FPTOSINT_F32_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F32_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F32_I128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F64_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F64_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F64_I128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::i32)
      return FPTOSINT_F80_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_F80_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_F80_I128;
  } else if (OpVT == MVT::ppcf128) {
    if (RetVT == MVT::i32)
      return FPTOSINT_PPCF128_I32;
    if (RetVT == MVT::i64)
      return FPTOSINT_PPCF128_I64;
    if (RetVT == MVT::i128)
      return FPTOSINT_PPCF128_I128;
  }
  return UNKNOWN_LIBCALL;
}
/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) {
  if (OpVT == MVT::f32) {
    if (RetVT == MVT::i8)
      return FPTOUINT_F32_I8;
    if (RetVT == MVT::i16)
      return FPTOUINT_F32_I16;
    if (RetVT == MVT::i32)
      return FPTOUINT_F32_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F32_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F32_I128;
  } else if (OpVT == MVT::f64) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F64_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F64_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F64_I128;
  } else if (OpVT == MVT::f80) {
    if (RetVT == MVT::i32)
      return FPTOUINT_F80_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_F80_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_F80_I128;
  } else if (OpVT == MVT::ppcf128) {
    if (RetVT == MVT::i32)
      return FPTOUINT_PPCF128_I32;
    if (RetVT == MVT::i64)
      return FPTOUINT_PPCF128_I64;
    if (RetVT == MVT::i128)
      return FPTOUINT_PPCF128_I128;
  }
  return UNKNOWN_LIBCALL;
}
/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) {
  if (OpVT == MVT::i32) {
    if (RetVT == MVT::f32)
      return SINTTOFP_I32_F32;
    else if (RetVT == MVT::f64)
      return SINTTOFP_I32_F64;
    else if (RetVT == MVT::f80)
      return SINTTOFP_I32_F80;
    else if (RetVT == MVT::ppcf128)
      return SINTTOFP_I32_PPCF128;
  } else if (OpVT == MVT::i64) {
    if (RetVT == MVT::f32)
      return SINTTOFP_I64_F32;
    else if (RetVT == MVT::f64)
      return SINTTOFP_I64_F64;
    else if (RetVT == MVT::f80)
      return SINTTOFP_I64_F80;
    else if (RetVT == MVT::ppcf128)
      return SINTTOFP_I64_PPCF128;
  } else if (OpVT == MVT::i128) {
    if (RetVT == MVT::f32)
      return SINTTOFP_I128_F32;
    else if (RetVT == MVT::f64)
      return SINTTOFP_I128_F64;
    else if (RetVT == MVT::f80)
      return SINTTOFP_I128_F80;
    else if (RetVT == MVT::ppcf128)
      return SINTTOFP_I128_PPCF128;
  }
  return UNKNOWN_LIBCALL;
}
/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getUINTTOFP(MVT OpVT, MVT RetVT) {
  if (OpVT == MVT::i32) {
    if (RetVT == MVT::f32)
      return UINTTOFP_I32_F32;
    else if (RetVT == MVT::f64)
      return UINTTOFP_I32_F64;
    else if (RetVT == MVT::f80)
      return UINTTOFP_I32_F80;
    else if (RetVT == MVT::ppcf128)
      return UINTTOFP_I32_PPCF128;
  } else if (OpVT == MVT::i64) {
    if (RetVT == MVT::f32)
      return UINTTOFP_I64_F32;
    else if (RetVT == MVT::f64)
      return UINTTOFP_I64_F64;
    else if (RetVT == MVT::f80)
      return UINTTOFP_I64_F80;
    else if (RetVT == MVT::ppcf128)
      return UINTTOFP_I64_PPCF128;
  } else if (OpVT == MVT::i128) {
    if (RetVT == MVT::f32)
      return UINTTOFP_I128_F32;
    else if (RetVT == MVT::f64)
      return UINTTOFP_I128_F64;
    else if (RetVT == MVT::f80)
      return UINTTOFP_I128_F80;
    else if (RetVT == MVT::ppcf128)
      return UINTTOFP_I128_PPCF128;
  }
  return UNKNOWN_LIBCALL;
}
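// Each of the conversion helpers above is a pure table lookup, e.g.
// getSINTTOFP(MVT::i64, MVT::f32) yields SINTTOFP_I64_F32 ("__floatdisf");
// any unsupported type pair falls through to UNKNOWN_LIBCALL.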
/// InitCmpLibcallCCs - Set default comparison libcall CC.
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
  memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
  CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
  CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
  CCs[RTLIB::UNE_F32] = ISD::SETNE;
  CCs[RTLIB::UNE_F64] = ISD::SETNE;
  CCs[RTLIB::OGE_F32] = ISD::SETGE;
  CCs[RTLIB::OGE_F64] = ISD::SETGE;
  CCs[RTLIB::OLT_F32] = ISD::SETLT;
  CCs[RTLIB::OLT_F64] = ISD::SETLT;
  CCs[RTLIB::OLE_F32] = ISD::SETLE;
  CCs[RTLIB::OLE_F64] = ISD::SETLE;
  CCs[RTLIB::OGT_F32] = ISD::SETGT;
  CCs[RTLIB::OGT_F64] = ISD::SETGT;
  CCs[RTLIB::UO_F32] = ISD::SETNE;
  CCs[RTLIB::UO_F64] = ISD::SETNE;
  CCs[RTLIB::O_F32] = ISD::SETEQ;
  CCs[RTLIB::O_F64] = ISD::SETEQ;
}
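// These condition codes describe how to interpret the integer result of a
// soft-float comparison libcall: e.g. an ordered-equal f32 comparison calls
// __eqsf2 and tests the returned value against zero with SETEQ.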
/// NOTE: The constructor takes ownership of TLOF.
TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof)
  : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
  // All operations default to being supported.
  memset(OpActions, 0, sizeof(OpActions));
  memset(LoadExtActions, 0, sizeof(LoadExtActions));
  memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
  memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
  memset(ConvertActions, 0, sizeof(ConvertActions));
  memset(CondCodeActions, 0, sizeof(CondCodeActions));

  // Set default actions for various operations.
  for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
    // Default all indexed load / store to expand.
    for (unsigned IM = (unsigned)ISD::PRE_INC;
         IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
      setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
      setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
    }

    // These operations default to expand.
    setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
  }

  // Most targets ignore the @llvm.prefetch intrinsic.
  setOperationAction(ISD::PREFETCH, MVT::Other, Expand);

  // ConstantFP nodes default to expand.  Targets can either change this to
  // Legal, in which case all fp constants are legal, or use addLegalFPImmediate
  // to optimize expansions for certain constants.
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f80, Expand);

  // These library functions default to expand.
  setOperationAction(ISD::FLOG , MVT::f64, Expand);
  setOperationAction(ISD::FLOG2, MVT::f64, Expand);
  setOperationAction(ISD::FLOG10,MVT::f64, Expand);
  setOperationAction(ISD::FEXP , MVT::f64, Expand);
  setOperationAction(ISD::FEXP2, MVT::f64, Expand);
  setOperationAction(ISD::FLOG , MVT::f32, Expand);
  setOperationAction(ISD::FLOG2, MVT::f32, Expand);
  setOperationAction(ISD::FLOG10,MVT::f32, Expand);
  setOperationAction(ISD::FEXP , MVT::f32, Expand);
  setOperationAction(ISD::FEXP2, MVT::f32, Expand);

  // Default ISD::TRAP to expand (which turns it into abort).
  setOperationAction(ISD::TRAP, MVT::Other, Expand);

  IsLittleEndian = TD->isLittleEndian();
  UsesGlobalOffsetTable = false;
  ShiftAmountTy = PointerTy = getValueType(TD->getIntPtrType());
  memset(RegClassForVT, 0, MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
  memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
  maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
  allowUnalignedMemoryAccesses = false;
  benefitFromCodePlacementOpt = false;
  UseUnderscoreSetJmp = false;
  UseUnderscoreLongJmp = false;
  SelectIsExpensive = false;
  IntDivIsCheap = false;
  Pow2DivIsCheap = false;
  StackPointerRegisterToSaveRestore = 0;
  ExceptionPointerRegister = 0;
  ExceptionSelectorRegister = 0;
  BooleanContents = UndefinedBooleanContent;
  SchedPreferenceInfo = SchedulingForLatency;
  JumpBufSize = 0;
  JumpBufAlignment = 0;
  IfCvtBlockSizeLimit = 2;
  IfCvtDupBlockSizeLimit = 0;
  PrefLoopAlignment = 0;

  InitLibcallNames(LibcallRoutineNames);
  InitCmpLibcallCCs(CmpLibcallCCs);

  // Tell Legalize whether the assembler supports DEBUG_LOC.
  const TargetAsmInfo *TASM = TM.getTargetAsmInfo();
  if (!TASM || !TASM->hasDotLocAndDotFile())
    setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
}
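// A target's TargetLowering subclass typically refines these defaults in its
// own constructor, e.g. calling addRegisterClass() for each legal type and
// setOperationAction() for each operation it handles specially, then calling
// computeRegisterProperties() once everything is registered.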
TargetLowering::~TargetLowering() {
  delete &TLOF;
}
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLowering::computeRegisterProperties() {
  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
         "Too many value types for ValueTypeActions to hold!");

  // Everything defaults to needing one register.
  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
    NumRegistersForVT[i] = 1;
    RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
  }
  // ...except isVoid, which doesn't need any registers.
  NumRegistersForVT[MVT::isVoid] = 0;

  // Find the largest integer register class.
  unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
  for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");

  // Every integer value type larger than this largest register takes twice as
  // many registers to represent as the previous ValueType.
  for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
    MVT EVT = (MVT::SimpleValueType)ExpandedReg;
    if (!EVT.isInteger())
      break;
    NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
    RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
    TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
    ValueTypeActions.setTypeAction(EVT, Expand);
  }

  // Inspect all of the ValueType's smaller than the largest integer
  // register to see which ones need promotion.
  unsigned LegalIntReg = LargestIntReg;
  for (unsigned IntReg = LargestIntReg - 1;
       IntReg >= (unsigned)MVT::i1; --IntReg) {
    MVT IVT = (MVT::SimpleValueType)IntReg;
    if (isTypeLegal(IVT)) {
      LegalIntReg = IntReg;
    } else {
      RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
        (MVT::SimpleValueType)LegalIntReg;
      ValueTypeActions.setTypeAction(IVT, Promote);
    }
  }

  // ppcf128 type is really two f64's.
  if (!isTypeLegal(MVT::ppcf128)) {
    NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
    RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
    TransformToType[MVT::ppcf128] = MVT::f64;
    ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
  }

  // Decide how to handle f64. If the target does not have native f64 support,
  // expand it to i64 and we will be generating soft float library calls.
  if (!isTypeLegal(MVT::f64)) {
    NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
    RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
    TransformToType[MVT::f64] = MVT::i64;
    ValueTypeActions.setTypeAction(MVT::f64, Expand);
  }

  // Decide how to handle f32. If the target does not have native support for
  // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
  if (!isTypeLegal(MVT::f32)) {
    if (isTypeLegal(MVT::f64)) {
      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
      TransformToType[MVT::f32] = MVT::f64;
      ValueTypeActions.setTypeAction(MVT::f32, Promote);
    } else {
      NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
      RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
      TransformToType[MVT::f32] = MVT::i32;
      ValueTypeActions.setTypeAction(MVT::f32, Expand);
    }
  }

  // Loop over all of the vector value types to see which need transformations.
  for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;
    if (!isTypeLegal(VT)) {
      MVT IntermediateVT, RegisterVT;
      unsigned NumIntermediates;
      NumRegistersForVT[i] =
        getVectorTypeBreakdown(VT,
                               IntermediateVT, NumIntermediates,
                               RegisterVT);
      RegisterTypeForVT[i] = RegisterVT;

      // Determine if there is a legal wider type.
      bool IsLegalWiderType = false;
      MVT EltVT = VT.getVectorElementType();
      unsigned NElts = VT.getVectorNumElements();
      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
        MVT SVT = (MVT::SimpleValueType)nVT;
        if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT &&
            SVT.getVectorNumElements() > NElts) {
          TransformToType[i] = SVT;
          ValueTypeActions.setTypeAction(VT, Promote);
          IsLegalWiderType = true;
          break;
        }
      }
      if (!IsLegalWiderType) {
        MVT NVT = VT.getPow2VectorType();
        if (NVT == VT) {
          // Type is already a power of 2.  The default action is to split.
          TransformToType[i] = MVT::Other;
          ValueTypeActions.setTypeAction(VT, Expand);
        } else {
          TransformToType[i] = NVT;
          ValueTypeActions.setTypeAction(VT, Promote);
        }
      }
    }
  }
}
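// For example, on a target whose largest legal integer type is i32, the loops
// above mark i64 as Expand (2 x i32 registers) and promote i1/i8/i16 to the
// next larger legal integer type.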
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return NULL;
}
MVT TargetLowering::getSetCCResultType(MVT VT) const {
  return getValueType(TD->getIntPtrType());
}
/// getVectorTypeBreakdown - Vector types are broken down into some number of
/// legal first class types.  For example, MVT::v8f32 maps to 2 MVT::v4f32
/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
///
/// This method returns the number of registers needed, and the VT for each
/// register.  It also returns the VT and quantity of the intermediate values
/// before they are promoted/expanded.
///
unsigned TargetLowering::getVectorTypeBreakdown(MVT VT,
                                                MVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const {
  // Figure out the right, legal destination reg to copy into.
  unsigned NumElts = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
  // could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(NumElts)) {
    NumVectorRegs = NumElts;
    NumElts = 1;
  }

  // Divide the input until we get to a supported size.  This will always
  // end with a scalar if the target doesn't support vectors.
  while (NumElts > 1 && !isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
  if (!isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  MVT DestVT = getRegisterType(NewVT);
  RegisterVT = DestVT;
  if (DestVT.bitsLT(NewVT)) {
    // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
  } else {
    // Otherwise, promotion or legal types use the same number of registers as
    // the vector decimated to the appropriate level.
    return NumVectorRegs;
  }
}
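// For example, breaking down MVT::v8f32 on a target where v4f32 is legal
// yields NumIntermediates == 2 with IntermediateVT == v4f32, as the comment
// above describes.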
/// getWidenVectorType: given a vector type, returns the type to widen to
/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
/// If there is no vector type that we want to widen to, returns MVT::Other
/// When and where to widen is target dependent based on the cost of
/// scalarizing vs using the wider vector type.
MVT TargetLowering::getWidenVectorType(MVT VT) const {
  assert(VT.isVector());
  if (isTypeLegal(VT))
    return VT;

  // Default is not to widen until moved to LegalizeTypes
  return MVT::Other;
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.  This is the actual
/// alignment, not its logarithm.
unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
  return TD->getCallFrameTypeAlignment(Ty);
}
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  if (usesGlobalOffsetTable())
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
  return Table;
}
bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // Assume that everything is safe in static mode.
  if (getTargetMachine().getRelocationModel() == Reloc::Static)
    return true;

  // In dynamic-no-pic mode, assume that known defined values are safe.
  if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
      GA &&
      !GA->getGlobal()->isDeclaration() &&
      !GA->getGlobal()->isWeakForLinker())
    return true;

  // Otherwise assume nothing is safe.
  return false;
}
//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//
/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer.  If so, check to see if there
/// are any bits set in the constant that are not demanded.  If so, shrink the
/// constant and return true.
bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
                                                        const APInt &Demanded) {
  DebugLoc dl = Op.getDebugLoc();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Op.getOpcode()) {
  default: break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!C) return false;

    if (Op.getOpcode() == ISD::XOR &&
        (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
      return false;

    // if we can expand it to have all bits set, do it
    if (C->getAPIntValue().intersects(~Demanded)) {
      MVT VT = Op.getValueType();
      SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
                                DAG.getConstant(Demanded &
                                                C->getAPIntValue(),
                                                VT));
      return CombineTo(Op, New);
    }

    break;
  }
  }

  return false;
}
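// For example, if Op is (and X, 0xFFFF) but Demanded is only 0xFF, the
// constant intersects ~Demanded, so the node is rewritten as (and X, 0xFF).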
/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
/// casts are free.  This uses isZExtFree and ZERO_EXTEND for the widening
/// cast, but it could be generalized for targets with other types of
/// implicit widening casts.
bool
TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
                                                    unsigned BitWidth,
                                                    const APInt &Demanded,
                                                    DebugLoc dl) {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    MVT SmallVT = MVT::getIntegerVT(SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
                                          Op.getNode()->getOperand(0)),
                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
                                          Op.getNode()->getOperand(1)));
      SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
      return CombineTo(Op, Z);
    }
  }
  return false;
}
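// For example, if only the low 8 bits of an i32 add are demanded and the
// target's i8<->i32 truncate and zero-extend are both free, the add is
// rewritten as zext(trunc(x) + trunc(y)) performed at i8.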
/// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
/// DemandedMask bits of the result of Op are ever used downstream.  If we can
/// use this information to simplify Op, create a new simplified DAG node and
/// return true, returning the original and new nodes in Old and New. Otherwise,
/// analyze the expression and return a mask of KnownOne and KnownZero bits for
/// the expression (used to simplify the caller).  The KnownZero/One bits may
/// only be accurate for those bits in the DemandedMask.
bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                                          const APInt &DemandedMask,
                                          APInt &KnownZero,
                                          APInt &KnownOne,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth) const {
  unsigned BitWidth = DemandedMask.getBitWidth();
  assert(Op.getValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");
  APInt NewMask = DemandedMask;
  DebugLoc dl = Op.getDebugLoc();

  // Don't know anything.
  KnownZero = KnownOne = APInt(BitWidth, 0);

  // Other users may use these bits.
  if (!Op.getNode()->hasOneUse()) {
    if (Depth != 0) {
      // If not at the root, Just compute the KnownZero/KnownOne bits to
      // simplify things downstream.
      TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
      return false;
    }
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the NewMask to all bits.
    NewMask = APInt::getAllOnesValue(BitWidth);
  } else if (DemandedMask == 0) {
    // Not demanding any bits from Op.
    if (Op.getOpcode() != ISD::UNDEF)
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
    return false;
  } else if (Depth == 6) {        // Limit search depth.
    return false;
  }

  APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
  switch (Op.getOpcode()) {
  case ISD::Constant:
    // We know all of the bits for a constant!
    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
    KnownZero = ~KnownOne & NewMask;
    return false;   // Don't fall through, will infinitely loop.
  case ISD::AND:
    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS.  Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      APInt LHSZero, LHSOne;
      TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
                                LHSZero, LHSOne, Depth+1);
      // If the LHS already has zeros where RHSC does, this and is dead.
      if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
        return TLO.CombineTo(Op, Op.getOperand(0));
      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
        return true;
    }

    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
                             KnownZero2, KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType()));
    // If the RHS is a constant, see if we can simplify it.
    if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
      return true;

    // Output known-1 bits are only known if set in both the LHS & RHS.
    KnownOne &= KnownOne2;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    KnownZero |= KnownZero2;
    break;
  case ISD::OR:
    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
                             KnownZero2, KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If all of the potentially set bits on one side are known to be set on
    // the other side, just use the 'other' side.
    if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If the RHS is a constant, see if we can simplify it.
    if (TLO.ShrinkDemandedConstant(Op, NewMask))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
      return true;

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    KnownZero &= KnownZero2;
    // Output known-1 are known to be set if set in either the LHS | RHS.
    KnownOne |= KnownOne2;
    break;
  case ISD::XOR:
    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if ((KnownZero & NewMask) == NewMask)
      return TLO.CombineTo(Op, Op.getOperand(0));
    if ((KnownZero2 & NewMask) == NewMask)
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If the operation can be done in a smaller type, do so.
    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // (but not both) turn this into an *inclusive* or.
    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
                                               Op.getOperand(0),
                                               Op.getOperand(1)));

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);

    // If all of the demanded bits on one side are known, and all of the set
    // bits on that side are also known to be set on the other side, turn this
    // into an AND, as we know the bits will be cleared.
    //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known
      if ((KnownOne & KnownOne2) == KnownOne) {
        MVT VT = Op.getValueType();
        SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
                                                 Op.getOperand(0), ANDC));
      }
    }

    // If the RHS is a constant, see if we can simplify it.
    // for XOR, we prefer to force bits to 1 if they will make a -1.
    // if we can't force bits, try to shrink constant
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      APInt Expanded = C->getAPIntValue() | (~NewMask);
      // if we can expand it to have all bits set, do it
      if (Expanded.isAllOnesValue()) {
        if (Expanded != C->getAPIntValue()) {
          MVT VT = Op.getValueType();
          SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
                                        TLO.DAG.getConstant(Expanded, VT));
          return TLO.CombineTo(Op, New);
        }
        // if it already has all the bits set, nothing to change
        // but don't shrink either!
      } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
        return true;
      }
    }

    KnownZero = KnownZeroOut;
    KnownOne  = KnownOneOut;
    break;
  case ISD::SELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (TLO.ShrinkDemandedConstant(Op, NewMask))
      return true;

    // Only known if known in both the LHS and RHS.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  case ISD::SELECT_CC:
    if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (TLO.ShrinkDemandedConstant(Op, NewMask))
      return true;

    // Only known if known in both the LHS and RHS.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  case ISD::SHL:
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      unsigned ShAmt = SA->getZExtValue();
      SDValue InOp = Op.getOperand(0);

      // If the shift count is an invalid immediate, don't do anything.
      if (ShAmt >= BitWidth)
        break;

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift.  We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      if (InOp.getOpcode() == ISD::SRL &&
          isa<ConstantSDNode>(InOp.getOperand(1))) {
        if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
          unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
          unsigned Opc = ISD::SHL;
          int Diff = ShAmt-C1;
          if (Diff < 0) {
            Diff = -Diff;
            Opc = ISD::SRL;
          }

          SDValue NewSA =
            TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
          MVT VT = Op.getValueType();
          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
                                                   InOp.getOperand(0), NewSA));
        }
      }

      if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt),
                               KnownZero, KnownOne, TLO, Depth+1))
        return true;
      KnownZero <<= SA->getZExtValue();
      KnownOne  <<= SA->getZExtValue();
      // low bits known zero.
      KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
    }
    break;
  case ISD::SRL:
    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      MVT VT = Op.getValueType();
      unsigned ShAmt = SA->getZExtValue();
      unsigned VTSize = VT.getSizeInBits();
      SDValue InOp = Op.getOperand(0);

      // If the shift count is an invalid immediate, don't do anything.
      if (ShAmt >= BitWidth)
        break;

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift.  We can do this if the top bits (which are shifted out)
      // are never demanded.
      if (InOp.getOpcode() == ISD::SHL &&
          isa<ConstantSDNode>(InOp.getOperand(1))) {
        if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
          unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
          unsigned Opc = ISD::SRL;
          int Diff = ShAmt-C1;
          if (Diff < 0) {
            Diff = -Diff;
            Opc = ISD::SHL;
          }

          SDValue NewSA =
            TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
                                                   InOp.getOperand(0), NewSA));
        }
      }

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
                               KnownZero, KnownOne, TLO, Depth+1))
        return true;
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
      KnownZero = KnownZero.lshr(ShAmt);
      KnownOne  = KnownOne.lshr(ShAmt);

      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
      KnownZero |= HighBits;  // High bits known zero.
    }
    break;
  case ISD::SRA:
    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable.  The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedMask == 1)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
                                               Op.getOperand(0), Op.getOperand(1)));

    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      MVT VT = Op.getValueType();
      unsigned ShAmt = SA->getZExtValue();

      // If the shift count is an invalid immediate, don't do anything.
      if (ShAmt >= BitWidth)
        break;

      APInt InDemandedMask = (NewMask << ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
      if (HighBits.intersects(NewMask))
        InDemandedMask |= APInt::getSignBit(VT.getSizeInBits());

      if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
                               KnownZero, KnownOne, TLO, Depth+1))
        return true;
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
      KnownZero = KnownZero.lshr(ShAmt);
      KnownOne  = KnownOne.lshr(ShAmt);

      // Handle the sign bit, adjusted to where it is now in the mask.
      APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
                                                 Op.getOperand(0),
                                                 Op.getOperand(1)));
      } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
        KnownOne |= HighBits;
      }
    }
    break;
  case ISD::SIGN_EXTEND_INREG: {
    MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

    // Sign extension.  Compute the demanded bits in the result that are not
    // present in the input.
    APInt NewBits = APInt::getHighBitsSet(BitWidth,
                                          BitWidth - EVT.getSizeInBits()) &
                    NewMask;

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (NewBits == 0)
      return TLO.CombineTo(Op, Op.getOperand(0));

    APInt InSignBit = APInt::getSignBit(EVT.getSizeInBits());
    InSignBit.zext(BitWidth);
    APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth,
                                                   EVT.getSizeInBits()) &
                              NewMask;

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits |= InSignBit;

    if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (KnownZero.intersects(InSignBit))
      return TLO.CombineTo(Op,
                           TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));

    if (KnownOne.intersects(InSignBit)) {    // Input sign bit known set
      KnownOne |= NewBits;
      KnownZero &= ~NewBits;
    } else {                       // Input sign bit unknown
      KnownZero &= ~NewBits;
      KnownOne &= ~NewBits;
    }
    break;
  }
  case ISD::ZERO_EXTEND: {
    unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits();
    APInt InMask = NewMask;
    InMask.trunc(OperandBitWidth);

    // If none of the top bits are demanded, convert this into an any_extend.
    APInt NewBits =
      APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
    if (!NewBits.intersects(NewMask))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
                                               Op.getValueType(),
                                               Op.getOperand(0)));

    if (SimplifyDemandedBits(Op.getOperand(0), InMask,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    KnownZero.zext(BitWidth);
    KnownOne.zext(BitWidth);
    KnownZero |= NewBits;
    break;
  }
  case ISD::SIGN_EXTEND: {
    MVT InVT = Op.getOperand(0).getValueType();
    unsigned InBits = InVT.getSizeInBits();
    APInt InMask    = APInt::getLowBitsSet(BitWidth, InBits);
    APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
    APInt NewBits   = ~InMask & NewMask;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (NewBits == 0)
      return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
                                              Op.getValueType(),
                                              Op.getOperand(0)));

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    APInt InDemandedBits = InMask & NewMask;
    InDemandedBits |= InSignBit;
    InDemandedBits.trunc(InBits);

    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
                             KnownOne, TLO, Depth+1))
      return true;
    KnownZero.zext(BitWidth);
    KnownOne.zext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (KnownZero.intersects(InSignBit))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
                                               Op.getValueType(),
                                               Op.getOperand(0)));

    // If the sign bit is known one, the top bits match.
    if (KnownOne.intersects(InSignBit)) {
      KnownOne  |= NewBits;
      KnownZero &= ~NewBits;
    } else {   // Otherwise, top bits aren't known.
      KnownOne  &= ~NewBits;
      KnownZero &= ~NewBits;
    }
    break;
  }
  case ISD::ANY_EXTEND: {
    unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits();
    APInt InMask = NewMask;
    InMask.trunc(OperandBitWidth);
    if (SimplifyDemandedBits(Op.getOperand(0), InMask,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    KnownZero.zext(BitWidth);
    KnownOne.zext(BitWidth);
    break;
  }
  case ISD::TRUNCATE: {
    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    APInt TruncMask = NewMask;
    TruncMask.zext(Op.getOperand(0).getValueSizeInBits());
    if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    KnownZero.trunc(BitWidth);
    KnownOne.trunc(BitWidth);

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    if (Op.getOperand(0).getNode()->hasOneUse()) {
      SDValue In = Op.getOperand(0);
      unsigned InBitWidth = In.getValueSizeInBits();
      switch (In.getOpcode()) {
      default: break;
      case ISD::SRL:
        // Shrink SRL by a constant if none of the high bits shifted in are
        // demanded.
        if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
          APInt HighBits = APInt::getHighBitsSet(InBitWidth,
                                                 InBitWidth - BitWidth);
          HighBits = HighBits.lshr(ShAmt->getZExtValue());
          HighBits.trunc(BitWidth);

          if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
            // None of the shifted in bits are needed.  Add a truncate of the
            // shift input, then shift it.
            SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
                                               Op.getValueType(),
                                               In.getOperand(0));
            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
                                                     Op.getValueType(),
                                                     NewTrunc,
                                                     In.getOperand(1)));
          }
        }
        break;
      }
    }

    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    break;
  }
  case ISD::AssertZext: {
    MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    APInt InMask = APInt::getLowBitsSet(BitWidth,
                                        VT.getSizeInBits());
    if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask,
                             KnownZero, KnownOne, TLO, Depth+1))
      return true;
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    KnownZero |= ~InMask & NewMask;
    break;
  }
  case ISD::BIT_CONVERT:
#if 0
    // If this is an FP->Int bitcast and if the sign bit is the only thing that
    // is demanded, turn this into a FGETSIGN.
    if (NewMask == MVT::getIntegerVTSignBit(Op.getValueType()) &&
        MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
        !MVT::isVector(Op.getOperand(0).getValueType())) {
      // Only do this xform if FGETSIGN is valid or if before legalize.
      if (!TLO.AfterLegalize ||
          isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place.  We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
                                       Op.getOperand(0));
        unsigned ShVal = Op.getValueType().getSizeInBits()-1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
                                                 Sign, ShAmt));
      }
    }
#endif
    break;
  case ISD::ADD:
  case ISD::MUL:
  case ISD::SUB: {
    // Add, Sub, and Mul don't demand any bits in positions beyond that
    // of the highest bit demanded of them.
    APInt LoMask = APInt::getLowBitsSet(BitWidth,
                                        BitWidth - NewMask.countLeadingZeros());
    if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
                             KnownOne2, TLO, Depth+1))
      return true;
    // See if the operation should be performed at a smaller bit width.
    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
      return true;
  }
  // FALL THROUGH
  default:
    // Just use ComputeMaskedBits to compute output bits.
    TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
    break;
  }

  // If we know the value of all of the demanded bits, return this as a
  // constant.
  if ((NewMask & (KnownZero|KnownOne)) == NewMask)
    return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));

  return false;
}
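// For example, simplifying (and X, 0xFF00) with a demanded mask of 0x00FF
// finds every demanded bit known zero in both inputs, so the AND case above
// replaces the node with the constant 0.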
/// computeMaskedBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                    const APInt &Mask,
                                                    APInt &KnownZero,
                                                    APInt &KnownOne,
                                                    const SelectionDAG &DAG,
                                                    unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");
  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
}
/// ComputeNumSignBitsForTargetNode - This method can be implemented by
/// targets that want to expose additional information about sign bits to the
/// DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                         unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use ComputeNumSignBits if you don't know whether Op"
         " is a target node!");
  return 1;
}
/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
/// determine which bit is set.
///
static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
  // A left-shift of a constant one will have exactly one bit set, because
  // shifting the bit off the end is undefined.
  if (Val.getOpcode() == ISD::SHL)
    if (ConstantSDNode *C =
         dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
      if (C->getAPIntValue() == 1)
        return true;

  // Similarly, a right-shift of a constant sign-bit will have exactly
  // one bit set.
  if (Val.getOpcode() == ISD::SRL)
    if (ConstantSDNode *C =
         dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
      if (C->getAPIntValue().isSignBit())
        return true;

  // More could be done here, though the above checks are enough
  // to handle some common cases.

  // Fall back to ComputeMaskedBits to catch other known cases.
  MVT OpVT = Val.getValueType();
  unsigned BitWidth = OpVT.getSizeInBits();
  APInt Mask = APInt::getAllOnesValue(BitWidth);
  APInt KnownZero, KnownOne;
  DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
  return (KnownZero.countPopulation() == BitWidth - 1) &&
         (KnownOne.countPopulation() == 1);
}
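// For example, (shl 1, x) and (srl 0x80000000, x) each have exactly one bit
// set for any in-range shift amount, which is what the two opcode checks
// above recognize.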
/// SimplifySetCC - Try to simplify a setcc built with the specified operands
/// and cc. If it is unable to simplify it, return a null SDValue.
SDValue
TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1,
                              ISD::CondCode Cond, bool foldBooleans,
                              DAGCombinerInfo &DCI, DebugLoc dl) const {
  SelectionDAG &DAG = DCI.DAG;

  // These setcc operations always fold.
  switch (Cond) {
  default: break;
  case ISD::SETFALSE:
  case ISD::SETFALSE2: return DAG.getConstant(0, VT);
  case ISD::SETTRUE:
  case ISD::SETTRUE2:  return DAG.getConstant(1, VT);
  }

  if (isa<ConstantSDNode>(N0.getNode())) {
    // Ensure that the constant occurs on the RHS, and fold constant
    // comparisons.
    return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
  }

  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const APInt &C1 = N1C->getAPIntValue();

    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
    // equality comparison, then we're just comparing whether X itself is
    // zero.
    if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
        N0.getOperand(0).getOpcode() == ISD::CTLZ &&
        N0.getOperand(1).getOpcode() == ISD::Constant) {
      unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
          ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
        if ((C1 == 0) == (Cond == ISD::SETEQ)) {
          // (srl (ctlz x), 5) == 0  -> X != 0
          // (srl (ctlz x), 5) != 1  -> X != 0
          Cond = ISD::SETNE;
        } else {
          // (srl (ctlz x), 5) != 0  -> X == 0
          // (srl (ctlz x), 5) == 1  -> X == 0
          Cond = ISD::SETEQ;
        }
        SDValue Zero = DAG.getConstant(0, N0.getValueType());
        return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
                            Zero, Cond);
      }
    }

    // If the LHS is '(and load, const)', the RHS is 0,
    // the test is for equality or unsigned, and all 1 bits of the const are
    // in the same partial word, see if we can shorten the load.
    if (DCI.isBeforeLegalize() &&
        N0.getOpcode() == ISD::AND && C1 == 0 &&
        N0.getNode()->hasOneUse() &&
        isa<LoadSDNode>(N0.getOperand(0)) &&
        N0.getOperand(0).getNode()->hasOneUse() &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
      uint64_t bestMask = 0;
      unsigned bestWidth = 0, bestOffset = 0;
      if (!Lod->isVolatile() && Lod->isUnindexed() &&
          // FIXME: This uses getZExtValue() below so it only works on i64 and
          // below.
          N0.getValueType().getSizeInBits() <= 64) {
        unsigned origWidth = N0.getValueType().getSizeInBits();
        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
        // 8 bits, but have to be careful...
        if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
          origWidth = Lod->getMemoryVT().getSizeInBits();
        uint64_t Mask = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
        for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
          uint64_t newMask = (1ULL << width) - 1;
          for (unsigned offset = 0; offset < origWidth/width; offset++) {
            if ((newMask & Mask) == Mask) {
              if (!TD->isLittleEndian())
                bestOffset = (origWidth/width - offset - 1) * (width/8);
              else
                bestOffset = (uint64_t)offset * (width/8);
              bestMask = Mask >> (offset * (width/8) * 8);
              bestWidth = width;
              break;
            }
            newMask = newMask << width;
          }
        }
      }
      if (bestWidth) {
        MVT newVT = MVT::getIntegerVT(bestWidth);
        if (newVT.isRound()) {
          MVT PtrType = Lod->getOperand(1).getValueType();
          SDValue Ptr = Lod->getBasePtr();
          if (bestOffset != 0)
            Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
                              DAG.getConstant(bestOffset, PtrType));
          unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
          SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
                                        Lod->getSrcValue(),
                                        Lod->getSrcValueOffset() + bestOffset,
                                        false, NewAlign);
          return DAG.getSetCC(dl, VT,
                              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
                                          DAG.getConstant(bestMask, newVT)),
                              DAG.getConstant(0LL, newVT), Cond);
        }
      }
    }
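
    // For example, on a little-endian target a check like
    //   (i32 (and (load p), 0x00FF0000)) == 0
    // can be narrowed to test a single partial word:
    //   (i16 (and (load p+2), 0xFF)) == 0
    // (the pointer offset and masks here are illustrative).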

    // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
      unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();

      // If the comparison constant has bits in the upper part, the
      // zero-extended value could never match.
      if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
                                              C1.getBitWidth() - InSize))) {
        switch (Cond) {
        case ISD::SETUGT:
        case ISD::SETUGE:
        case ISD::SETEQ: return DAG.getConstant(0, VT);
        case ISD::SETULT:
        case ISD::SETULE:
        case ISD::SETNE: return DAG.getConstant(1, VT);
        case ISD::SETGT:
        case ISD::SETGE:
          // True if the sign bit of C1 is set.
          return DAG.getConstant(C1.isNegative(), VT);
        case ISD::SETLT:
        case ISD::SETLE:
          // True if the sign bit of C1 isn't set.
          return DAG.getConstant(C1.isNonNegative(), VT);
        default:
          break;
        }
      }

      // Otherwise, we can perform the comparison with the low bits.
      switch (Cond) {
      case ISD::SETEQ:
      case ISD::SETNE:
      case ISD::SETUGT:
      case ISD::SETUGE:
      case ISD::SETULT:
      case ISD::SETULE: {
        MVT newVT = N0.getOperand(0).getValueType();
        if (DCI.isBeforeLegalizeOps() ||
            (isOperationLegal(ISD::SETCC, newVT) &&
             getCondCodeAction(Cond, newVT) == Legal))
          return DAG.getSetCC(dl, VT, N0.getOperand(0),
                              DAG.getConstant(APInt(C1).trunc(InSize), newVT),
                              Cond);
        break;
      }
      default:
        break;   // todo, be more careful with signed comparisons
      }
    } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      MVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
      unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
      MVT ExtDstTy = N0.getValueType();
      unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();

      // If the extended part has any inconsistent bits, it cannot ever
      // compare equal.  In other words, they have to be all ones or all
      // zeros.
      APInt ExtBits =
        APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits);
      if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
        return DAG.getConstant(Cond == ISD::SETNE, VT);

      SDValue ZextOp;
      MVT Op0Ty = N0.getOperand(0).getValueType();
      if (Op0Ty == ExtSrcTy) {
        ZextOp = N0.getOperand(0);
      } else {
        APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
        ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
                             DAG.getConstant(Imm, Op0Ty));
      }
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(ZextOp.getNode());
      // Otherwise, make this a use of a zext.
      return DAG.getSetCC(dl, VT, ZextOp,
                          DAG.getConstant(C1 & APInt::getLowBitsSet(
                                                            ExtDstTyBits,
                                                            ExtSrcTyBits),
                                          ExtDstTy),
                          Cond);
    } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {

      // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
      if (N0.getOpcode() == ISD::SETCC) {
        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1);
        if (TrueWhenTrue)
          return N0;

        // Invert the condition.
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        CC = ISD::getSetCCInverse(CC,
                                  N0.getOperand(0).getValueType().isInteger());
        return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
      }

      if ((N0.getOpcode() == ISD::XOR ||
           (N0.getOpcode() == ISD::AND &&
            N0.getOperand(0).getOpcode() == ISD::XOR &&
            N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
          isa<ConstantSDNode>(N0.getOperand(1)) &&
          cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
        // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
        // can only do this if the top bits are known zero.
        unsigned BitWidth = N0.getValueSizeInBits();
        if (DAG.MaskedValueIsZero(N0,
                                  APInt::getHighBitsSet(BitWidth,
                                                        BitWidth-1))) {
          // Okay, get the un-inverted input value.
          SDValue Val;
          if (N0.getOpcode() == ISD::XOR)
            Val = N0.getOperand(0);
          else {
            assert(N0.getOpcode() == ISD::AND &&
                   N0.getOperand(0).getOpcode() == ISD::XOR);
            // ((X^1)&1)^1 -> X & 1
            Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
                              N0.getOperand(0).getOperand(0),
                              N0.getOperand(1));
          }
          return DAG.getSetCC(dl, VT, Val, N1,
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
      }
    }
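
    // As an example of the zero-extension case above: comparing
    //   (zext i8 X to i32) == 300
    // can never be true, since 300 does not fit in 8 bits, while
    //   (zext i8 X to i32) == 42
    // becomes an i8 comparison X == 42 on the narrower type
    // (values here are illustrative).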

    APInt MinVal, MaxVal;
    unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
    if (ISD::isSignedIntSetCC(Cond)) {
      MinVal = APInt::getSignedMinValue(OperandBitSize);
      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
    } else {
      MinVal = APInt::getMinValue(OperandBitSize);
      MaxVal = APInt::getMaxValue(OperandBitSize);
    }

    // Canonicalize GE/LE comparisons to use GT/LT comparisons.
    if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
      if (C1 == MinVal) return DAG.getConstant(1, VT);   // X >= MIN --> true
      // X >= C0 --> X > (C0-1)
      return DAG.getSetCC(dl, VT, N0,
                          DAG.getConstant(C1-1, N1.getValueType()),
                          (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
    }

    if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
      if (C1 == MaxVal) return DAG.getConstant(1, VT);   // X <= MAX --> true
      // X <= C0 --> X < (C0+1)
      return DAG.getSetCC(dl, VT, N0,
                          DAG.getConstant(C1+1, N1.getValueType()),
                          (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
    }

    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
      return DAG.getConstant(0, VT);      // X < MIN --> false
    if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
      return DAG.getConstant(1, VT);      // X >= MIN --> true
    if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
      return DAG.getConstant(0, VT);      // X > MAX --> false
    if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
      return DAG.getConstant(1, VT);      // X <= MAX --> true

    // Canonicalize setgt X, Min --> setne X, Min
    if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
      return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
    // Canonicalize setlt X, Max --> setne X, Max
    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
      return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

    // If we have setult X, 1, turn it into seteq X, 0
    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
      return DAG.getSetCC(dl, VT, N0,
                          DAG.getConstant(MinVal, N0.getValueType()),
                          ISD::SETEQ);
    // If we have setugt X, Max-1, turn it into seteq X, Max
    else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
      return DAG.getSetCC(dl, VT, N0,
                          DAG.getConstant(MaxVal, N0.getValueType()),
                          ISD::SETEQ);
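
    // E.g. for unsigned i8 operands: "X >=u 5" becomes "X >u 4", and
    // "X <u 1" becomes "X == 0", while comparisons against the extreme
    // values fold to constants outright ("X <=u 255" is always true).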

    // If we have "setcc X, C0", check to see if we can shrink the immediate
    // by changing cc.

    // SETUGT X, SINTMAX  -> SETLT X, 0
    if (Cond == ISD::SETUGT &&
        C1 == APInt::getSignedMaxValue(OperandBitSize))
      return DAG.getSetCC(dl, VT, N0,
                          DAG.getConstant(0, N1.getValueType()),
                          ISD::SETLT);

    // SETULT X, SINTMIN  -> SETGT X, -1
    if (Cond == ISD::SETULT &&
        C1 == APInt::getSignedMinValue(OperandBitSize)) {
      SDValue ConstMinusOne =
        DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
                        N1.getValueType());
      return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
    }

    // Fold bit comparisons when we can.
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
      if (ConstantSDNode *AndRHS =
                  dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        MVT ShiftTy = DCI.isBeforeLegalize() ?
          getPointerTy() : getShiftAmountTy();
        if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
          // Perform the xform if the AND RHS is a single bit.
          if (isPowerOf2_64(AndRHS->getZExtValue())) {
            return DAG.getNode(ISD::SRL, dl, VT, N0,
                               DAG.getConstant(Log2_64(AndRHS->getZExtValue()),
                                               ShiftTy));
          }
        } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) {
          // (X & 8) == 8  -->  (X & 8) >> 3
          // Perform the xform if C1 is a single bit.
          if (C1.isPowerOf2()) {
            return DAG.getNode(ISD::SRL, dl, VT, N0,
                               DAG.getConstant(C1.logBase2(), ShiftTy));
          }
        }
      }
  }

  if (isa<ConstantFPSDNode>(N0.getNode())) {
    // Constant fold or commute setcc.
    SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
    if (O.getNode()) return O;
  } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
    // If the RHS of an FP comparison is a constant, simplify it away in
    // some cases.
    if (CFP->getValueAPF().isNaN()) {
      // If an operand is known to be a nan, we can fold it.
      switch (ISD::getUnorderedFlavor(Cond)) {
      default: llvm_unreachable("Unknown flavor!");
      case 0:  // Known false.
        return DAG.getConstant(0, VT);
      case 1:  // Known true.
        return DAG.getConstant(1, VT);
      case 2:  // Undefined.
        return DAG.getUNDEF(VT);
      }
    }

    // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
    // constant if knowing that the operand is non-nan is enough.  We prefer to
    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
    // materialize 0.0.
    if (Cond == ISD::SETO || Cond == ISD::SETUO)
      return DAG.getSetCC(dl, VT, N0, N0, Cond);
  }

  if (N0 == N1) {
    // We can always fold X == X for integer setcc's.
    if (N0.getValueType().isInteger())
      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
    unsigned UOF = ISD::getUnorderedFlavor(Cond);
    if (UOF == 2)   // FP operators that are undefined on NaNs.
      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
    if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
      return DAG.getConstant(UOF, VT);
    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
    // if it is not already.
    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
    if (NewCond != Cond)
      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
  }

  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
      N0.getValueType().isInteger()) {
    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
        N0.getOpcode() == ISD::XOR) {
      // Simplify (X+Y) == (X+Z) -->  Y == Z
      if (N0.getOpcode() == N1.getOpcode()) {
        if (N0.getOperand(0) == N1.getOperand(0))
          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
        if (N0.getOperand(1) == N1.getOperand(1))
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
        if (DAG.isCommutativeBinOp(N0.getOpcode())) {
          // If X op Y == Y op X, try other combinations.
          if (N0.getOperand(0) == N1.getOperand(1))
            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
                                Cond);
          if (N0.getOperand(1) == N1.getOperand(0))
            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
                                Cond);
        }
      }

      if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
        if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          // Turn (X+C1) == C2 --> X == C2-C1
          if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
            return DAG.getSetCC(dl, VT, N0.getOperand(0),
                                DAG.getConstant(RHSC->getAPIntValue()-
                                                LHSR->getAPIntValue(),
                                N0.getValueType()), Cond);
          }

          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
          if (N0.getOpcode() == ISD::XOR)
            // If we know that all of the inverted bits are zero, don't bother
            // performing the inversion.
            if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
              return
                DAG.getSetCC(dl, VT, N0.getOperand(0),
                             DAG.getConstant(LHSR->getAPIntValue() ^
                                               RHSC->getAPIntValue(),
                                             N0.getValueType()),
                             Cond);
        }

        // Turn (C1-X) == C2 --> X == C1-C2
        if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
          if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
            return
              DAG.getSetCC(dl, VT, N0.getOperand(1),
                           DAG.getConstant(SUBC->getAPIntValue() -
                                             RHSC->getAPIntValue(),
                                           N0.getValueType()),
                           Cond);
          }
        }
      }

      // Simplify (X+Z) == X -->  Z == 0
      if (N0.getOperand(0) == N1)
        return DAG.getSetCC(dl, VT, N0.getOperand(1),
                            DAG.getConstant(0, N0.getValueType()), Cond);
      if (N0.getOperand(1) == N1) {
        if (DAG.isCommutativeBinOp(N0.getOpcode()))
          return DAG.getSetCC(dl, VT, N0.getOperand(0),
                              DAG.getConstant(0, N0.getValueType()), Cond);
        else if (N0.getNode()->hasOneUse()) {
          assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
          // (Z-X) == X  --> Z == X<<1
          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
                                   N1,
                                   DAG.getConstant(1, getShiftAmountTy()));
          if (!DCI.isCalledByLegalizer())
            DCI.AddToWorklist(SH.getNode());
          return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
        }
      }
    }

    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
        N1.getOpcode() == ISD::XOR) {
      // Simplify  X == (X+Z) -->  Z == 0
      if (N1.getOperand(0) == N0) {
        return DAG.getSetCC(dl, VT, N1.getOperand(1),
                            DAG.getConstant(0, N1.getValueType()), Cond);
      } else if (N1.getOperand(1) == N0) {
        if (DAG.isCommutativeBinOp(N1.getOpcode())) {
          return DAG.getSetCC(dl, VT, N1.getOperand(0),
                              DAG.getConstant(0, N1.getValueType()), Cond);
        } else if (N1.getNode()->hasOneUse()) {
          assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
          // X == (Z-X)  --> X<<1 == Z
          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
                                   DAG.getConstant(1, getShiftAmountTy()));
          if (!DCI.isCalledByLegalizer())
            DCI.AddToWorklist(SH.getNode());
          return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
        }
      }
    }

    // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
    // Note that where y is variable and is known to have at most
    // one bit set (for example, if it is z&1) we cannot do this;
    // the expressions are not equivalent when y==0.
    if (N0.getOpcode() == ISD::AND)
      if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
        if (ValueHasExactlyOneBitSet(N1, DAG)) {
          Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
          SDValue Zero = DAG.getConstant(0, N1.getValueType());
          return DAG.getSetCC(dl, VT, N0, Zero, Cond);
        }
      }
    if (N1.getOpcode() == ISD::AND)
      if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
        if (ValueHasExactlyOneBitSet(N0, DAG)) {
          Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
          SDValue Zero = DAG.getConstant(0, N0.getValueType());
          return DAG.getSetCC(dl, VT, N1, Zero, Cond);
        }
      }
  }
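
  // The i1 lowering below rewrites each comparison in terms of AND/OR/XOR.
  // For instance, with one-bit operands "X <u Y" holds only for X = 0 and
  // Y = 1, which is exactly "~X & Y".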

  // Fold away ALL boolean setcc's.
  SDValue Temp;
  if (N0.getValueType() == MVT::i1 && foldBooleans) {
    switch (Cond) {
    default: llvm_unreachable("Unknown integer setcc!");
    case ISD::SETEQ:  // X == Y  -> ~(X^Y)
      Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
      N0 = DAG.getNOT(dl, Temp, MVT::i1);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETNE:  // X != Y  -->  (X^Y)
      N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
      break;
    case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
    case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
      Temp = DAG.getNOT(dl, N0, MVT::i1);
      N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
    case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
      Temp = DAG.getNOT(dl, N1, MVT::i1);
      N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
    case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
      Temp = DAG.getNOT(dl, N0, MVT::i1);
      N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
    case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
      Temp = DAG.getNOT(dl, N1, MVT::i1);
      N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
      break;
    }
    if (VT != MVT::i1) {
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(N0.getNode());
      // FIXME: If running after legalize, we probably can't do this.
      N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
    }
    return N0;
  }

  // Could not fold it.
  return SDValue();
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
                                    int64_t &Offset) const {
  if (isa<GlobalAddressSDNode>(N)) {
    GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  if (N->getOpcode() == ISD::ADD) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }
  return false;
}

/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
/// location that is 'Dist' units away from the location that the 'Base' load
/// is loading from.
bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
                                       unsigned Bytes, int Dist,
                                       const MachineFrameInfo *MFI) const {
  if (LD->getChain() != Base->getChain())
    return false;
  MVT VT = LD->getValueType(0);
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue Loc = LD->getOperand(1);
  SDValue BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
  }
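
  // E.g. two 4-byte stack objects whose frame offsets are 12 and 8: the
  // first load is consecutive to the second for Bytes = 4, Dist = 1
  // (offsets here are illustrative).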

  if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
    if (V && (V->getSExtValue() == Dist*Bytes))
      return true;
  }

  GlobalValue *GV1 = NULL;
  GlobalValue *GV2 = NULL;
  int64_t Offset1 = 0;
  int64_t Offset2 = 0;
  bool isGA1 = isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}

SDValue TargetLowering::
PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
TargetLowering::getConstraintType(const std::string &Constraint) const {
  // FIXME: lots more standard ones to handle.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'r': return C_RegisterClass;
    case 'm':    // memory
    case 'o':    // offsetable
    case 'V':    // not offsetable
      return C_Memory;
    case 'i':    // Simple Integer or Relocatable Constant
    case 'n':    // Simple Integer
    case 's':    // Relocatable Constant
    case 'X':    // Allow ANY value.
    case 'I':    // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
      return C_Other;
    }
  }

  if (Constraint.size() > 1 && Constraint[0] == '{' &&
      Constraint[Constraint.size()-1] == '}')
    return C_Register;
  return C_Unknown;
}

/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
const char *TargetLowering::LowerXConstraint(MVT ConstraintVT) const {
  if (ConstraintVT.isInteger())
    return "r";
  if (ConstraintVT.isFloatingPoint())
    return "f";      // works for many targets
  return 0;
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  char ConstraintLetter,
                                                  bool hasMemory,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {
  switch (ConstraintLetter) {
  default: break;
  case 'X':    // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock) {
      Ops.push_back(Op);
      return;
    }
    // fall through
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant
    // These operands are interested in values of the form (GV+C), where C may
    // be folded in as an offset of GV, or it may be explicitly added.  Also, it
    // is possible and fine if either GV or C are missing.
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);

    // If we have "(add GV, C)", pull out GV/C
    if (Op.getOpcode() == ISD::ADD) {
      C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C == 0 || GA == 0) {
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
      }
      if (C == 0 || GA == 0)
        C = 0, GA = 0;
    }

    // If we find a valid operand, map to the TargetXXX version so that the
    // value itself doesn't get selected.
    if (GA) {   // Either &GV or &GV+C
      if (ConstraintLetter != 'n') {
        int64_t Offs = GA->getOffset();
        if (C) Offs += C->getZExtValue();
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
                                                 Op.getValueType(), Offs));
        return;
      }
    }
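
    // For example, an "i" operand of the form (add (globaladdr @g), 4) is
    // mapped above to a TargetGlobalAddress of @g with offset 4 (operand
    // shown for illustration).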
    if (C) {   // just C, no GV.
      // Simple constants are not allowed for 's'.
      if (ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
                                            MVT::i64));
        return;
      }
    }
    break;
  }
  }
}

std::vector<unsigned> TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT VT) const {
  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint,
                             MVT VT) const {
  if (Constraint[0] != '{')
    return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
  assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");

  // Remove the braces from around the name.
  std::string RegName(Constraint.begin()+1, Constraint.end()-1);

  // Figure out which register class contains this reg.
  const TargetRegisterInfo *RI = TM.getRegisterInfo();
  for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
       E = RI->regclass_end(); RCI != E; ++RCI) {
    const TargetRegisterClass *RC = *RCI;

    // If none of the value types for this register class are valid, we
    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
    bool isLegal = false;
    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
         I != E; ++I) {
      if (isTypeLegal(*I)) {
        isLegal = true;
        break;
      }
    }

    if (!isLegal) continue;

    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
         I != E; ++I) {
      if (StringsEqualNoCase(RegName, RI->get(*I).AsmName))
        return std::make_pair(*I, RC);
    }
  }

  return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
}

//===----------------------------------------------------------------------===//
//  Constraint Selection.
//===----------------------------------------------------------------------===//

/// isMatchingInputConstraint - Return true if this is an input operand that is
/// a matching constraint like "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return isdigit(ConstraintCode[0]);
}

/// getMatchedOperand - If this is an input matching constraint, this method
/// returns the output operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return atoi(ConstraintCode.c_str());
}

/// getConstraintGenerality - Return an integer indicating how general CT
/// is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
  switch (CT) {
  default: llvm_unreachable("Unknown constraint type!");
  case TargetLowering::C_Other:
  case TargetLowering::C_Unknown:
    return 0;
  case TargetLowering::C_Register:
    return 1;
  case TargetLowering::C_RegisterClass:
    return 2;
  case TargetLowering::C_Memory:
    return 3;
  }
}

/// ChooseConstraint - If there are multiple different constraints that we
/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
///    Other         -> immediates and magic values
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it.  The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory.  Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it.  This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present.  This prefers
///     'm' over 'r', for example.
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
                             bool hasMemory, const TargetLowering &TLI,
                             SDValue Op, SelectionDAG *DAG) {
  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
  unsigned BestIdx = 0;
  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
  int BestGenerality = -1;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
    TargetLowering::ConstraintType CType =
      TLI.getConstraintType(OpInfo.Codes[i]);

    // If this is an 'other' constraint, see if the operand is valid for it.
    // For example, on X86 we might have an 'rI' constraint.  If the operand
    // is an integer in the range [0..31] we want to use I (saving a load
    // of a register), otherwise we must use 'r'.
    if (CType == TargetLowering::C_Other && Op.getNode()) {
      assert(OpInfo.Codes[i].size() == 1 &&
             "Unhandled multi-letter 'other' constraint");
      std::vector<SDValue> ResultOps;
      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory,
                                       ResultOps, *DAG);
      if (!ResultOps.empty()) {
        BestType = CType;
        BestIdx = i;
        break;
      }
    }

    // This constraint letter is more general than the previous one, use it.
    int Generality = getConstraintGenerality(CType);
    if (Generality > BestGenerality) {
      BestType = CType;
      BestIdx = i;
      BestGenerality = Generality;
    }
  }

  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
  OpInfo.ConstraintType = BestType;
}

/// ComputeConstraintToUse - Determines the constraint code and constraint
/// type to use for the specific AsmOperandInfo, setting
/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            bool hasMemory,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG);
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Labels and constants are handled elsewhere ('X' is the only thing
    // that matches labels).  For Functions, the type here is the type of
    // the result, which is not what we want to look at; leave them alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
      OpInfo.CallOperandVal = v;
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}

//===----------------------------------------------------------------------===//
//  Loop Strength Reduction hooks
//===----------------------------------------------------------------------===//

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                           const Type *Ty) const {
  // The default implementation of this implements a conservative RISCy, r+r
  // and r+i addr mode.

  // Allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;
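
  // Thus "r", "r+i" with a 16-bit signed offset, "r+r" and "2*r" are
  // accepted by this default hook, while e.g. "r+r+i" or a global base
  // address are rejected.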

  // Only support r+r,
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  }

  return true;
}

/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  std::vector<SDNode*>* Created) const {
  MVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
  APInt::ms magics = d.magic();
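
  // Worked example (constants per Hacker's Delight): for a 32-bit signed
  // divide by 7, magic() yields m = 0x92492493 (negative) and s = 2, so
  // the sequence below is a mulhs, an add of the numerator, an sra by 2,
  // and the final sign-bit correction.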

  // Multiply the numerator (operand 0) by the magic value
  // FIXME: We should support doing a MUL in a wider type
  SDValue Q;
  if (isOperationLegalOrCustom(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
                    DAG.getConstant(magics.m, VT));
  else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
    Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
                            N->getOperand(0),
                            DAG.getConstant(magics.m, VT)).getNode(), 1);
  else
    return SDValue();       // No mulhs or equivalent
  // If d > 0 and m < 0, add the numerator
  if (d.isStrictlyPositive() && magics.m.isNegative()) {
    Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
    if (Created)
      Created->push_back(Q.getNode());
  }
  // If d < 0 and m > 0, subtract the numerator.
  if (d.isNegative() && magics.m.isStrictlyPositive()) {
    Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
    if (Created)
      Created->push_back(Q.getNode());
  }
  // Shift right algebraic if shift value is nonzero
  if (magics.s > 0) {
    Q = DAG.getNode(ISD::SRA, dl, VT, Q,
                    DAG.getConstant(magics.s, getShiftAmountTy()));
    if (Created)
      Created->push_back(Q.getNode());
  }
  // Extract the sign bit and add it to the quotient
  SDValue T =
    DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
                                                     getShiftAmountTy()));
  if (Created)
    Created->push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}

/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  std::vector<SDNode*>* Created) const {
  MVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // FIXME: We should use a narrower constant when the upper
  // bits are known to be zero.
  ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
  APInt::mu magics = N1C->getAPIntValue().magicu();
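
  // Worked example (constants per Hacker's Delight): for a 32-bit unsigned
  // divide by 7, magicu() yields m = 0x24924925 with the "add" indicator
  // a != 0 and s = 3, so the fixup path below (sub, srl 1, add, srl 2) is
  // taken.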

  // Multiply the numerator (operand 0) by the magic value
  // FIXME: We should support doing a MUL in a wider type
  SDValue Q;
  if (isOperationLegalOrCustom(ISD::MULHU, VT))
    Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
                    DAG.getConstant(magics.m, VT));
  else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
    Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
                            N->getOperand(0),
                            DAG.getConstant(magics.m, VT)).getNode(), 1);
  else
    return SDValue();       // No mulhu or equivalent
  if (Created)
    Created->push_back(Q.getNode());

  if (magics.a == 0) {
    assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
           "We shouldn't generate an undefined shift!");
    return DAG.getNode(ISD::SRL, dl, VT, Q,
                       DAG.getConstant(magics.s, getShiftAmountTy()));
  } else {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
    if (Created)
      Created->push_back(NPQ.getNode());
    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
                      DAG.getConstant(1, getShiftAmountTy()));
    if (Created)
      Created->push_back(NPQ.getNode());
    NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    if (Created)
      Created->push_back(NPQ.getNode());
    return DAG.getNode(ISD::SRL, dl, VT, NPQ,
                       DAG.getConstant(magics.s-1, getShiftAmountTy()));
  }
}