; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s
;
; This test used to crash with the following assertion:
; llc: include/llvm/ADT/IntervalMap.h:632: unsigned int llvm::IntervalMapImpl::LeafNode<llvm::SlotIndex, llvm::LiveInterval *, 8, llvm::IntervalMapInfo<llvm::SlotIndex> >::insertFrom(unsigned int &, unsigned int, KeyT, KeyT, ValT) [KeyT = llvm::SlotIndex, ValT = llvm::LiveInterval *, N = 8, Traits = llvm::IntervalMapInfo<llvm::SlotIndex>]: Assertion `(i == Size || Traits::stopLess(b, start(i))) && "Overlapping insert"' failed.
;
; This was related to incorrectly calculating subregister live ranges
; (i.e. live interval subranges): subregister defs are not uses for that
; purpose.
;
; Check for a valid output.
; CHECK: image_sample_c
; Pixel-shader entry point (amdgpu_ps calling convention) used as the crash
; reproducer for this test. The instruction sequence is kept as-is, including
; values that have no users, because the test depends on this exact shape.
;
; Control flow:
;   main_body -> IF26 -> LOOP -> ENDIF28 -> LOOP ...
;   main_body -> ENDIF25 (return)
;   LOOP      -> IF29 -> ENDIF25 (edge guarded by a constant-false condition)
;
; Of the returned struct, only elements 11, 14 and 24 receive non-undef values.
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main(ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg, ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg1, ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg2, ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg3, ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg4, float inreg %arg5, i32 inreg %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <3 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, <2 x i32> %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, i32 %arg20, float %arg21, i32 %arg22) #0 {
; The entry block is labelled explicitly because the phi in ENDIF25 and the
; preds annotations refer to it as %main_body.
main_body:
  ; Interpolate one attribute channel from the two components of %arg8.
  %i.i = extractelement <2 x i32> %arg8, i32 0
  %j.i = extractelement <2 x i32> %arg8, i32 1
  %i.f.i = bitcast i32 %i.i to float
  %j.f.i = bitcast i32 %j.i to float
  %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 4, i32 %arg6) #2
  %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 4, i32 %arg6) #2
  %tmp23 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)

  ; %tmp25 stays live until the return in ENDIF25 (struct element 14).
  %tmp24 = extractelement <4 x float> %tmp23, i32 3
  %tmp25 = fmul float %tmp24, %tmp24
  ; %tmp27 is reused by the sample.c calls below and in ENDIF28.
  %tmp26 = fmul float %p2.i, %p2.i
  %tmp27 = fadd float %tmp26, %tmp26
  %tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float 0.0, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp33 = extractelement <4 x float> %tmp32, i32 0
  %tmp34 = fadd float %tmp33, %tmp33
  %tmp35 = fadd float %tmp34, %tmp34
  %tmp36 = fadd float %tmp35, %tmp35
  %tmp37 = fadd float %tmp36, %tmp36
  %tmp38 = fadd float %tmp37, %tmp37
  ; All four lanes of this sample result are extracted and used separately.
  %tmp39 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp40 = extractelement <4 x float> %tmp39, i32 0
  %tmp41 = extractelement <4 x float> %tmp39, i32 1
  %tmp42 = extractelement <4 x float> %tmp39, i32 2
  %tmp43 = extractelement <4 x float> %tmp39, i32 3
  %tmp44 = fmul float %tmp40, %tmp40
  %tmp45 = fmul float %tmp41, %tmp41
  %tmp46 = fmul float %tmp42, %tmp41
  %tmp47 = fmul float %tmp43, %tmp43
  %tmp48 = fadd float %tmp44, %tmp44
  %tmp49 = fadd float %tmp45, %tmp45
  ; The vector built from %tmp50..%tmp55.cast has no users in this function.
  %tmp50 = bitcast float %tmp27 to i32
  %tmp51 = bitcast float %tmp48 to i32
  %tmp52 = bitcast float %tmp49 to i32
  %tmp53 = insertelement <4 x i32> undef, i32 %tmp50, i32 0
  %tmp54 = insertelement <4 x i32> %tmp53, i32 %tmp51, i32 1
  %tmp55 = insertelement <4 x i32> %tmp54, i32 %tmp52, i32 2
  %tmp55.cast = bitcast <4 x i32> %tmp55 to <4 x float>
  %tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float %tmp48, float %tmp49, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp57 = extractelement <4 x float> %tmp56, i32 0
  %tmp58 = fadd float %tmp38, %tmp57
  %tmp59 = fadd float %tmp46, %tmp46
  %tmp60 = fadd float %tmp47, %tmp47
  %tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float undef, float %tmp59, float %tmp60, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp66 = extractelement <4 x float> %tmp65, i32 0
  %tmp67 = fadd float %tmp58, %tmp66
  ; Scale the accumulated sum by 1/8, square it, and branch on the square
  ; comparing unordered-or-not-equal to zero.
  %tmp68 = fmul float %tmp67, 1.250000e-01
  %tmp69 = fmul float %tmp68, %tmp68
  %tmp70 = fcmp une float %tmp69, 0.000000e+00
  br i1 %tmp70, label %IF26, label %ENDIF25

IF26:                                             ; preds = %main_body
  ; Neither value defined here has a user in this function.
  %tmp71 = bitcast float %tmp27 to i32
  %tmp72 = insertelement <4 x i32> undef, i32 %tmp71, i32 0
  br label %LOOP

ENDIF25:                                          ; preds = %IF29, %main_body
  ; Common exit: clamp 2 * %.4 to [0.0, 1.0] and build the return struct.
  %.4 = phi float [ %tmp84, %IF29 ], [ %tmp68, %main_body ]
  %tmp73 = fadd float %.4, %.4
  %max.0.i = call float @llvm.maxnum.f32(float %tmp73, float 0.000000e+00)
  %clamp.i = call float @llvm.minnum.f32(float %max.0.i, float 1.000000e+00)
  %tmp75 = fmul float %clamp.i, %clamp.i
  %tmp76 = fmul float %tmp75, %tmp75
  %tmp77 = fadd float %tmp76, %tmp76
  %tmp78 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, float %tmp77, 11
  %tmp79 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp78, float undef, 12
  %tmp80 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp79, float undef, 13
  %tmp81 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp80, float %tmp25, 14
  %tmp82 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp81, float undef, 15
  %tmp83 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp82, float %arg21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp83

LOOP:                                             ; preds = %ENDIF28, %IF26
  ; Loop header. The exit condition is the constant false, so the IF29 edge
  ; exists in the CFG but is never selected by this branch.
  %.5 = phi float [ undef, %IF26 ], [ %tmp89, %ENDIF28 ]
  br i1 false, label %IF29, label %ENDIF28

IF29:                                             ; preds = %LOOP
  ; Loop exit: scale the loop-carried value and feed it to the phi in ENDIF25.
  %tmp84 = fmul float %.5, 3.125000e-02
  br label %ENDIF25

ENDIF28:                                          ; preds = %LOOP
  ; Loop body: one more sample.c on %tmp27; the doubled first lane becomes the
  ; next value of %.5.
  %tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp88 = extractelement <4 x float> %tmp87, i32 0
  %tmp89 = fadd float %tmp88, %tmp88
  br label %LOOP
}
; Declarations of the intrinsics called from @main: float min/max used for the
; final clamp, the two-stage attribute interpolation, and the 2D image sample
; variants (plain and sample.c) returning <4 x float>.
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
; Attribute groups.
;   #0 - attached to @main; selects the "tonga" target CPU and sets
;        "InitialPSInputAddr".
;   #1 - attached to the minnum/maxnum and interp.p1/interp.p2 declarations.
;   #2 - attached to the image sample declarations and to the interp call
;        sites inside @main.
;   #3 - not referenced in the visible part of this file; kept unchanged.
attributes #0 = { nounwind "InitialPSInputAddr"="36983" "target-cpu"="tonga" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind readonly }
attributes #3 = { nounwind }