1 ; RUN: llc -march=hexagon -hexagon-bit=0 < %s | FileCheck %s
3 ; This spill should be eliminated.
4 ; CHECK-NOT: vmem(r29+#6)
6 define void @test(i8* noalias nocapture %key, i8* noalias nocapture %data1) #0 {
8 %0 = bitcast i8* %key to <32 x i32>*
9 %1 = bitcast i8* %data1 to <32 x i32>*
13 %pkey.0542 = phi <32 x i32>* [ %0, %entry ], [ null, %for.body ]
14 %pdata0.0541 = phi <32 x i32>* [ null, %entry ], [ %add.ptr48, %for.body ]
15 %pdata1.0540 = phi <32 x i32>* [ %1, %entry ], [ %add.ptr49, %for.body ]
16 %dAccum0.0539 = phi <64 x i32> [ undef, %entry ], [ %86, %for.body ]
17 %2 = load <32 x i32>, <32 x i32>* %pkey.0542, align 128
18 %3 = load <32 x i32>, <32 x i32>* %pdata0.0541, align 128
19 %4 = load <32 x i32>, <32 x i32>* undef, align 128
20 %arrayidx4 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 2
21 %5 = load <32 x i32>, <32 x i32>* %arrayidx4, align 128
22 %arrayidx5 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 2
23 %6 = load <32 x i32>, <32 x i32>* %arrayidx5, align 128
24 %7 = load <32 x i32>, <32 x i32>* null, align 128
25 %8 = load <32 x i32>, <32 x i32>* undef, align 128
26 %9 = load <32 x i32>, <32 x i32>* null, align 128
27 %arrayidx9 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 3
28 %arrayidx10 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 6
29 %10 = load <32 x i32>, <32 x i32>* %arrayidx10, align 128
30 %arrayidx12 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 4
31 %11 = load <32 x i32>, <32 x i32>* %arrayidx12, align 128
32 %arrayidx13 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 8
33 %arrayidx14 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 8
34 %12 = load <32 x i32>, <32 x i32>* %arrayidx14, align 128
35 %arrayidx15 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 5
36 %13 = load <32 x i32>, <32 x i32>* %arrayidx15, align 128
37 %arrayidx16 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 10
38 %arrayidx17 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 10
39 %14 = load <32 x i32>, <32 x i32>* %arrayidx17, align 128
40 %arrayidx18 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 6
41 %15 = load <32 x i32>, <32 x i32>* %arrayidx18, align 128
42 %arrayidx19 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 12
43 %16 = load <32 x i32>, <32 x i32>* %arrayidx19, align 128
44 %arrayidx20 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 12
45 %17 = load <32 x i32>, <32 x i32>* %arrayidx20, align 128
46 %arrayidx22 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 14
47 %18 = load <32 x i32>, <32 x i32>* %arrayidx22, align 128
48 %arrayidx23 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 14
49 %19 = load <32 x i32>, <32 x i32>* %arrayidx23, align 128
50 %20 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11)
51 %21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %11, <32 x i32> %2)
52 %22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %2, <32 x i32> %11)
53 %23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> undef, <32 x i32> %3)
54 %24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %12, <32 x i32> undef)
55 %25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15)
56 %26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %15, <32 x i32> %7)
57 %27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %7, <32 x i32> %15)
58 %28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %16, <32 x i32> %8)
59 %29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %8, <32 x i32> %16)
60 %30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %17, <32 x i32> %9)
61 %31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %9, <32 x i32> %17)
62 %32 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13)
63 %33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %13, <32 x i32> %4)
64 %34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %4, <32 x i32> %13)
65 %35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> undef, <32 x i32> %5)
66 %36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %5, <32 x i32> undef)
67 %37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %14, <32 x i32> %6)
68 %38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %6, <32 x i32> %14)
69 %39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
70 %40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer)
71 %41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10)
72 %42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18)
73 %43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef)
74 %44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19)
75 %45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26)
76 %46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %26, <32 x i32> %21)
77 %47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %21, <32 x i32> %26)
78 %48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %28, <32 x i32> %23)
79 %49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %23, <32 x i32> %28)
80 %50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %30, <32 x i32> %24)
81 %51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %24, <32 x i32> %30)
82 %52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27)
83 %53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %27, <32 x i32> %22)
84 %54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %22, <32 x i32> %27)
85 %55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %29, <32 x i32> undef)
86 %56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> undef, <32 x i32> %31)
87 %57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39)
88 %58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %39, <32 x i32> %33)
89 %59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %33, <32 x i32> %39)
90 %60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %41, <32 x i32> %35)
91 %61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %43, <32 x i32> %37)
92 %62 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40)
93 %63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %42, <32 x i32> %36)
94 %64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %38, <32 x i32> %44)
95 %65 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58)
96 %66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %58, <32 x i32> %46)
97 %67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %60, <32 x i32> %48)
98 %68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %61, <32 x i32> %50)
99 %69 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59)
100 %70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer)
101 %71 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer)
102 %72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %71, <32 x i32> %63, <32 x i32> %55)
103 %73 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef)
104 %74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %73, <32 x i32> %56, <32 x i32> %64)
105 %75 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %68, <32 x i32> %67)
106 %76 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %70, <32 x i32> undef)
107 %77 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> zeroinitializer, <32 x i32> %72)
108 %78 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %74, <32 x i32> zeroinitializer)
109 %79 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %dAccum0.0539, <32 x i32> %75, i32 65537)
110 %80 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %79, <32 x i32> zeroinitializer, i32 65537)
111 %81 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %80, <32 x i32> zeroinitializer, i32 65537)
112 %82 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %81, <32 x i32> %76, i32 65537)
113 %83 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %82, <32 x i32> %77, i32 65537)
114 %84 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %83, <32 x i32> zeroinitializer, i32 65537)
115 %85 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %84, <32 x i32> undef, i32 65537)
116 %86 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %85, <32 x i32> %78, i32 65537)
117 store <32 x i32> %66, <32 x i32>* %pkey.0542, align 128
118 store <32 x i32> %75, <32 x i32>* %pdata0.0541, align 128
119 store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx4, align 128
120 store <32 x i32> zeroinitializer, <32 x i32>* undef, align 128
121 store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx20, align 128
122 store <32 x i32> zeroinitializer, <32 x i32>* null, align 128
123 %add.ptr48 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 16
124 %add.ptr49 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 16
125 br i1 false, label %for.end, label %for.body
128 %87 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %86)
132 declare <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1
134 declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
136 declare <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32>, <32 x i32>) #1
138 declare <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32>, <32 x i32>, i32) #1
140 declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1
142 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
143 attributes #1 = { nounwind readnone }