1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v,+zvl256b | FileCheck %s --check-prefixes=CHECK,V
3 ; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+f,+zve32f,+zvl256b | FileCheck %s --check-prefixes=CHECK,ZVE32F
5 %struct.foo = type { i32, i32, i32, i32 }
7 ; void gather(signed char * __restrict A, signed char * __restrict B) {
8 ; for (int i = 0; i != 1024; ++i)
11 define void @gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
12 ; CHECK-LABEL: @gather(
14 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
16 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
17 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
18 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
19 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
20 ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
21 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
22 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
23 ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1
24 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
25 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160
26 ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
27 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
28 ; CHECK: for.cond.cleanup:
29 ; CHECK-NEXT: ret void
34 vector.body: ; preds = %vector.body, %entry
35 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
36 %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ]
37 %i = mul nuw nsw <32 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
38 %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %i
39 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
40 %i2 = getelementptr inbounds i8, ptr %A, i64 %index
41 %wide.load = load <32 x i8>, ptr %i2, align 1
42 %i4 = add <32 x i8> %wide.load, %wide.masked.gather
43 store <32 x i8> %i4, ptr %i2, align 1
44 %index.next = add nuw i64 %index, 32
45 %vec.ind.next = add <32 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
46 %i6 = icmp eq i64 %index.next, 1024
47 br i1 %i6, label %for.cond.cleanup, label %vector.body
49 for.cond.cleanup: ; preds = %vector.body
53 define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, <32 x i8> %maskedoff) {
54 ; CHECK-LABEL: @gather_masked(
56 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
58 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
59 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
60 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
61 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> [[MASKEDOFF:%.*]], ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
62 ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
63 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
64 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
65 ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1
66 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
67 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160
68 ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
69 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
70 ; CHECK: for.cond.cleanup:
71 ; CHECK-NEXT: ret void
76 vector.body: ; preds = %vector.body, %entry
77 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
78 %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ]
79 %i = mul nuw nsw <32 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
80 %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %i
81 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <32 x i8> %maskedoff)
82 %i2 = getelementptr inbounds i8, ptr %A, i64 %index
83 %wide.load = load <32 x i8>, ptr %i2, align 1
84 %i4 = add <32 x i8> %wide.load, %wide.masked.gather
85 store <32 x i8> %i4, ptr %i2, align 1
86 %index.next = add nuw i64 %index, 32
87 %vec.ind.next = add <32 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
88 %i6 = icmp eq i64 %index.next, 1024
89 br i1 %i6, label %for.cond.cleanup, label %vector.body
91 for.cond.cleanup: ; preds = %vector.body
95 define void @gather_negative_stride(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
96 ; CHECK-LABEL: @gather_negative_stride(
98 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
100 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
101 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 155, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
102 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
103 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 -5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
104 ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
105 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
106 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
107 ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1
108 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
109 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160
110 ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
111 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
112 ; CHECK: for.cond.cleanup:
113 ; CHECK-NEXT: ret void
116 br label %vector.body
118 vector.body: ; preds = %vector.body, %entry
119 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
120 %vec.ind = phi <32 x i64> [ <i64 31, i64 30, i64 29, i64 28, i64 27, i64 26, i64 25, i64 24, i64 23, i64 22, i64 21, i64 20, i64 19, i64 18, i64 17, i64 16, i64 15, i64 14, i64 13, i64 12, i64 11, i64 10, i64 9, i64 8, i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, %entry ], [ %vec.ind.next, %vector.body ]
121 %i = mul nuw nsw <32 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
122 %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %i
123 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
124 %i2 = getelementptr inbounds i8, ptr %A, i64 %index
125 %wide.load = load <32 x i8>, ptr %i2, align 1
126 %i4 = add <32 x i8> %wide.load, %wide.masked.gather
127 store <32 x i8> %i4, ptr %i2, align 1
128 %index.next = add nuw i64 %index, 32
129 %vec.ind.next = add <32 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
130 %i6 = icmp eq i64 %index.next, 1024
131 br i1 %i6, label %for.cond.cleanup, label %vector.body
133 for.cond.cleanup: ; preds = %vector.body
137 define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
138 ; CHECK-LABEL: @gather_zero_stride(
140 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
141 ; CHECK: vector.body:
142 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
143 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
144 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
145 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 0, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
146 ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
147 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
148 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
149 ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1
150 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
151 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160
152 ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
153 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
154 ; CHECK: for.cond.cleanup:
155 ; CHECK-NEXT: ret void
158 br label %vector.body
160 vector.body: ; preds = %vector.body, %entry
161 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
162 %vec.ind = phi <32 x i64> [ zeroinitializer, %entry ], [ %vec.ind.next, %vector.body ]
163 %i = mul nuw nsw <32 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
164 %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %i
165 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
166 %i2 = getelementptr inbounds i8, ptr %A, i64 %index
167 %wide.load = load <32 x i8>, ptr %i2, align 1
168 %i4 = add <32 x i8> %wide.load, %wide.masked.gather
169 store <32 x i8> %i4, ptr %i2, align 1
170 %index.next = add nuw i64 %index, 32
171 %vec.ind.next = add <32 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
172 %i6 = icmp eq i64 %index.next, 1024
173 br i1 %i6, label %for.cond.cleanup, label %vector.body
175 for.cond.cleanup: ; preds = %vector.body
179 ;void scatter(signed char * __restrict A, signed char * __restrict B) {
180 ; for (int i = 0; i < 1024; ++i)
183 define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
184 ; CHECK-LABEL: @scatter(
186 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
187 ; CHECK: vector.body:
188 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
189 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
190 ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]]
191 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1
192 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]]
193 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
194 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]]
195 ; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
196 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
197 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160
198 ; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
199 ; CHECK-NEXT: br i1 [[I5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
200 ; CHECK: for.cond.cleanup:
201 ; CHECK-NEXT: ret void
204 br label %vector.body
206 vector.body: ; preds = %vector.body, %entry
207 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
208 %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ]
209 %i = getelementptr inbounds i8, ptr %B, i64 %index
210 %wide.load = load <32 x i8>, ptr %i, align 1
211 %i2 = mul nuw nsw <32 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
212 %i3 = getelementptr inbounds i8, ptr %A, <32 x i64> %i2
213 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i3, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
214 %i4 = add <32 x i8> %wide.masked.gather, %wide.load
215 call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %i4, <32 x ptr> %i3, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
216 %index.next = add nuw i64 %index, 32
217 %vec.ind.next = add <32 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
218 %i5 = icmp eq i64 %index.next, 1024
219 br i1 %i5, label %for.cond.cleanup, label %vector.body
221 for.cond.cleanup: ; preds = %vector.body
225 define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, <32 x i8> %maskedoff) {
226 ; CHECK-LABEL: @scatter_masked(
228 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
229 ; CHECK: vector.body:
230 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
231 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
232 ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]]
233 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1
234 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]]
235 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> [[MASKEDOFF:%.*]], ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
236 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]]
237 ; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
238 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
239 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160
240 ; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
241 ; CHECK-NEXT: br i1 [[I5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
242 ; CHECK: for.cond.cleanup:
243 ; CHECK-NEXT: ret void
246 br label %vector.body
248 vector.body: ; preds = %vector.body, %entry
249 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
250 %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ]
251 %i = getelementptr inbounds i8, ptr %B, i64 %index
252 %wide.load = load <32 x i8>, ptr %i, align 1
253 %i2 = mul nuw nsw <32 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
254 %i3 = getelementptr inbounds i8, ptr %A, <32 x i64> %i2
255 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i3, i32 1, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <32 x i8> %maskedoff)
256 %i4 = add <32 x i8> %wide.masked.gather, %wide.load
257 call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %i4, <32 x ptr> %i3, i32 1, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
258 %index.next = add nuw i64 %index, 32
259 %vec.ind.next = add <32 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
260 %i5 = icmp eq i64 %index.next, 1024
261 br i1 %i5, label %for.cond.cleanup, label %vector.body
263 for.cond.cleanup: ; preds = %vector.body
267 ; void gather_pow2(signed char * __restrict A, signed char * __restrict B) {
268 ; for (int i = 0; i != 1024; ++i)
271 define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
272 ; CHECK-LABEL: @gather_pow2(
274 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
275 ; CHECK: vector.body:
276 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
277 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
278 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
279 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP0]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
280 ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
281 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1
282 ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
283 ; CHECK-NEXT: store <8 x i32> [[I4]], ptr [[I2]], align 1
284 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
285 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 32
286 ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
287 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
288 ; CHECK: for.cond.cleanup:
289 ; CHECK-NEXT: ret void
292 br label %vector.body
294 vector.body: ; preds = %vector.body, %entry
295 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
296 %vec.ind = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %entry ], [ %vec.ind.next, %vector.body ]
297 %i = shl nsw <8 x i64> %vec.ind, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
298 %i1 = getelementptr inbounds i32, ptr %B, <8 x i64> %i
299 %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i1, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
300 %i2 = getelementptr inbounds i32, ptr %A, i64 %index
301 %wide.load = load <8 x i32>, ptr %i2, align 1
302 %i4 = add <8 x i32> %wide.load, %wide.masked.gather
303 store <8 x i32> %i4, ptr %i2, align 1
304 %index.next = add nuw i64 %index, 8
305 %vec.ind.next = add <8 x i64> %vec.ind, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
306 %i6 = icmp eq i64 %index.next, 1024
307 br i1 %i6, label %for.cond.cleanup, label %vector.body
309 for.cond.cleanup: ; preds = %vector.body
313 define void @gather_unknown_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, i64 %shift) {
314 ; CHECK-LABEL: @gather_unknown_pow2(
316 ; CHECK-NEXT: [[STEP:%.*]] = shl i64 8, [[SHIFT:%.*]]
317 ; CHECK-NEXT: [[STRIDE:%.*]] = shl i64 1, [[SHIFT]]
318 ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[STRIDE]], 4
319 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
320 ; CHECK: vector.body:
321 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
322 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
323 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
324 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP1]], i64 [[TMP0]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
325 ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
326 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1
327 ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
328 ; CHECK-NEXT: store <8 x i32> [[I4]], ptr [[I2]], align 1
329 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
330 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[STEP]]
331 ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
332 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
333 ; CHECK: for.cond.cleanup:
334 ; CHECK-NEXT: ret void
337 %.splatinsert = insertelement <8 x i64> poison, i64 %shift, i64 0
338 %.splat = shufflevector <8 x i64> %.splatinsert, <8 x i64> poison, <8 x i32> zeroinitializer
339 br label %vector.body
341 vector.body: ; preds = %vector.body, %entry
342 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
343 %vec.ind = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %entry ], [ %vec.ind.next, %vector.body ]
344 %i = shl nsw <8 x i64> %vec.ind, %.splat
345 %i1 = getelementptr inbounds i32, ptr %B, <8 x i64> %i
346 %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i1, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
347 %i2 = getelementptr inbounds i32, ptr %A, i64 %index
348 %wide.load = load <8 x i32>, ptr %i2, align 1
349 %i4 = add <8 x i32> %wide.load, %wide.masked.gather
350 store <8 x i32> %i4, ptr %i2, align 1
351 %index.next = add nuw i64 %index, 8
352 %vec.ind.next = add <8 x i64> %vec.ind, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
353 %i6 = icmp eq i64 %index.next, 1024
354 br i1 %i6, label %for.cond.cleanup, label %vector.body
356 for.cond.cleanup: ; preds = %vector.body
360 define void @negative_shl_non_commute(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, i64 %shift) {
361 ; CHECK-LABEL: @negative_shl_non_commute(
363 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[SHIFT:%.*]], i64 0
364 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
365 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
366 ; CHECK: vector.body:
367 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
368 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
369 ; CHECK-NEXT: [[I:%.*]] = shl nsw <8 x i64> [[DOTSPLAT]], [[VEC_IND]]
370 ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], <8 x i64> [[I]]
371 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[I1]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
372 ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
373 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1
374 ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
375 ; CHECK-NEXT: store <8 x i32> [[I4]], ptr [[I2]], align 1
376 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
377 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
378 ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
379 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
380 ; CHECK: for.cond.cleanup:
381 ; CHECK-NEXT: ret void
384 %.splatinsert = insertelement <8 x i64> poison, i64 %shift, i64 0
385 %.splat = shufflevector <8 x i64> %.splatinsert, <8 x i64> poison, <8 x i32> zeroinitializer
386 br label %vector.body
388 vector.body: ; preds = %vector.body, %entry
389 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
390 %vec.ind = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %entry ], [ %vec.ind.next, %vector.body ]
391 %i = shl nsw <8 x i64> %.splat, %vec.ind
392 %i1 = getelementptr inbounds i32, ptr %B, <8 x i64> %i
393 %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i1, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
394 %i2 = getelementptr inbounds i32, ptr %A, i64 %index
395 %wide.load = load <8 x i32>, ptr %i2, align 1
396 %i4 = add <8 x i32> %wide.load, %wide.masked.gather
397 store <8 x i32> %i4, ptr %i2, align 1
398 %index.next = add nuw i64 %index, 8
399 %vec.ind.next = add <8 x i64> %vec.ind, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
400 %i6 = icmp eq i64 %index.next, 1024
401 br i1 %i6, label %for.cond.cleanup, label %vector.body
403 for.cond.cleanup: ; preds = %vector.body
407 ;void scatter_pow2(signed char * __restrict A, signed char * __restrict B) {
408 ; for (int i = 0; i < 1024; ++i)
411 define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
412 ; CHECK-LABEL: @scatter_pow2(
414 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
415 ; CHECK: vector.body:
416 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
417 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
418 ; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
419 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I]], align 1
420 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]]
421 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP0]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
422 ; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]]
423 ; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I4]], ptr [[TMP0]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
424 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
425 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 32
426 ; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
427 ; CHECK-NEXT: br i1 [[I5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
428 ; CHECK: for.cond.cleanup:
429 ; CHECK-NEXT: ret void
432 br label %vector.body
434 vector.body: ; preds = %vector.body, %entry
435 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
436 %vec.ind = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %entry ], [ %vec.ind.next, %vector.body ]
437 %i = getelementptr inbounds i32, ptr %B, i64 %index
438 %wide.load = load <8 x i32>, ptr %i, align 1
439 %i2 = shl nuw nsw <8 x i64> %vec.ind, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
440 %i3 = getelementptr inbounds i32, ptr %A, <8 x i64> %i2
441 %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i3, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
442 %i4 = add <8 x i32> %wide.masked.gather, %wide.load
443 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %i4, <8 x ptr> %i3, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
444 %index.next = add nuw i64 %index, 8
445 %vec.ind.next = add <8 x i64> %vec.ind, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
446 %i5 = icmp eq i64 %index.next, 1024
447 br i1 %i5, label %for.cond.cleanup, label %vector.body
449 for.cond.cleanup: ; preds = %vector.body
457 ;void struct_gather(int * __restrict A, struct foo * __restrict B) {
458 ; for (int i = 0; i < 1024; ++i)
461 define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
462 ; CHECK-LABEL: @struct_gather(
464 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
465 ; CHECK: vector.body:
466 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
467 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
468 ; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 8, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR2:%.*]], [[VECTOR_BODY]] ]
469 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]], i32 1
470 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[B]], i64 [[VEC_IND_SCALAR1]], i32 1
471 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP0]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
472 ; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP1]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
473 ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
474 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 4
475 ; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr [[I2]], i64 8
476 ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[I4]], align 4
477 ; CHECK-NEXT: [[I6:%.*]] = add nsw <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
478 ; CHECK-NEXT: [[I7:%.*]] = add nsw <8 x i32> [[WIDE_LOAD10]], [[WIDE_MASKED_GATHER9]]
479 ; CHECK-NEXT: store <8 x i32> [[I6]], ptr [[I2]], align 4
480 ; CHECK-NEXT: store <8 x i32> [[I7]], ptr [[I4]], align 4
481 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
482 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 16
483 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR2]] = add i64 [[VEC_IND_SCALAR1]], 16
484 ; CHECK-NEXT: [[I10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
485 ; CHECK-NEXT: br i1 [[I10]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
486 ; CHECK: for.cond.cleanup:
487 ; CHECK-NEXT: ret void
490 br label %vector.body
492 vector.body: ; preds = %vector.body, %entry
493 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
494 %vec.ind = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %entry ], [ %vec.ind.next, %vector.body ]
495 %step.add = add <8 x i64> %vec.ind, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
496 %i = getelementptr inbounds %struct.foo, ptr %B, <8 x i64> %vec.ind, i32 1
497 %i1 = getelementptr inbounds %struct.foo, ptr %B, <8 x i64> %step.add, i32 1
498 %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
499 %wide.masked.gather9 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i1, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
500 %i2 = getelementptr inbounds i32, ptr %A, i64 %index
501 %wide.load = load <8 x i32>, ptr %i2, align 4
502 %i4 = getelementptr inbounds i32, ptr %i2, i64 8
503 %wide.load10 = load <8 x i32>, ptr %i4, align 4
504 %i6 = add nsw <8 x i32> %wide.load, %wide.masked.gather
505 %i7 = add nsw <8 x i32> %wide.load10, %wide.masked.gather9
506 store <8 x i32> %i6, ptr %i2, align 4
507 store <8 x i32> %i7, ptr %i4, align 4
508 %index.next = add nuw i64 %index, 16
509 %vec.ind.next = add <8 x i64> %vec.ind, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
510 %i10 = icmp eq i64 %index.next, 1024
511 br i1 %i10, label %for.cond.cleanup, label %vector.body
513 for.cond.cleanup: ; preds = %vector.body
517 ;void gather_unroll(int * __restrict A, int * __restrict B) {
518 ; for (int i = 0; i < 1024; i+= 4 ) {
520 ; A[i+1] += B[(i+1) * 4];
521 ; A[i+2] += B[(i+2) * 4];
522 ; A[i+3] += B[(i+3) * 4];
525 define void @gather_unroll(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
526 ; CHECK-LABEL: @gather_unroll(
528 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
529 ; CHECK: vector.body:
530 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
531 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
532 ; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR2:%.*]], [[VECTOR_BODY]] ]
533 ; CHECK-NEXT: [[VEC_IND_SCALAR3:%.*]] = phi i64 [ 4, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR4:%.*]], [[VECTOR_BODY]] ]
534 ; CHECK-NEXT: [[VEC_IND_SCALAR5:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR6:%.*]], [[VECTOR_BODY]] ]
535 ; CHECK-NEXT: [[VEC_IND_SCALAR7:%.*]] = phi i64 [ 8, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR8:%.*]], [[VECTOR_BODY]] ]
536 ; CHECK-NEXT: [[VEC_IND_SCALAR9:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR10:%.*]], [[VECTOR_BODY]] ]
537 ; CHECK-NEXT: [[VEC_IND_SCALAR11:%.*]] = phi i64 [ 12, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR12:%.*]], [[VECTOR_BODY]] ]
538 ; CHECK-NEXT: [[VEC_IND_SCALAR13:%.*]] = phi i64 [ 3, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR14:%.*]], [[VECTOR_BODY]] ]
539 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
540 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP0]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
541 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]]
542 ; CHECK-NEXT: [[WIDE_MASKED_GATHER52:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP1]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
543 ; CHECK-NEXT: [[I3:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER52]], [[WIDE_MASKED_GATHER]]
544 ; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I3]], ptr [[TMP1]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
545 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR3]]
546 ; CHECK-NEXT: [[WIDE_MASKED_GATHER53:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP2]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
547 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR5]]
548 ; CHECK-NEXT: [[WIDE_MASKED_GATHER54:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP3]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
549 ; CHECK-NEXT: [[I8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER54]], [[WIDE_MASKED_GATHER53]]
550 ; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I8]], ptr [[TMP3]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
551 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR7]]
552 ; CHECK-NEXT: [[WIDE_MASKED_GATHER55:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP4]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
553 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR9]]
554 ; CHECK-NEXT: [[WIDE_MASKED_GATHER56:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP5]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
555 ; CHECK-NEXT: [[I13:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER56]], [[WIDE_MASKED_GATHER55]]
556 ; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I13]], ptr [[TMP5]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
557 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR11]]
558 ; CHECK-NEXT: [[WIDE_MASKED_GATHER57:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP6]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
559 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR13]]
560 ; CHECK-NEXT: [[WIDE_MASKED_GATHER58:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP7]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
561 ; CHECK-NEXT: [[I18:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER58]], [[WIDE_MASKED_GATHER57]]
562 ; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I18]], ptr [[TMP7]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
563 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
564 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 128
565 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR2]] = add i64 [[VEC_IND_SCALAR1]], 32
566 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR4]] = add i64 [[VEC_IND_SCALAR3]], 128
567 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR6]] = add i64 [[VEC_IND_SCALAR5]], 32
568 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR8]] = add i64 [[VEC_IND_SCALAR7]], 128
569 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR10]] = add i64 [[VEC_IND_SCALAR9]], 32
570 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR12]] = add i64 [[VEC_IND_SCALAR11]], 128
571 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR14]] = add i64 [[VEC_IND_SCALAR13]], 32
572 ; CHECK-NEXT: [[I19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
573 ; CHECK-NEXT: br i1 [[I19]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
574 ; CHECK: for.cond.cleanup:
575 ; CHECK-NEXT: ret void
578 br label %vector.body
580 vector.body: ; preds = %vector.body, %entry
581 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
582 %vec.ind = phi <8 x i64> [ <i64 0, i64 4, i64 8, i64 12, i64 16, i64 20, i64 24, i64 28>, %entry ], [ %vec.ind.next, %vector.body ]
583 %i = shl nuw nsw <8 x i64> %vec.ind, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
584 %i1 = getelementptr inbounds i32, ptr %B, <8 x i64> %i
585 %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i1, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
586 %i2 = getelementptr inbounds i32, ptr %A, <8 x i64> %vec.ind
587 %wide.masked.gather52 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i2, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
588 %i3 = add nsw <8 x i32> %wide.masked.gather52, %wide.masked.gather
589 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %i3, <8 x ptr> %i2, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
590 %i4 = or disjoint <8 x i64> %vec.ind, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
591 %i5 = shl nsw <8 x i64> %i4, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
592 %i6 = getelementptr inbounds i32, ptr %B, <8 x i64> %i5
593 %wide.masked.gather53 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i6, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
594 %i7 = getelementptr inbounds i32, ptr %A, <8 x i64> %i4
595 %wide.masked.gather54 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i7, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
596 %i8 = add nsw <8 x i32> %wide.masked.gather54, %wide.masked.gather53
597 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %i8, <8 x ptr> %i7, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
598 %i9 = or disjoint <8 x i64> %vec.ind, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
599 %i10 = shl nsw <8 x i64> %i9, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
600 %i11 = getelementptr inbounds i32, ptr %B, <8 x i64> %i10
601 %wide.masked.gather55 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i11, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
602 %i12 = getelementptr inbounds i32, ptr %A, <8 x i64> %i9
603 %wide.masked.gather56 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i12, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
604 %i13 = add nsw <8 x i32> %wide.masked.gather56, %wide.masked.gather55
605 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %i13, <8 x ptr> %i12, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
606 %i14 = or disjoint <8 x i64> %vec.ind, <i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3>
607 %i15 = shl nsw <8 x i64> %i14, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
608 %i16 = getelementptr inbounds i32, ptr %B, <8 x i64> %i15
609 %wide.masked.gather57 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i16, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
610 %i17 = getelementptr inbounds i32, ptr %A, <8 x i64> %i14
611 %wide.masked.gather58 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i17, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
612 %i18 = add nsw <8 x i32> %wide.masked.gather58, %wide.masked.gather57
613 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %i18, <8 x ptr> %i17, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
614 %index.next = add nuw i64 %index, 8
615 %vec.ind.next = add <8 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
616 %i19 = icmp eq i64 %index.next, 256
617 br i1 %i19, label %for.cond.cleanup, label %vector.body
619 for.cond.cleanup: ; preds = %vector.body
623 declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32 immarg, <32 x i1>, <32 x i8>)
624 declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x i32>)
625 declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32 immarg, <32 x i1>)
626 declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32 immarg, <8 x i1>)
628 ; Make sure we don't crash in getTgtMemIntrinsic for a vector of pointers.
629 define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1) {
630 ; V-LABEL: @gather_of_pointers(
632 ; V-NEXT: br label [[BB2:%.*]]
634 ; V-NEXT: [[I:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[I15:%.*]], [[BB2]] ]
635 ; V-NEXT: [[I3_SCALAR:%.*]] = phi i64 [ 0, [[BB]] ], [ [[I16_SCALAR:%.*]], [[BB2]] ]
636 ; V-NEXT: [[I3_SCALAR1:%.*]] = phi i64 [ 10, [[BB]] ], [ [[I16_SCALAR2:%.*]], [[BB2]] ]
637 ; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG1:%.*]], i64 [[I3_SCALAR]]
638 ; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[I3_SCALAR1]]
639 ; V-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.riscv.masked.strided.load.v2p0.p0.i64(<2 x ptr> undef, ptr [[TMP0]], i64 40, <2 x i1> <i1 true, i1 true>)
640 ; V-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.riscv.masked.strided.load.v2p0.p0.i64(<2 x ptr> undef, ptr [[TMP1]], i64 40, <2 x i1> <i1 true, i1 true>)
641 ; V-NEXT: [[I11:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], i64 [[I]]
642 ; V-NEXT: store <2 x ptr> [[I9]], ptr [[I11]], align 8
643 ; V-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[I11]], i64 2
644 ; V-NEXT: store <2 x ptr> [[I10]], ptr [[I13]], align 8
645 ; V-NEXT: [[I15]] = add nuw i64 [[I]], 4
646 ; V-NEXT: [[I16_SCALAR]] = add i64 [[I3_SCALAR]], 20
647 ; V-NEXT: [[I16_SCALAR2]] = add i64 [[I3_SCALAR1]], 20
648 ; V-NEXT: [[I17:%.*]] = icmp eq i64 [[I15]], 1024
649 ; V-NEXT: br i1 [[I17]], label [[BB18:%.*]], label [[BB2]]
653 ; ZVE32F-LABEL: @gather_of_pointers(
655 ; ZVE32F-NEXT: br label [[BB2:%.*]]
657 ; ZVE32F-NEXT: [[I:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[I15:%.*]], [[BB2]] ]
658 ; ZVE32F-NEXT: [[I3:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[BB]] ], [ [[I16:%.*]], [[BB2]] ]
659 ; ZVE32F-NEXT: [[I4:%.*]] = mul nuw nsw <2 x i64> [[I3]], <i64 5, i64 5>
660 ; ZVE32F-NEXT: [[I5:%.*]] = mul <2 x i64> [[I3]], <i64 5, i64 5>
661 ; ZVE32F-NEXT: [[I6:%.*]] = add <2 x i64> [[I5]], <i64 10, i64 10>
662 ; ZVE32F-NEXT: [[I7:%.*]] = getelementptr inbounds ptr, ptr [[ARG1:%.*]], <2 x i64> [[I4]]
663 ; ZVE32F-NEXT: [[I8:%.*]] = getelementptr inbounds ptr, ptr [[ARG1]], <2 x i64> [[I6]]
664 ; ZVE32F-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I7]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x ptr> undef)
665 ; ZVE32F-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I8]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x ptr> undef)
666 ; ZVE32F-NEXT: [[I11:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], i64 [[I]]
667 ; ZVE32F-NEXT: store <2 x ptr> [[I9]], ptr [[I11]], align 8
668 ; ZVE32F-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[I11]], i64 2
669 ; ZVE32F-NEXT: store <2 x ptr> [[I10]], ptr [[I13]], align 8
670 ; ZVE32F-NEXT: [[I15]] = add nuw i64 [[I]], 4
671 ; ZVE32F-NEXT: [[I16]] = add <2 x i64> [[I3]], <i64 4, i64 4>
672 ; ZVE32F-NEXT: [[I17:%.*]] = icmp eq i64 [[I15]], 1024
673 ; ZVE32F-NEXT: br i1 [[I17]], label [[BB18:%.*]], label [[BB2]]
675 ; ZVE32F-NEXT: ret void
680 bb2: ; preds = %bb2, %bb
681 %i = phi i64 [ 0, %bb ], [ %i15, %bb2 ]
682 %i3 = phi <2 x i64> [ <i64 0, i64 1>, %bb ], [ %i16, %bb2 ]
683 %i4 = mul nuw nsw <2 x i64> %i3, <i64 5, i64 5>
684 %i5 = mul <2 x i64> %i3, <i64 5, i64 5>
685 %i6 = add <2 x i64> %i5, <i64 10, i64 10>
686 %i7 = getelementptr inbounds ptr, ptr %arg1, <2 x i64> %i4
687 %i8 = getelementptr inbounds ptr, ptr %arg1, <2 x i64> %i6
688 %i9 = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> %i7, i32 8, <2 x i1> <i1 true, i1 true>, <2 x ptr> undef)
689 %i10 = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> %i8, i32 8, <2 x i1> <i1 true, i1 true>, <2 x ptr> undef)
690 %i11 = getelementptr inbounds ptr, ptr %arg, i64 %i
691 store <2 x ptr> %i9, ptr %i11, align 8
692 %i13 = getelementptr inbounds ptr, ptr %i11, i64 2
693 store <2 x ptr> %i10, ptr %i13, align 8
694 %i15 = add nuw i64 %i, 4
695 %i16 = add <2 x i64> %i3, <i64 4, i64 4>
696 %i17 = icmp eq i64 %i15, 1024
697 br i1 %i17, label %bb18, label %bb2
703 declare <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr>, i32 immarg, <2 x i1>, <2 x ptr>)
705 ; Make sure we don't crash in getTgtMemIntrinsic for a vector of pointers.
706 define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1) {
707 ; V-LABEL: @scatter_of_pointers(
709 ; V-NEXT: br label [[BB2:%.*]]
711 ; V-NEXT: [[I:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[I15:%.*]], [[BB2]] ]
712 ; V-NEXT: [[I3_SCALAR:%.*]] = phi i64 [ 0, [[BB]] ], [ [[I16_SCALAR:%.*]], [[BB2]] ]
713 ; V-NEXT: [[I3_SCALAR1:%.*]] = phi i64 [ 10, [[BB]] ], [ [[I16_SCALAR2:%.*]], [[BB2]] ]
714 ; V-NEXT: [[I4:%.*]] = getelementptr inbounds ptr, ptr [[ARG1:%.*]], i64 [[I]]
715 ; V-NEXT: [[I6:%.*]] = load <2 x ptr>, ptr [[I4]], align 8
716 ; V-NEXT: [[I7:%.*]] = getelementptr inbounds ptr, ptr [[I4]], i64 2
717 ; V-NEXT: [[I9:%.*]] = load <2 x ptr>, ptr [[I7]], align 8
718 ; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG:%.*]], i64 [[I3_SCALAR]]
719 ; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG]], i64 [[I3_SCALAR1]]
720 ; V-NEXT: call void @llvm.riscv.masked.strided.store.v2p0.p0.i64(<2 x ptr> [[I6]], ptr [[TMP0]], i64 40, <2 x i1> <i1 true, i1 true>)
721 ; V-NEXT: call void @llvm.riscv.masked.strided.store.v2p0.p0.i64(<2 x ptr> [[I9]], ptr [[TMP1]], i64 40, <2 x i1> <i1 true, i1 true>)
722 ; V-NEXT: [[I15]] = add nuw i64 [[I]], 4
723 ; V-NEXT: [[I16_SCALAR]] = add i64 [[I3_SCALAR]], 20
724 ; V-NEXT: [[I16_SCALAR2]] = add i64 [[I3_SCALAR1]], 20
725 ; V-NEXT: [[I17:%.*]] = icmp eq i64 [[I15]], 1024
726 ; V-NEXT: br i1 [[I17]], label [[BB18:%.*]], label [[BB2]]
730 ; ZVE32F-LABEL: @scatter_of_pointers(
732 ; ZVE32F-NEXT: br label [[BB2:%.*]]
734 ; ZVE32F-NEXT: [[I:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[I15:%.*]], [[BB2]] ]
735 ; ZVE32F-NEXT: [[I3:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[BB]] ], [ [[I16:%.*]], [[BB2]] ]
736 ; ZVE32F-NEXT: [[I4:%.*]] = getelementptr inbounds ptr, ptr [[ARG1:%.*]], i64 [[I]]
737 ; ZVE32F-NEXT: [[I6:%.*]] = load <2 x ptr>, ptr [[I4]], align 8
738 ; ZVE32F-NEXT: [[I7:%.*]] = getelementptr inbounds ptr, ptr [[I4]], i64 2
739 ; ZVE32F-NEXT: [[I9:%.*]] = load <2 x ptr>, ptr [[I7]], align 8
740 ; ZVE32F-NEXT: [[I10:%.*]] = mul nuw nsw <2 x i64> [[I3]], <i64 5, i64 5>
741 ; ZVE32F-NEXT: [[I11:%.*]] = mul <2 x i64> [[I3]], <i64 5, i64 5>
742 ; ZVE32F-NEXT: [[I12:%.*]] = add <2 x i64> [[I11]], <i64 10, i64 10>
743 ; ZVE32F-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], <2 x i64> [[I10]]
744 ; ZVE32F-NEXT: [[I14:%.*]] = getelementptr inbounds ptr, ptr [[ARG]], <2 x i64> [[I12]]
745 ; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I6]], <2 x ptr> [[I13]], i32 8, <2 x i1> <i1 true, i1 true>)
746 ; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I9]], <2 x ptr> [[I14]], i32 8, <2 x i1> <i1 true, i1 true>)
747 ; ZVE32F-NEXT: [[I15]] = add nuw i64 [[I]], 4
748 ; ZVE32F-NEXT: [[I16]] = add <2 x i64> [[I3]], <i64 4, i64 4>
749 ; ZVE32F-NEXT: [[I17:%.*]] = icmp eq i64 [[I15]], 1024
750 ; ZVE32F-NEXT: br i1 [[I17]], label [[BB18:%.*]], label [[BB2]]
752 ; ZVE32F-NEXT: ret void
757 bb2: ; preds = %bb2, %bb
758 %i = phi i64 [ 0, %bb ], [ %i15, %bb2 ]
759 %i3 = phi <2 x i64> [ <i64 0, i64 1>, %bb ], [ %i16, %bb2 ]
760 %i4 = getelementptr inbounds ptr, ptr %arg1, i64 %i
761 %i6 = load <2 x ptr>, ptr %i4, align 8
762 %i7 = getelementptr inbounds ptr, ptr %i4, i64 2
763 %i9 = load <2 x ptr>, ptr %i7, align 8
764 %i10 = mul nuw nsw <2 x i64> %i3, <i64 5, i64 5>
765 %i11 = mul <2 x i64> %i3, <i64 5, i64 5>
766 %i12 = add <2 x i64> %i11, <i64 10, i64 10>
767 %i13 = getelementptr inbounds ptr, ptr %arg, <2 x i64> %i10
768 %i14 = getelementptr inbounds ptr, ptr %arg, <2 x i64> %i12
769 call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> %i6, <2 x ptr> %i13, i32 8, <2 x i1> <i1 true, i1 true>)
770 call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> %i9, <2 x ptr> %i14, i32 8, <2 x i1> <i1 true, i1 true>)
771 %i15 = add nuw i64 %i, 4
772 %i16 = add <2 x i64> %i3, <i64 4, i64 4>
773 %i17 = icmp eq i64 %i15, 1024
774 br i1 %i17, label %bb18, label %bb2
776 bb18: ; preds = %bb2
777 ret void
778 }
780 declare void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr>, <2 x ptr>, i32 immarg, <2 x i1>)
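; Strided load where the loop starts at a runtime index; roughly equivalent C
; (illustrative sketch only, not taken from the original source):
;   for (int i = arg2; i != 1024; ++i)
;     arg[i] += arg1[i * 5];
; The vector loop is expected to be rewritten to a strided load with stride 5
; starting at byte offset arg2 * 5, followed by a scalar loop for the remainder.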
782 define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 signext %arg2) {
783 ; CHECK-LABEL: @strided_load_startval_add_with_splat(
785 ; CHECK-NEXT: [[I:%.*]] = icmp eq i32 [[ARG2:%.*]], 1024
786 ; CHECK-NEXT: br i1 [[I]], label [[BB34:%.*]], label [[BB3:%.*]]
787 ; CHECK: bb3:
788 ; CHECK-NEXT: [[I4:%.*]] = sext i32 [[ARG2]] to i64
789 ; CHECK-NEXT: [[I5:%.*]] = sub i32 1023, [[ARG2]]
790 ; CHECK-NEXT: [[I6:%.*]] = zext i32 [[I5]] to i64
791 ; CHECK-NEXT: [[I7:%.*]] = add nuw nsw i64 [[I6]], 1
792 ; CHECK-NEXT: [[I8:%.*]] = icmp ult i32 [[I5]], 31
793 ; CHECK-NEXT: br i1 [[I8]], label [[BB32:%.*]], label [[BB9:%.*]]
794 ; CHECK: bb9:
795 ; CHECK-NEXT: [[I10:%.*]] = and i64 [[I7]], 8589934560
796 ; CHECK-NEXT: [[I11:%.*]] = add nsw i64 [[I10]], [[I4]]
797 ; CHECK-NEXT: [[START:%.*]] = mul i64 [[I4]], 5
798 ; CHECK-NEXT: br label [[BB15:%.*]]
799 ; CHECK: bb15:
800 ; CHECK-NEXT: [[I16:%.*]] = phi i64 [ 0, [[BB9]] ], [ [[I27:%.*]], [[BB15]] ]
801 ; CHECK-NEXT: [[I17_SCALAR:%.*]] = phi i64 [ [[START]], [[BB9]] ], [ [[I28_SCALAR:%.*]], [[BB15]] ]
802 ; CHECK-NEXT: [[I18:%.*]] = add i64 [[I16]], [[I4]]
803 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I17_SCALAR]]
804 ; CHECK-NEXT: [[I21:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
805 ; CHECK-NEXT: [[I22:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I18]]
806 ; CHECK-NEXT: [[I24:%.*]] = load <32 x i8>, ptr [[I22]], align 1
807 ; CHECK-NEXT: [[I25:%.*]] = add <32 x i8> [[I24]], [[I21]]
808 ; CHECK-NEXT: store <32 x i8> [[I25]], ptr [[I22]], align 1
809 ; CHECK-NEXT: [[I27]] = add nuw i64 [[I16]], 32
810 ; CHECK-NEXT: [[I28_SCALAR]] = add i64 [[I17_SCALAR]], 160
811 ; CHECK-NEXT: [[I29:%.*]] = icmp eq i64 [[I27]], [[I10]]
812 ; CHECK-NEXT: br i1 [[I29]], label [[BB30:%.*]], label [[BB15]]
813 ; CHECK: bb30:
814 ; CHECK-NEXT: [[I31:%.*]] = icmp eq i64 [[I7]], [[I10]]
815 ; CHECK-NEXT: br i1 [[I31]], label [[BB34]], label [[BB32]]
816 ; CHECK: bb32:
817 ; CHECK-NEXT: [[I33:%.*]] = phi i64 [ [[I4]], [[BB3]] ], [ [[I11]], [[BB30]] ]
818 ; CHECK-NEXT: br label [[BB35:%.*]]
819 ; CHECK: bb34:
820 ; CHECK-NEXT: ret void
821 ; CHECK: bb35:
822 ; CHECK-NEXT: [[I36:%.*]] = phi i64 [ [[I43:%.*]], [[BB35]] ], [ [[I33]], [[BB32]] ]
823 ; CHECK-NEXT: [[I37:%.*]] = mul nsw i64 [[I36]], 5
824 ; CHECK-NEXT: [[I38:%.*]] = getelementptr inbounds i8, ptr [[ARG1]], i64 [[I37]]
825 ; CHECK-NEXT: [[I39:%.*]] = load i8, ptr [[I38]], align 1
826 ; CHECK-NEXT: [[I40:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[I36]]
827 ; CHECK-NEXT: [[I41:%.*]] = load i8, ptr [[I40]], align 1
828 ; CHECK-NEXT: [[I42:%.*]] = add i8 [[I41]], [[I39]]
829 ; CHECK-NEXT: store i8 [[I42]], ptr [[I40]], align 1
830 ; CHECK-NEXT: [[I43]] = add nsw i64 [[I36]], 1
831 ; CHECK-NEXT: [[I44:%.*]] = trunc i64 [[I43]] to i32
832 ; CHECK-NEXT: [[I45:%.*]] = icmp eq i32 [[I44]], 1024
833 ; CHECK-NEXT: br i1 [[I45]], label [[BB34]], label [[BB35]]
835 bb:
836 %i = icmp eq i32 %arg2, 1024
837 br i1 %i, label %bb34, label %bb3
839 bb3: ; preds = %bb
840 %i4 = sext i32 %arg2 to i64
841 %i5 = sub i32 1023, %arg2
842 %i6 = zext i32 %i5 to i64
843 %i7 = add nuw nsw i64 %i6, 1
844 %i8 = icmp ult i32 %i5, 31
845 br i1 %i8, label %bb32, label %bb9
847 bb9: ; preds = %bb3
848 %i10 = and i64 %i7, 8589934560
849 %i11 = add nsw i64 %i10, %i4
850 %i12 = insertelement <32 x i64> poison, i64 %i4, i64 0
851 %i13 = shufflevector <32 x i64> %i12, <32 x i64> poison, <32 x i32> zeroinitializer
852 %i14 = add <32 x i64> %i13, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>
853 br label %bb15
855 bb15: ; preds = %bb15, %bb9
856 %i16 = phi i64 [ 0, %bb9 ], [ %i27, %bb15 ]
857 %i17 = phi <32 x i64> [ %i14, %bb9 ], [ %i28, %bb15 ]
858 %i18 = add i64 %i16, %i4
859 %i19 = mul nsw <32 x i64> %i17, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
860 %i20 = getelementptr inbounds i8, ptr %arg1, <32 x i64> %i19
861 %i21 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i20, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
862 %i22 = getelementptr inbounds i8, ptr %arg, i64 %i18
863 %i24 = load <32 x i8>, ptr %i22, align 1
864 %i25 = add <32 x i8> %i24, %i21
865 store <32 x i8> %i25, ptr %i22, align 1
866 %i27 = add nuw i64 %i16, 32
867 %i28 = add <32 x i64> %i17, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
868 %i29 = icmp eq i64 %i27, %i10
869 br i1 %i29, label %bb30, label %bb15
871 bb30: ; preds = %bb15
872 %i31 = icmp eq i64 %i7, %i10
873 br i1 %i31, label %bb34, label %bb32
875 bb32: ; preds = %bb30, %bb3
876 %i33 = phi i64 [ %i4, %bb3 ], [ %i11, %bb30 ]
877 br label %bb35
879 bb34: ; preds = %bb35, %bb30, %bb
880 ret void
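; Scalar loop: handles the iterations left over after the vector loop (or all of
; them when the trip count is too small to enter the vector loop).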
882 bb35: ; preds = %bb35, %bb32
883 %i36 = phi i64 [ %i43, %bb35 ], [ %i33, %bb32 ]
884 %i37 = mul nsw i64 %i36, 5
885 %i38 = getelementptr inbounds i8, ptr %arg1, i64 %i37
886 %i39 = load i8, ptr %i38, align 1
887 %i40 = getelementptr inbounds i8, ptr %arg, i64 %i36
888 %i41 = load i8, ptr %i40, align 1
889 %i42 = add i8 %i41, %i39
890 store i8 %i42, ptr %i40, align 1
891 %i43 = add nsw i64 %i36, 1
892 %i44 = trunc i64 %i43 to i32
893 %i45 = icmp eq i32 %i44, 1024
894 br i1 %i45, label %bb34, label %bb35
895 }
897 declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32 immarg, <16 x i1>, <16 x i8>)
898 declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32 immarg, <16 x i1>)
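; Same stride-5 gather pattern as @gather, but with <16 x i8> vectors and a trip
; count (arg2 << 4) that is always a multiple of the vector length, so there is no
; scalar remainder loop; the gather is expected to become a strided load with
; stride 5.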
900 define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr noalias nocapture noundef readonly %arg1, i64 noundef %arg2) {
901 ; CHECK-LABEL: @gather_no_scalar_remainder(
903 ; CHECK-NEXT: [[I:%.*]] = shl i64 [[ARG2:%.*]], 4
904 ; CHECK-NEXT: [[I3:%.*]] = icmp eq i64 [[I]], 0
905 ; CHECK-NEXT: br i1 [[I3]], label [[BB16:%.*]], label [[BB2:%.*]]
906 ; CHECK: bb2:
907 ; CHECK-NEXT: br label [[BB4:%.*]]
908 ; CHECK: bb4:
909 ; CHECK-NEXT: [[I5:%.*]] = phi i64 [ [[I13:%.*]], [[BB4]] ], [ 0, [[BB2]] ]
910 ; CHECK-NEXT: [[I6_SCALAR:%.*]] = phi i64 [ 0, [[BB2]] ], [ [[I14_SCALAR:%.*]], [[BB4]] ]
911 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I6_SCALAR]]
912 ; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.riscv.masked.strided.load.v16i8.p0.i64(<16 x i8> undef, ptr [[TMP0]], i64 5, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
913 ; CHECK-NEXT: [[I10:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I5]]
914 ; CHECK-NEXT: [[I11:%.*]] = load <16 x i8>, ptr [[I10]], align 1
915 ; CHECK-NEXT: [[I12:%.*]] = add <16 x i8> [[I11]], [[I9]]
916 ; CHECK-NEXT: store <16 x i8> [[I12]], ptr [[I10]], align 1
917 ; CHECK-NEXT: [[I13]] = add nuw i64 [[I5]], 16
918 ; CHECK-NEXT: [[I14_SCALAR]] = add i64 [[I6_SCALAR]], 80
919 ; CHECK-NEXT: [[I15:%.*]] = icmp eq i64 [[I13]], [[I]]
920 ; CHECK-NEXT: br i1 [[I15]], label [[BB16]], label [[BB4]]
921 ; CHECK: bb16:
922 ; CHECK-NEXT: ret void
924 bb:
925 %i = shl i64 %arg2, 4
926 %i3 = icmp eq i64 %i, 0
927 br i1 %i3, label %bb16, label %bb2
929 bb2: ; preds = %bb
930 br label %bb4
932 bb4: ; preds = %bb4, %bb2
933 %i5 = phi i64 [ %i13, %bb4 ], [ 0, %bb2 ]
934 %i6 = phi <16 x i64> [ %i14, %bb4 ], [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, %bb2 ]
935 %i7 = mul <16 x i64> %i6, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
936 %i8 = getelementptr inbounds i8, ptr %arg1, <16 x i64> %i7
937 %i9 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %i8, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
938 %i10 = getelementptr inbounds i8, ptr %arg, i64 %i5
939 %i11 = load <16 x i8>, ptr %i10, align 1
940 %i12 = add <16 x i8> %i11, %i9
941 store <16 x i8> %i12, ptr %i10, align 1
942 %i13 = add nuw i64 %i5, 16
943 %i14 = add <16 x i64> %i6, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
944 %i15 = icmp eq i64 %i13, %i
945 br i1 %i15, label %bb16, label %bb4
947 bb16: ; preds = %bb4, %bb
948 ret void
949 }
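; Gather from a broadcast base pointer with constant byte offsets 0, 64, 128, ...:
; the addresses form a stride of 64 bytes, so the gather is expected to be
; rewritten into a strided load taken directly from the base pointer.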
951 define <8 x i8> @broadcast_ptr_base(ptr %a) {
952 ; CHECK-LABEL: @broadcast_ptr_base(
954 ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.riscv.masked.strided.load.v8i8.p0.i64(<8 x i8> poison, ptr [[A:%.*]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
955 ; CHECK-NEXT: ret <8 x i8> [[TMP0]]
958 %0 = insertelement <8 x ptr> poison, ptr %a, i64 0
959 %1 = shufflevector <8 x ptr> %0, <8 x ptr> poison, <8 x i32> zeroinitializer
960 %2 = getelementptr i8, <8 x ptr> %1, <8 x i64> <i64 0, i64 64, i64 128, i64 192, i64 256, i64 320, i64 384, i64 448>
961 %3 = tail call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %2, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> poison)
962 ret <8 x i8> %3
963 }
965 declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x i8>)
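; Same loop as @gather, but the index vector is <32 x i16> instead of <32 x i64>.
; The CHECK lines expect the masked gather to be left unchanged, presumably because
; the pass does not handle the narrower index type.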
967 define void @gather_narrow_idx(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
968 ; CHECK-LABEL: @gather_narrow_idx(
970 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
971 ; CHECK: vector.body:
972 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
973 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
974 ; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i16> [[VEC_IND]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
975 ; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i16> [[I]]
976 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
977 ; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
978 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
979 ; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
980 ; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1
981 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
982 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i16> [[VEC_IND]], <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
983 ; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
984 ; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
985 ; CHECK: for.cond.cleanup:
986 ; CHECK-NEXT: ret void
988 entry:
989 br label %vector.body
991 vector.body: ; preds = %vector.body, %entry
992 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
993 %vec.ind = phi <32 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>, %entry ], [ %vec.ind.next, %vector.body ]
994 %i = mul nuw nsw <32 x i16> %vec.ind, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
995 %i1 = getelementptr inbounds i8, ptr %B, <32 x i16> %i
996 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
997 %i2 = getelementptr inbounds i8, ptr %A, i64 %index
998 %wide.load = load <32 x i8>, ptr %i2, align 1
999 %i4 = add <32 x i8> %wide.load, %wide.masked.gather
1000 store <32 x i8> %i4, ptr %i2, align 1
1001 %index.next = add nuw i64 %index, 32
1002 %vec.ind.next = add <32 x i16> %vec.ind, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
1003 %i6 = icmp eq i64 %index.next, 1024
1004 br i1 %i6, label %for.cond.cleanup, label %vector.body
1006 for.cond.cleanup: ; preds = %vector.body