1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; REQUIRES: aarch64-registered-target
3 ; RUN: opt -passes='lower-matrix-intrinsics' -matrix-default-layout=row-major -mtriple=arm64-apple-iphoneos -S < %s | FileCheck %s
; Dot product written as a matrix multiply. The dimension arguments
; (i32 1, i32 8, i32 1) describe a 1x8 operand times an 8x1 operand, so the
; result has a single element. The assertions below pin the row-major
; lowering: %a is kept as one 8-element vector, %b is split into eight
; 1-element vectors, and the result is accumulated with 8 multiplies and
; 7 adds.
5 define <1 x i32> @dotproduct_i32_v8(<8 x i32> %a, <8 x i32> %b) {
6 ; CHECK-LABEL: @dotproduct_i32_v8(
8 ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
9 ; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
10 ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 1>
11 ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 2>
12 ; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 3>
13 ; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 4>
14 ; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 5>
15 ; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 6>
16 ; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 7>
17 ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <1 x i32> [[SPLIT1]], <1 x i32> poison, <1 x i32> zeroinitializer
18 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 0
19 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
20 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
21 ; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
22 ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <1 x i32> [[SPLIT2]], <1 x i32> poison, <1 x i32> zeroinitializer
23 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 1
24 ; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i64 0
25 ; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT10]], <1 x i32> poison, <1 x i32> zeroinitializer
26 ; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT11]], [[BLOCK9]]
27 ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]]
28 ; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <1 x i32> [[SPLIT3]], <1 x i32> poison, <1 x i32> zeroinitializer
29 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 2
30 ; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x i32> poison, i32 [[TMP5]], i64 0
31 ; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT13]], <1 x i32> poison, <1 x i32> zeroinitializer
32 ; CHECK-NEXT: [[TMP6:%.*]] = mul <1 x i32> [[SPLAT_SPLAT14]], [[BLOCK12]]
33 ; CHECK-NEXT: [[TMP7:%.*]] = add <1 x i32> [[TMP4]], [[TMP6]]
34 ; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <1 x i32> [[SPLIT4]], <1 x i32> poison, <1 x i32> zeroinitializer
35 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 3
36 ; CHECK-NEXT: [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0
37 ; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT16]], <1 x i32> poison, <1 x i32> zeroinitializer
38 ; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT17]], [[BLOCK15]]
39 ; CHECK-NEXT: [[TMP10:%.*]] = add <1 x i32> [[TMP7]], [[TMP9]]
40 ; CHECK-NEXT: [[BLOCK18:%.*]] = shufflevector <1 x i32> [[SPLIT5]], <1 x i32> poison, <1 x i32> zeroinitializer
41 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 4
42 ; CHECK-NEXT: [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x i32> poison, i32 [[TMP11]], i64 0
43 ; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT19]], <1 x i32> poison, <1 x i32> zeroinitializer
44 ; CHECK-NEXT: [[TMP12:%.*]] = mul <1 x i32> [[SPLAT_SPLAT20]], [[BLOCK18]]
45 ; CHECK-NEXT: [[TMP13:%.*]] = add <1 x i32> [[TMP10]], [[TMP12]]
46 ; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <1 x i32> [[SPLIT6]], <1 x i32> poison, <1 x i32> zeroinitializer
47 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 5
48 ; CHECK-NEXT: [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i64 0
49 ; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT22]], <1 x i32> poison, <1 x i32> zeroinitializer
50 ; CHECK-NEXT: [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT23]], [[BLOCK21]]
51 ; CHECK-NEXT: [[TMP16:%.*]] = add <1 x i32> [[TMP13]], [[TMP15]]
52 ; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <1 x i32> [[SPLIT7]], <1 x i32> poison, <1 x i32> zeroinitializer
53 ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 6
54 ; CHECK-NEXT: [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x i32> poison, i32 [[TMP17]], i64 0
55 ; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT25]], <1 x i32> poison, <1 x i32> zeroinitializer
56 ; CHECK-NEXT: [[TMP18:%.*]] = mul <1 x i32> [[SPLAT_SPLAT26]], [[BLOCK24]]
57 ; CHECK-NEXT: [[TMP19:%.*]] = add <1 x i32> [[TMP16]], [[TMP18]]
58 ; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <1 x i32> [[SPLIT8]], <1 x i32> poison, <1 x i32> zeroinitializer
59 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 7
60 ; CHECK-NEXT: [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x i32> poison, i32 [[TMP20]], i64 0
61 ; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT28]], <1 x i32> poison, <1 x i32> zeroinitializer
62 ; CHECK-NEXT: [[TMP21:%.*]] = mul <1 x i32> [[SPLAT_SPLAT29]], [[BLOCK27]]
63 ; CHECK-NEXT: [[TMP22:%.*]] = add <1 x i32> [[TMP19]], [[TMP21]]
64 ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <1 x i32> [[TMP22]], <1 x i32> poison, <1 x i32> zeroinitializer
65 ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <1 x i32> poison, <1 x i32> [[TMP23]], <1 x i32> <i32 1>
66 ; CHECK-NEXT: ret <1 x i32> [[TMP24]]
; Input IR under test; everything above is the expected output of the pass.
69 %c = tail call <1 x i32> @llvm.matrix.multiply.v1i32.v8i32.v8i32(<8 x i32> %a, <8 x i32> %b, i32 1, i32 8, i32 1)
; Matrix-multiply intrinsic overload called by the tests in this file:
; two <8 x i32> operands plus three i32 dimension arguments, returning
; <1 x i32>.
73 declare <1 x i32> @llvm.matrix.multiply.v1i32.v8i32.v8i32(<8 x i32>, <8 x i32>, i32, i32, i32)
; External function with no body here; called with the multiply result in
; @dotproduct_i32_v8_result_used_by_inst.
75 declare void @use(<1 x i32>)
; Same 1x8 by 8x1 multiply as @dotproduct_i32_v8, but the result is also
; passed to @use. The assertions expect the same expansion (8 multiplies,
; 7 adds), followed by a call to @use and a ret that both consume the final
; lowered value.
77 define <1 x i32> @dotproduct_i32_v8_result_used_by_inst(<8 x i32> %a, <8 x i32> %b) {
78 ; CHECK-LABEL: @dotproduct_i32_v8_result_used_by_inst(
80 ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
81 ; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
82 ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 1>
83 ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 2>
84 ; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 3>
85 ; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 4>
86 ; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 5>
87 ; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 6>
88 ; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 7>
89 ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <1 x i32> [[SPLIT1]], <1 x i32> poison, <1 x i32> zeroinitializer
90 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 0
91 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
92 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
93 ; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
94 ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <1 x i32> [[SPLIT2]], <1 x i32> poison, <1 x i32> zeroinitializer
95 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 1
96 ; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i64 0
97 ; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT10]], <1 x i32> poison, <1 x i32> zeroinitializer
98 ; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT11]], [[BLOCK9]]
99 ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]]
100 ; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <1 x i32> [[SPLIT3]], <1 x i32> poison, <1 x i32> zeroinitializer
101 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 2
102 ; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x i32> poison, i32 [[TMP5]], i64 0
103 ; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT13]], <1 x i32> poison, <1 x i32> zeroinitializer
104 ; CHECK-NEXT: [[TMP6:%.*]] = mul <1 x i32> [[SPLAT_SPLAT14]], [[BLOCK12]]
105 ; CHECK-NEXT: [[TMP7:%.*]] = add <1 x i32> [[TMP4]], [[TMP6]]
106 ; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <1 x i32> [[SPLIT4]], <1 x i32> poison, <1 x i32> zeroinitializer
107 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 3
108 ; CHECK-NEXT: [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0
109 ; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT16]], <1 x i32> poison, <1 x i32> zeroinitializer
110 ; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT17]], [[BLOCK15]]
111 ; CHECK-NEXT: [[TMP10:%.*]] = add <1 x i32> [[TMP7]], [[TMP9]]
112 ; CHECK-NEXT: [[BLOCK18:%.*]] = shufflevector <1 x i32> [[SPLIT5]], <1 x i32> poison, <1 x i32> zeroinitializer
113 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 4
114 ; CHECK-NEXT: [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x i32> poison, i32 [[TMP11]], i64 0
115 ; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT19]], <1 x i32> poison, <1 x i32> zeroinitializer
116 ; CHECK-NEXT: [[TMP12:%.*]] = mul <1 x i32> [[SPLAT_SPLAT20]], [[BLOCK18]]
117 ; CHECK-NEXT: [[TMP13:%.*]] = add <1 x i32> [[TMP10]], [[TMP12]]
118 ; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <1 x i32> [[SPLIT6]], <1 x i32> poison, <1 x i32> zeroinitializer
119 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 5
120 ; CHECK-NEXT: [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i64 0
121 ; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT22]], <1 x i32> poison, <1 x i32> zeroinitializer
122 ; CHECK-NEXT: [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT23]], [[BLOCK21]]
123 ; CHECK-NEXT: [[TMP16:%.*]] = add <1 x i32> [[TMP13]], [[TMP15]]
124 ; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <1 x i32> [[SPLIT7]], <1 x i32> poison, <1 x i32> zeroinitializer
125 ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 6
126 ; CHECK-NEXT: [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x i32> poison, i32 [[TMP17]], i64 0
127 ; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT25]], <1 x i32> poison, <1 x i32> zeroinitializer
128 ; CHECK-NEXT: [[TMP18:%.*]] = mul <1 x i32> [[SPLAT_SPLAT26]], [[BLOCK24]]
129 ; CHECK-NEXT: [[TMP19:%.*]] = add <1 x i32> [[TMP16]], [[TMP18]]
130 ; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <1 x i32> [[SPLIT8]], <1 x i32> poison, <1 x i32> zeroinitializer
131 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 7
132 ; CHECK-NEXT: [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x i32> poison, i32 [[TMP20]], i64 0
133 ; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT28]], <1 x i32> poison, <1 x i32> zeroinitializer
134 ; CHECK-NEXT: [[TMP21:%.*]] = mul <1 x i32> [[SPLAT_SPLAT29]], [[BLOCK27]]
135 ; CHECK-NEXT: [[TMP22:%.*]] = add <1 x i32> [[TMP19]], [[TMP21]]
136 ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <1 x i32> [[TMP22]], <1 x i32> poison, <1 x i32> zeroinitializer
137 ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <1 x i32> poison, <1 x i32> [[TMP23]], <1 x i32> <i32 1>
138 ; CHECK-NEXT: call void @use(<1 x i32> [[TMP24]])
139 ; CHECK-NEXT: ret <1 x i32> [[TMP24]]
; Input IR under test; everything above is the expected output of the pass.
142 %c = tail call <1 x i32> @llvm.matrix.multiply.v1i32.v8i32.v8i32(<8 x i32> %a, <8 x i32> %b, i32 1, i32 8, i32 1)
; Extra non-return user of the multiply result.
143 call void @use(<1 x i32> %c)
; The second multiply operand is the constant vector <1, 2, ..., 8>.
; The assertions expect no split shuffles for that operand: each product
; multiplies a splatted element of %a directly by a splat constant
; (1 through 8), and the products are summed with 7 adds.
148 define <1 x i32> @dotproduct_i32_v8_constvector(<8 x i32> %a) {
149 ; CHECK-LABEL: @dotproduct_i32_v8_constvector(
151 ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
152 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 0
153 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
154 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
155 ; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], splat (i32 1)
156 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 1
157 ; CHECK-NEXT: [[SPLAT_SPLATINSERT1:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i64 0
158 ; CHECK-NEXT: [[SPLAT_SPLAT2:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT1]], <1 x i32> poison, <1 x i32> zeroinitializer
159 ; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT2]], splat (i32 2)
160 ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]]
161 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 2
162 ; CHECK-NEXT: [[SPLAT_SPLATINSERT3:%.*]] = insertelement <1 x i32> poison, i32 [[TMP5]], i64 0
163 ; CHECK-NEXT: [[SPLAT_SPLAT4:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT3]], <1 x i32> poison, <1 x i32> zeroinitializer
164 ; CHECK-NEXT: [[TMP6:%.*]] = mul <1 x i32> [[SPLAT_SPLAT4]], splat (i32 3)
165 ; CHECK-NEXT: [[TMP7:%.*]] = add <1 x i32> [[TMP4]], [[TMP6]]
166 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 3
167 ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0
168 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> poison, <1 x i32> zeroinitializer
169 ; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT6]], splat (i32 4)
170 ; CHECK-NEXT: [[TMP10:%.*]] = add <1 x i32> [[TMP7]], [[TMP9]]
171 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 4
172 ; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP11]], i64 0
173 ; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> poison, <1 x i32> zeroinitializer
174 ; CHECK-NEXT: [[TMP12:%.*]] = mul <1 x i32> [[SPLAT_SPLAT8]], splat (i32 5)
175 ; CHECK-NEXT: [[TMP13:%.*]] = add <1 x i32> [[TMP10]], [[TMP12]]
176 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 5
177 ; CHECK-NEXT: [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i64 0
178 ; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT9]], <1 x i32> poison, <1 x i32> zeroinitializer
179 ; CHECK-NEXT: [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT10]], splat (i32 6)
180 ; CHECK-NEXT: [[TMP16:%.*]] = add <1 x i32> [[TMP13]], [[TMP15]]
181 ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 6
182 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP17]], i64 0
183 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer
184 ; CHECK-NEXT: [[TMP18:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], splat (i32 7)
185 ; CHECK-NEXT: [[TMP19:%.*]] = add <1 x i32> [[TMP16]], [[TMP18]]
186 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 7
187 ; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x i32> poison, i32 [[TMP20]], i64 0
188 ; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT13]], <1 x i32> poison, <1 x i32> zeroinitializer
189 ; CHECK-NEXT: [[TMP21:%.*]] = mul <1 x i32> [[SPLAT_SPLAT14]], splat (i32 8)
190 ; CHECK-NEXT: [[TMP22:%.*]] = add <1 x i32> [[TMP19]], [[TMP21]]
191 ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <1 x i32> [[TMP22]], <1 x i32> poison, <1 x i32> zeroinitializer
192 ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <1 x i32> poison, <1 x i32> [[TMP23]], <1 x i32> <i32 1>
193 ; CHECK-NEXT: ret <1 x i32> [[TMP24]]
; Input IR under test; everything above is the expected output of the pass.
196 %c = tail call <1 x i32> @llvm.matrix.multiply.v1i32.v8i32.v8i32(<8 x i32> %a, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, i32 1, i32 8, i32 1)
; An <8 x i32> add of %a and %b produces the FIRST multiply operand; %c is
; the second. The assertions expect the add to stay a single 8-wide add over
; the split inputs, while %c is split into eight 1-element vectors that feed
; the 8-multiply / 7-add accumulation chain.
200 define <1 x i32> @add_feeding_dotproduct_i32_v8_1(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c) {
201 ; CHECK-LABEL: @add_feeding_dotproduct_i32_v8_1(
203 ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
204 ; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
205 ; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i32> [[SPLIT]], [[SPLIT1]]
206 ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <8 x i32> [[C:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
207 ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 1>
208 ; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 2>
209 ; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 3>
210 ; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 4>
211 ; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 5>
212 ; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 6>
213 ; CHECK-NEXT: [[SPLIT9:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 7>
214 ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <1 x i32> [[SPLIT2]], <1 x i32> poison, <1 x i32> zeroinitializer
215 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[TMP0]], i64 0
216 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP1]], i64 0
217 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
218 ; CHECK-NEXT: [[TMP2:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
219 ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <1 x i32> [[SPLIT3]], <1 x i32> poison, <1 x i32> zeroinitializer
220 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP0]], i64 1
221 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP3]], i64 0
222 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer
223 ; CHECK-NEXT: [[TMP4:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], [[BLOCK10]]
224 ; CHECK-NEXT: [[TMP5:%.*]] = add <1 x i32> [[TMP2]], [[TMP4]]
225 ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <1 x i32> [[SPLIT4]], <1 x i32> poison, <1 x i32> zeroinitializer
226 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP0]], i64 2
227 ; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x i32> poison, i32 [[TMP6]], i64 0
228 ; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT14]], <1 x i32> poison, <1 x i32> zeroinitializer
229 ; CHECK-NEXT: [[TMP7:%.*]] = mul <1 x i32> [[SPLAT_SPLAT15]], [[BLOCK13]]
230 ; CHECK-NEXT: [[TMP8:%.*]] = add <1 x i32> [[TMP5]], [[TMP7]]
231 ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <1 x i32> [[SPLIT5]], <1 x i32> poison, <1 x i32> zeroinitializer
232 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP0]], i64 3
233 ; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i64 0
234 ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT17]], <1 x i32> poison, <1 x i32> zeroinitializer
235 ; CHECK-NEXT: [[TMP10:%.*]] = mul <1 x i32> [[SPLAT_SPLAT18]], [[BLOCK16]]
236 ; CHECK-NEXT: [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]]
237 ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <1 x i32> [[SPLIT6]], <1 x i32> poison, <1 x i32> zeroinitializer
238 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP0]], i64 4
239 ; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i64 0
240 ; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT20]], <1 x i32> poison, <1 x i32> zeroinitializer
241 ; CHECK-NEXT: [[TMP13:%.*]] = mul <1 x i32> [[SPLAT_SPLAT21]], [[BLOCK19]]
242 ; CHECK-NEXT: [[TMP14:%.*]] = add <1 x i32> [[TMP11]], [[TMP13]]
243 ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <1 x i32> [[SPLIT7]], <1 x i32> poison, <1 x i32> zeroinitializer
244 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP0]], i64 5
245 ; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x i32> poison, i32 [[TMP15]], i64 0
246 ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT23]], <1 x i32> poison, <1 x i32> zeroinitializer
247 ; CHECK-NEXT: [[TMP16:%.*]] = mul <1 x i32> [[SPLAT_SPLAT24]], [[BLOCK22]]
248 ; CHECK-NEXT: [[TMP17:%.*]] = add <1 x i32> [[TMP14]], [[TMP16]]
249 ; CHECK-NEXT: [[BLOCK25:%.*]] = shufflevector <1 x i32> [[SPLIT8]], <1 x i32> poison, <1 x i32> zeroinitializer
250 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP0]], i64 6
251 ; CHECK-NEXT: [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x i32> poison, i32 [[TMP18]], i64 0
252 ; CHECK-NEXT: [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT26]], <1 x i32> poison, <1 x i32> zeroinitializer
253 ; CHECK-NEXT: [[TMP19:%.*]] = mul <1 x i32> [[SPLAT_SPLAT27]], [[BLOCK25]]
254 ; CHECK-NEXT: [[TMP20:%.*]] = add <1 x i32> [[TMP17]], [[TMP19]]
255 ; CHECK-NEXT: [[BLOCK28:%.*]] = shufflevector <1 x i32> [[SPLIT9]], <1 x i32> poison, <1 x i32> zeroinitializer
256 ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP0]], i64 7
257 ; CHECK-NEXT: [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i64 0
258 ; CHECK-NEXT: [[SPLAT_SPLAT30:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT29]], <1 x i32> poison, <1 x i32> zeroinitializer
259 ; CHECK-NEXT: [[TMP22:%.*]] = mul <1 x i32> [[SPLAT_SPLAT30]], [[BLOCK28]]
260 ; CHECK-NEXT: [[TMP23:%.*]] = add <1 x i32> [[TMP20]], [[TMP22]]
261 ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <1 x i32> [[TMP23]], <1 x i32> poison, <1 x i32> zeroinitializer
262 ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <1 x i32> poison, <1 x i32> [[TMP24]], <1 x i32> <i32 1>
263 ; CHECK-NEXT: ret <1 x i32> [[TMP25]]
; Input IR under test; everything above is the expected output of the pass.
266 %add = add <8 x i32> %a, %b
267 %res = tail call <1 x i32> @llvm.matrix.multiply.v1i32.v8i32.v8i32(<8 x i32> %add, <8 x i32> %c, i32 1, i32 8, i32 1)
; An <8 x i32> add of %a and %b produces the SECOND multiply operand; %c is
; the first. The assertions expect the add to be lowered as eight <1 x i32>
; adds, one per split element of %a and %b, whose results feed the products
; directly, while %c is kept as one 8-element vector that is indexed with
; extractelement.
271 define <1 x i32> @add_feeding_dotproduct_i32_v8_2(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c) {
272 ; CHECK-LABEL: @add_feeding_dotproduct_i32_v8_2(
274 ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
275 ; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 1>
276 ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 2>
277 ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 3>
278 ; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 4>
279 ; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 5>
280 ; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 6>
281 ; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 7>
282 ; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
283 ; CHECK-NEXT: [[SPLIT9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 1>
284 ; CHECK-NEXT: [[SPLIT10:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 2>
285 ; CHECK-NEXT: [[SPLIT11:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 3>
286 ; CHECK-NEXT: [[SPLIT12:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 4>
287 ; CHECK-NEXT: [[SPLIT13:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 5>
288 ; CHECK-NEXT: [[SPLIT14:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 6>
289 ; CHECK-NEXT: [[SPLIT15:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 7>
290 ; CHECK-NEXT: [[TMP0:%.*]] = add <1 x i32> [[SPLIT]], [[SPLIT8]]
291 ; CHECK-NEXT: [[TMP1:%.*]] = add <1 x i32> [[SPLIT1]], [[SPLIT9]]
292 ; CHECK-NEXT: [[TMP2:%.*]] = add <1 x i32> [[SPLIT2]], [[SPLIT10]]
293 ; CHECK-NEXT: [[TMP3:%.*]] = add <1 x i32> [[SPLIT3]], [[SPLIT11]]
294 ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i32> [[SPLIT4]], [[SPLIT12]]
295 ; CHECK-NEXT: [[TMP5:%.*]] = add <1 x i32> [[SPLIT5]], [[SPLIT13]]
296 ; CHECK-NEXT: [[TMP6:%.*]] = add <1 x i32> [[SPLIT6]], [[SPLIT14]]
297 ; CHECK-NEXT: [[TMP7:%.*]] = add <1 x i32> [[SPLIT7]], [[SPLIT15]]
298 ; CHECK-NEXT: [[SPLIT16:%.*]] = shufflevector <8 x i32> [[C:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
299 ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <1 x i32> [[TMP0]], <1 x i32> poison, <1 x i32> zeroinitializer
300 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 0
301 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0
302 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
303 ; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
304 ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <1 x i32> zeroinitializer
305 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 1
306 ; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP10]], i64 0
307 ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> poison, <1 x i32> zeroinitializer
308 ; CHECK-NEXT: [[TMP11:%.*]] = mul <1 x i32> [[SPLAT_SPLAT19]], [[BLOCK17]]
309 ; CHECK-NEXT: [[TMP12:%.*]] = add <1 x i32> [[TMP9]], [[TMP11]]
310 ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <1 x i32> [[TMP2]], <1 x i32> poison, <1 x i32> zeroinitializer
311 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 2
312 ; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> poison, i32 [[TMP13]], i64 0
313 ; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT21]], <1 x i32> poison, <1 x i32> zeroinitializer
314 ; CHECK-NEXT: [[TMP14:%.*]] = mul <1 x i32> [[SPLAT_SPLAT22]], [[BLOCK20]]
315 ; CHECK-NEXT: [[TMP15:%.*]] = add <1 x i32> [[TMP12]], [[TMP14]]
316 ; CHECK-NEXT: [[BLOCK23:%.*]] = shufflevector <1 x i32> [[TMP3]], <1 x i32> poison, <1 x i32> zeroinitializer
317 ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 3
318 ; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i64 0
319 ; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> poison, <1 x i32> zeroinitializer
320 ; CHECK-NEXT: [[TMP17:%.*]] = mul <1 x i32> [[SPLAT_SPLAT25]], [[BLOCK23]]
321 ; CHECK-NEXT: [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]]
322 ; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <1 x i32> zeroinitializer
323 ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 4
324 ; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP19]], i64 0
325 ; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT27]], <1 x i32> poison, <1 x i32> zeroinitializer
326 ; CHECK-NEXT: [[TMP20:%.*]] = mul <1 x i32> [[SPLAT_SPLAT28]], [[BLOCK26]]
327 ; CHECK-NEXT: [[TMP21:%.*]] = add <1 x i32> [[TMP18]], [[TMP20]]
328 ; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> poison, <1 x i32> zeroinitializer
329 ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 5
330 ; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP22]], i64 0
331 ; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> poison, <1 x i32> zeroinitializer
332 ; CHECK-NEXT: [[TMP23:%.*]] = mul <1 x i32> [[SPLAT_SPLAT31]], [[BLOCK29]]
333 ; CHECK-NEXT: [[TMP24:%.*]] = add <1 x i32> [[TMP21]], [[TMP23]]
334 ; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <1 x i32> [[TMP6]], <1 x i32> poison, <1 x i32> zeroinitializer
335 ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 6
336 ; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> poison, i32 [[TMP25]], i64 0
337 ; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT33]], <1 x i32> poison, <1 x i32> zeroinitializer
338 ; CHECK-NEXT: [[TMP26:%.*]] = mul <1 x i32> [[SPLAT_SPLAT34]], [[BLOCK32]]
339 ; CHECK-NEXT: [[TMP27:%.*]] = add <1 x i32> [[TMP24]], [[TMP26]]
340 ; CHECK-NEXT: [[BLOCK35:%.*]] = shufflevector <1 x i32> [[TMP7]], <1 x i32> poison, <1 x i32> zeroinitializer
341 ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 7
342 ; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> poison, i32 [[TMP28]], i64 0
343 ; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> poison, <1 x i32> zeroinitializer
344 ; CHECK-NEXT: [[TMP29:%.*]] = mul <1 x i32> [[SPLAT_SPLAT37]], [[BLOCK35]]
345 ; CHECK-NEXT: [[TMP30:%.*]] = add <1 x i32> [[TMP27]], [[TMP29]]
346 ; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <1 x i32> [[TMP30]], <1 x i32> poison, <1 x i32> zeroinitializer
347 ; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <1 x i32> poison, <1 x i32> [[TMP31]], <1 x i32> <i32 1>
348 ; CHECK-NEXT: ret <1 x i32> [[TMP32]]
; Input IR under test; everything above is the expected output of the pass.
351 %add = add <8 x i32> %a, %b
352 %res = tail call <1 x i32> @llvm.matrix.multiply.v1i32.v8i32.v8i32(<8 x i32> %c, <8 x i32> %add, i32 1, i32 8, i32 1)
; Test case: the elementwise sub of %a and %b is the FIRST operand of an
; llvm.matrix.multiply call with dimension arguments (1, 8, 1); %c is the
; second operand.
; Expected lowering (autogenerated assertions below): %a and %b each stay a
; whole <8 x i32> vector and are subtracted with a single <8 x i32> sub, while
; %c is split into eight <1 x i32> vectors. The product is then built from
; eight extractelement / splat / mul steps whose results are chained with
; <1 x i32> adds, and the final value is returned after two shufflevectors.
356 define <1 x i32> @sub_feeding_dotproduct_i32_v8_1(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c) {
357 ; CHECK-LABEL: @sub_feeding_dotproduct_i32_v8_1(
359 ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
360 ; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
361 ; CHECK-NEXT: [[TMP0:%.*]] = sub <8 x i32> [[SPLIT]], [[SPLIT1]]
362 ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <8 x i32> [[C:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
363 ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 1>
364 ; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 2>
365 ; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 3>
366 ; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 4>
367 ; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 5>
368 ; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 6>
369 ; CHECK-NEXT: [[SPLIT9:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 7>
370 ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <1 x i32> [[SPLIT2]], <1 x i32> poison, <1 x i32> zeroinitializer
371 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[TMP0]], i64 0
372 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP1]], i64 0
373 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
374 ; CHECK-NEXT: [[TMP2:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
375 ; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <1 x i32> [[SPLIT3]], <1 x i32> poison, <1 x i32> zeroinitializer
376 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP0]], i64 1
377 ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP3]], i64 0
378 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer
379 ; CHECK-NEXT: [[TMP4:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], [[BLOCK10]]
380 ; CHECK-NEXT: [[TMP5:%.*]] = add <1 x i32> [[TMP2]], [[TMP4]]
381 ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <1 x i32> [[SPLIT4]], <1 x i32> poison, <1 x i32> zeroinitializer
382 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP0]], i64 2
383 ; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x i32> poison, i32 [[TMP6]], i64 0
384 ; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT14]], <1 x i32> poison, <1 x i32> zeroinitializer
385 ; CHECK-NEXT: [[TMP7:%.*]] = mul <1 x i32> [[SPLAT_SPLAT15]], [[BLOCK13]]
386 ; CHECK-NEXT: [[TMP8:%.*]] = add <1 x i32> [[TMP5]], [[TMP7]]
387 ; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <1 x i32> [[SPLIT5]], <1 x i32> poison, <1 x i32> zeroinitializer
388 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP0]], i64 3
389 ; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i64 0
390 ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT17]], <1 x i32> poison, <1 x i32> zeroinitializer
391 ; CHECK-NEXT: [[TMP10:%.*]] = mul <1 x i32> [[SPLAT_SPLAT18]], [[BLOCK16]]
392 ; CHECK-NEXT: [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]]
393 ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <1 x i32> [[SPLIT6]], <1 x i32> poison, <1 x i32> zeroinitializer
394 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP0]], i64 4
395 ; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i64 0
396 ; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT20]], <1 x i32> poison, <1 x i32> zeroinitializer
397 ; CHECK-NEXT: [[TMP13:%.*]] = mul <1 x i32> [[SPLAT_SPLAT21]], [[BLOCK19]]
398 ; CHECK-NEXT: [[TMP14:%.*]] = add <1 x i32> [[TMP11]], [[TMP13]]
399 ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <1 x i32> [[SPLIT7]], <1 x i32> poison, <1 x i32> zeroinitializer
400 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP0]], i64 5
401 ; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x i32> poison, i32 [[TMP15]], i64 0
402 ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT23]], <1 x i32> poison, <1 x i32> zeroinitializer
403 ; CHECK-NEXT: [[TMP16:%.*]] = mul <1 x i32> [[SPLAT_SPLAT24]], [[BLOCK22]]
404 ; CHECK-NEXT: [[TMP17:%.*]] = add <1 x i32> [[TMP14]], [[TMP16]]
405 ; CHECK-NEXT: [[BLOCK25:%.*]] = shufflevector <1 x i32> [[SPLIT8]], <1 x i32> poison, <1 x i32> zeroinitializer
406 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP0]], i64 6
407 ; CHECK-NEXT: [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x i32> poison, i32 [[TMP18]], i64 0
408 ; CHECK-NEXT: [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT26]], <1 x i32> poison, <1 x i32> zeroinitializer
409 ; CHECK-NEXT: [[TMP19:%.*]] = mul <1 x i32> [[SPLAT_SPLAT27]], [[BLOCK25]]
410 ; CHECK-NEXT: [[TMP20:%.*]] = add <1 x i32> [[TMP17]], [[TMP19]]
411 ; CHECK-NEXT: [[BLOCK28:%.*]] = shufflevector <1 x i32> [[SPLIT9]], <1 x i32> poison, <1 x i32> zeroinitializer
412 ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP0]], i64 7
413 ; CHECK-NEXT: [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i64 0
414 ; CHECK-NEXT: [[SPLAT_SPLAT30:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT29]], <1 x i32> poison, <1 x i32> zeroinitializer
415 ; CHECK-NEXT: [[TMP22:%.*]] = mul <1 x i32> [[SPLAT_SPLAT30]], [[BLOCK28]]
416 ; CHECK-NEXT: [[TMP23:%.*]] = add <1 x i32> [[TMP20]], [[TMP22]]
417 ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <1 x i32> [[TMP23]], <1 x i32> poison, <1 x i32> zeroinitializer
418 ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <1 x i32> poison, <1 x i32> [[TMP24]], <1 x i32> <i32 1>
419 ; CHECK-NEXT: ret <1 x i32> [[TMP25]]
; Input IR under test: %sub feeds the first multiply operand, %c the second.
422 %sub = sub <8 x i32> %a, %b
423 %res = tail call <1 x i32> @llvm.matrix.multiply.v1i32.v8i32.v8i32(<8 x i32> %sub, <8 x i32> %c, i32 1, i32 8, i32 1)
; Test case: the elementwise sub of %a and %b is the SECOND operand of an
; llvm.matrix.multiply call with dimension arguments (1, 8, 1); %c is the
; first operand.
; Expected lowering (autogenerated assertions below): %a and %b are each split
; into eight <1 x i32> vectors and subtracted pairwise with eight <1 x i32>
; subs, while %c stays a whole <8 x i32> vector. The product is then built
; from eight extractelement (from %c) / splat / mul steps against the sub
; results, chained with <1 x i32> adds.
427 define <1 x i32> @sub_feeding_dotproduct_i32_v8_2(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c) {
428 ; CHECK-LABEL: @sub_feeding_dotproduct_i32_v8_2(
430 ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
431 ; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 1>
432 ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 2>
433 ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 3>
434 ; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 4>
435 ; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 5>
436 ; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 6>
437 ; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 7>
438 ; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
439 ; CHECK-NEXT: [[SPLIT9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 1>
440 ; CHECK-NEXT: [[SPLIT10:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 2>
441 ; CHECK-NEXT: [[SPLIT11:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 3>
442 ; CHECK-NEXT: [[SPLIT12:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 4>
443 ; CHECK-NEXT: [[SPLIT13:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 5>
444 ; CHECK-NEXT: [[SPLIT14:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 6>
445 ; CHECK-NEXT: [[SPLIT15:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 7>
446 ; CHECK-NEXT: [[TMP0:%.*]] = sub <1 x i32> [[SPLIT]], [[SPLIT8]]
447 ; CHECK-NEXT: [[TMP1:%.*]] = sub <1 x i32> [[SPLIT1]], [[SPLIT9]]
448 ; CHECK-NEXT: [[TMP2:%.*]] = sub <1 x i32> [[SPLIT2]], [[SPLIT10]]
449 ; CHECK-NEXT: [[TMP3:%.*]] = sub <1 x i32> [[SPLIT3]], [[SPLIT11]]
450 ; CHECK-NEXT: [[TMP4:%.*]] = sub <1 x i32> [[SPLIT4]], [[SPLIT12]]
451 ; CHECK-NEXT: [[TMP5:%.*]] = sub <1 x i32> [[SPLIT5]], [[SPLIT13]]
452 ; CHECK-NEXT: [[TMP6:%.*]] = sub <1 x i32> [[SPLIT6]], [[SPLIT14]]
453 ; CHECK-NEXT: [[TMP7:%.*]] = sub <1 x i32> [[SPLIT7]], [[SPLIT15]]
454 ; CHECK-NEXT: [[SPLIT16:%.*]] = shufflevector <8 x i32> [[C:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
455 ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <1 x i32> [[TMP0]], <1 x i32> poison, <1 x i32> zeroinitializer
456 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 0
457 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0
458 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
459 ; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
460 ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <1 x i32> zeroinitializer
461 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 1
462 ; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP10]], i64 0
463 ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> poison, <1 x i32> zeroinitializer
464 ; CHECK-NEXT: [[TMP11:%.*]] = mul <1 x i32> [[SPLAT_SPLAT19]], [[BLOCK17]]
465 ; CHECK-NEXT: [[TMP12:%.*]] = add <1 x i32> [[TMP9]], [[TMP11]]
466 ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <1 x i32> [[TMP2]], <1 x i32> poison, <1 x i32> zeroinitializer
467 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 2
468 ; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> poison, i32 [[TMP13]], i64 0
469 ; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT21]], <1 x i32> poison, <1 x i32> zeroinitializer
470 ; CHECK-NEXT: [[TMP14:%.*]] = mul <1 x i32> [[SPLAT_SPLAT22]], [[BLOCK20]]
471 ; CHECK-NEXT: [[TMP15:%.*]] = add <1 x i32> [[TMP12]], [[TMP14]]
472 ; CHECK-NEXT: [[BLOCK23:%.*]] = shufflevector <1 x i32> [[TMP3]], <1 x i32> poison, <1 x i32> zeroinitializer
473 ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 3
474 ; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i64 0
475 ; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> poison, <1 x i32> zeroinitializer
476 ; CHECK-NEXT: [[TMP17:%.*]] = mul <1 x i32> [[SPLAT_SPLAT25]], [[BLOCK23]]
477 ; CHECK-NEXT: [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]]
478 ; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <1 x i32> zeroinitializer
479 ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 4
480 ; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP19]], i64 0
481 ; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT27]], <1 x i32> poison, <1 x i32> zeroinitializer
482 ; CHECK-NEXT: [[TMP20:%.*]] = mul <1 x i32> [[SPLAT_SPLAT28]], [[BLOCK26]]
483 ; CHECK-NEXT: [[TMP21:%.*]] = add <1 x i32> [[TMP18]], [[TMP20]]
484 ; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> poison, <1 x i32> zeroinitializer
485 ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 5
486 ; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP22]], i64 0
487 ; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> poison, <1 x i32> zeroinitializer
488 ; CHECK-NEXT: [[TMP23:%.*]] = mul <1 x i32> [[SPLAT_SPLAT31]], [[BLOCK29]]
489 ; CHECK-NEXT: [[TMP24:%.*]] = add <1 x i32> [[TMP21]], [[TMP23]]
490 ; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <1 x i32> [[TMP6]], <1 x i32> poison, <1 x i32> zeroinitializer
491 ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 6
492 ; CHECK-NEXT: [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> poison, i32 [[TMP25]], i64 0
493 ; CHECK-NEXT: [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT33]], <1 x i32> poison, <1 x i32> zeroinitializer
494 ; CHECK-NEXT: [[TMP26:%.*]] = mul <1 x i32> [[SPLAT_SPLAT34]], [[BLOCK32]]
495 ; CHECK-NEXT: [[TMP27:%.*]] = add <1 x i32> [[TMP24]], [[TMP26]]
496 ; CHECK-NEXT: [[BLOCK35:%.*]] = shufflevector <1 x i32> [[TMP7]], <1 x i32> poison, <1 x i32> zeroinitializer
497 ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[SPLIT16]], i64 7
498 ; CHECK-NEXT: [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> poison, i32 [[TMP28]], i64 0
499 ; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> poison, <1 x i32> zeroinitializer
500 ; CHECK-NEXT: [[TMP29:%.*]] = mul <1 x i32> [[SPLAT_SPLAT37]], [[BLOCK35]]
501 ; CHECK-NEXT: [[TMP30:%.*]] = add <1 x i32> [[TMP27]], [[TMP29]]
502 ; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <1 x i32> [[TMP30]], <1 x i32> poison, <1 x i32> zeroinitializer
503 ; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <1 x i32> poison, <1 x i32> [[TMP31]], <1 x i32> <i32 1>
504 ; CHECK-NEXT: ret <1 x i32> [[TMP32]]
; Input IR under test: %c is the first multiply operand, %sub the second.
507 %sub = sub <8 x i32> %a, %b
508 %res = tail call <1 x i32> @llvm.matrix.multiply.v1i32.v8i32.v8i32(<8 x i32> %c, <8 x i32> %sub, i32 1, i32 8, i32 1)
; Test case: a chain of two elementwise adds ((%a + %b) + %c) is the FIRST
; operand of an llvm.matrix.multiply call with dimension arguments (1, 8, 1);
; %d is the second operand.
; Expected lowering (autogenerated assertions below): %a, %b and %c each stay
; a whole <8 x i32> vector and are combined with two <8 x i32> adds, while %d
; is split into eight <1 x i32> vectors. The product is then built from eight
; extractelement / splat / mul steps chained with <1 x i32> adds.
512 define <1 x i32> @add_chain_feeding_dotproduct_i32_v8_1(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
513 ; CHECK-LABEL: @add_chain_feeding_dotproduct_i32_v8_1(
515 ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
516 ; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
517 ; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i32> [[SPLIT]], [[SPLIT1]]
518 ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <8 x i32> [[C:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
519 ; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[TMP0]], [[SPLIT2]]
520 ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <8 x i32> [[D:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
521 ; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <1 x i32> <i32 1>
522 ; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <1 x i32> <i32 2>
523 ; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <1 x i32> <i32 3>
524 ; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <1 x i32> <i32 4>
525 ; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <1 x i32> <i32 5>
526 ; CHECK-NEXT: [[SPLIT9:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <1 x i32> <i32 6>
527 ; CHECK-NEXT: [[SPLIT10:%.*]] = shufflevector <8 x i32> [[D]], <8 x i32> poison, <1 x i32> <i32 7>
528 ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <1 x i32> [[SPLIT3]], <1 x i32> poison, <1 x i32> zeroinitializer
529 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
530 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i64 0
531 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
532 ; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
533 ; CHECK-NEXT: [[BLOCK11:%.*]] = shufflevector <1 x i32> [[SPLIT4]], <1 x i32> poison, <1 x i32> zeroinitializer
534 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
535 ; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> poison, i32 [[TMP4]], i64 0
536 ; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT12]], <1 x i32> poison, <1 x i32> zeroinitializer
537 ; CHECK-NEXT: [[TMP5:%.*]] = mul <1 x i32> [[SPLAT_SPLAT13]], [[BLOCK11]]
538 ; CHECK-NEXT: [[TMP6:%.*]] = add <1 x i32> [[TMP3]], [[TMP5]]
539 ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <1 x i32> [[SPLIT5]], <1 x i32> poison, <1 x i32> zeroinitializer
540 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
541 ; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i64 0
542 ; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT15]], <1 x i32> poison, <1 x i32> zeroinitializer
543 ; CHECK-NEXT: [[TMP8:%.*]] = mul <1 x i32> [[SPLAT_SPLAT16]], [[BLOCK14]]
544 ; CHECK-NEXT: [[TMP9:%.*]] = add <1 x i32> [[TMP6]], [[TMP8]]
545 ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <1 x i32> [[SPLIT6]], <1 x i32> poison, <1 x i32> zeroinitializer
546 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
547 ; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP10]], i64 0
548 ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> poison, <1 x i32> zeroinitializer
549 ; CHECK-NEXT: [[TMP11:%.*]] = mul <1 x i32> [[SPLAT_SPLAT19]], [[BLOCK17]]
550 ; CHECK-NEXT: [[TMP12:%.*]] = add <1 x i32> [[TMP9]], [[TMP11]]
551 ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <1 x i32> [[SPLIT7]], <1 x i32> poison, <1 x i32> zeroinitializer
552 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
553 ; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> poison, i32 [[TMP13]], i64 0
554 ; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT21]], <1 x i32> poison, <1 x i32> zeroinitializer
555 ; CHECK-NEXT: [[TMP14:%.*]] = mul <1 x i32> [[SPLAT_SPLAT22]], [[BLOCK20]]
556 ; CHECK-NEXT: [[TMP15:%.*]] = add <1 x i32> [[TMP12]], [[TMP14]]
557 ; CHECK-NEXT: [[BLOCK23:%.*]] = shufflevector <1 x i32> [[SPLIT8]], <1 x i32> poison, <1 x i32> zeroinitializer
558 ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
559 ; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i64 0
560 ; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> poison, <1 x i32> zeroinitializer
561 ; CHECK-NEXT: [[TMP17:%.*]] = mul <1 x i32> [[SPLAT_SPLAT25]], [[BLOCK23]]
562 ; CHECK-NEXT: [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]]
563 ; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <1 x i32> [[SPLIT9]], <1 x i32> poison, <1 x i32> zeroinitializer
564 ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
565 ; CHECK-NEXT: [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP19]], i64 0
566 ; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT27]], <1 x i32> poison, <1 x i32> zeroinitializer
567 ; CHECK-NEXT: [[TMP20:%.*]] = mul <1 x i32> [[SPLAT_SPLAT28]], [[BLOCK26]]
568 ; CHECK-NEXT: [[TMP21:%.*]] = add <1 x i32> [[TMP18]], [[TMP20]]
569 ; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <1 x i32> [[SPLIT10]], <1 x i32> poison, <1 x i32> zeroinitializer
570 ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
571 ; CHECK-NEXT: [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP22]], i64 0
572 ; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> poison, <1 x i32> zeroinitializer
573 ; CHECK-NEXT: [[TMP23:%.*]] = mul <1 x i32> [[SPLAT_SPLAT31]], [[BLOCK29]]
574 ; CHECK-NEXT: [[TMP24:%.*]] = add <1 x i32> [[TMP21]], [[TMP23]]
575 ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <1 x i32> [[TMP24]], <1 x i32> poison, <1 x i32> zeroinitializer
576 ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> poison, <1 x i32> [[TMP25]], <1 x i32> <i32 1>
577 ; CHECK-NEXT: ret <1 x i32> [[TMP26]]
; Input IR under test: %add.2 (the end of the add chain) is the first multiply
; operand, %d the second.
580 %add.1 = add <8 x i32> %a, %b
581 %add.2 = add <8 x i32> %add.1, %c
582 %res = tail call <1 x i32> @llvm.matrix.multiply.v1i32.v8i32.v8i32(<8 x i32> %add.2, <8 x i32> %d, i32 1, i32 8, i32 1)
; Test case: a chain of two elementwise adds ((%a + %b) + %c) is the SECOND
; operand of an llvm.matrix.multiply call with dimension arguments (1, 8, 1);
; %d is the first operand.
; Expected lowering (autogenerated assertions below): %a, %b and %c are each
; split into eight <1 x i32> vectors and combined with two groups of eight
; <1 x i32> adds, while %d stays a whole <8 x i32> vector. The product is then
; built from eight extractelement (from %d) / splat / mul steps against the
; add results, chained with <1 x i32> adds.
586 define <1 x i32> @add_chain_feeding_dotproduct_i32_v8_2(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
587 ; CHECK-LABEL: @add_chain_feeding_dotproduct_i32_v8_2(
589 ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
590 ; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 1>
591 ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 2>
592 ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 3>
593 ; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 4>
594 ; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 5>
595 ; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 6>
596 ; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 7>
597 ; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
598 ; CHECK-NEXT: [[SPLIT9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 1>
599 ; CHECK-NEXT: [[SPLIT10:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 2>
600 ; CHECK-NEXT: [[SPLIT11:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 3>
601 ; CHECK-NEXT: [[SPLIT12:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 4>
602 ; CHECK-NEXT: [[SPLIT13:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 5>
603 ; CHECK-NEXT: [[SPLIT14:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 6>
604 ; CHECK-NEXT: [[SPLIT15:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <1 x i32> <i32 7>
605 ; CHECK-NEXT: [[TMP0:%.*]] = add <1 x i32> [[SPLIT]], [[SPLIT8]]
606 ; CHECK-NEXT: [[TMP1:%.*]] = add <1 x i32> [[SPLIT1]], [[SPLIT9]]
607 ; CHECK-NEXT: [[TMP2:%.*]] = add <1 x i32> [[SPLIT2]], [[SPLIT10]]
608 ; CHECK-NEXT: [[TMP3:%.*]] = add <1 x i32> [[SPLIT3]], [[SPLIT11]]
609 ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i32> [[SPLIT4]], [[SPLIT12]]
610 ; CHECK-NEXT: [[TMP5:%.*]] = add <1 x i32> [[SPLIT5]], [[SPLIT13]]
611 ; CHECK-NEXT: [[TMP6:%.*]] = add <1 x i32> [[SPLIT6]], [[SPLIT14]]
612 ; CHECK-NEXT: [[TMP7:%.*]] = add <1 x i32> [[SPLIT7]], [[SPLIT15]]
613 ; CHECK-NEXT: [[SPLIT16:%.*]] = shufflevector <8 x i32> [[C:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
614 ; CHECK-NEXT: [[SPLIT17:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 1>
615 ; CHECK-NEXT: [[SPLIT18:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 2>
616 ; CHECK-NEXT: [[SPLIT19:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 3>
617 ; CHECK-NEXT: [[SPLIT20:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 4>
618 ; CHECK-NEXT: [[SPLIT21:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 5>
619 ; CHECK-NEXT: [[SPLIT22:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 6>
620 ; CHECK-NEXT: [[SPLIT23:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <1 x i32> <i32 7>
621 ; CHECK-NEXT: [[TMP8:%.*]] = add <1 x i32> [[TMP0]], [[SPLIT16]]
622 ; CHECK-NEXT: [[TMP9:%.*]] = add <1 x i32> [[TMP1]], [[SPLIT17]]
623 ; CHECK-NEXT: [[TMP10:%.*]] = add <1 x i32> [[TMP2]], [[SPLIT18]]
624 ; CHECK-NEXT: [[TMP11:%.*]] = add <1 x i32> [[TMP3]], [[SPLIT19]]
625 ; CHECK-NEXT: [[TMP12:%.*]] = add <1 x i32> [[TMP4]], [[SPLIT20]]
626 ; CHECK-NEXT: [[TMP13:%.*]] = add <1 x i32> [[TMP5]], [[SPLIT21]]
627 ; CHECK-NEXT: [[TMP14:%.*]] = add <1 x i32> [[TMP6]], [[SPLIT22]]
628 ; CHECK-NEXT: [[TMP15:%.*]] = add <1 x i32> [[TMP7]], [[SPLIT23]]
629 ; CHECK-NEXT: [[SPLIT24:%.*]] = shufflevector <8 x i32> [[D:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
630 ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <1 x i32> [[TMP8]], <1 x i32> poison, <1 x i32> zeroinitializer
631 ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[SPLIT24]], i64 0
632 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i64 0
633 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
634 ; CHECK-NEXT: [[TMP17:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
635 ; CHECK-NEXT: [[BLOCK25:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> poison, <1 x i32> zeroinitializer
636 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[SPLIT24]], i64 1
637 ; CHECK-NEXT: [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x i32> poison, i32 [[TMP18]], i64 0
638 ; CHECK-NEXT: [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT26]], <1 x i32> poison, <1 x i32> zeroinitializer
639 ; CHECK-NEXT: [[TMP19:%.*]] = mul <1 x i32> [[SPLAT_SPLAT27]], [[BLOCK25]]
640 ; CHECK-NEXT: [[TMP20:%.*]] = add <1 x i32> [[TMP17]], [[TMP19]]
641 ; CHECK-NEXT: [[BLOCK28:%.*]] = shufflevector <1 x i32> [[TMP10]], <1 x i32> poison, <1 x i32> zeroinitializer
642 ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[SPLIT24]], i64 2
643 ; CHECK-NEXT: [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i64 0
644 ; CHECK-NEXT: [[SPLAT_SPLAT30:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT29]], <1 x i32> poison, <1 x i32> zeroinitializer
645 ; CHECK-NEXT: [[TMP22:%.*]] = mul <1 x i32> [[SPLAT_SPLAT30]], [[BLOCK28]]
646 ; CHECK-NEXT: [[TMP23:%.*]] = add <1 x i32> [[TMP20]], [[TMP22]]
647 ; CHECK-NEXT: [[BLOCK31:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <1 x i32> zeroinitializer
648 ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[SPLIT24]], i64 3
649 ; CHECK-NEXT: [[SPLAT_SPLATINSERT32:%.*]] = insertelement <1 x i32> poison, i32 [[TMP24]], i64 0
650 ; CHECK-NEXT: [[SPLAT_SPLAT33:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT32]], <1 x i32> poison, <1 x i32> zeroinitializer
651 ; CHECK-NEXT: [[TMP25:%.*]] = mul <1 x i32> [[SPLAT_SPLAT33]], [[BLOCK31]]
652 ; CHECK-NEXT: [[TMP26:%.*]] = add <1 x i32> [[TMP23]], [[TMP25]]
653 ; CHECK-NEXT: [[BLOCK34:%.*]] = shufflevector <1 x i32> [[TMP12]], <1 x i32> poison, <1 x i32> zeroinitializer
654 ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[SPLIT24]], i64 4
655 ; CHECK-NEXT: [[SPLAT_SPLATINSERT35:%.*]] = insertelement <1 x i32> poison, i32 [[TMP27]], i64 0
656 ; CHECK-NEXT: [[SPLAT_SPLAT36:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT35]], <1 x i32> poison, <1 x i32> zeroinitializer
657 ; CHECK-NEXT: [[TMP28:%.*]] = mul <1 x i32> [[SPLAT_SPLAT36]], [[BLOCK34]]
658 ; CHECK-NEXT: [[TMP29:%.*]] = add <1 x i32> [[TMP26]], [[TMP28]]
659 ; CHECK-NEXT: [[BLOCK37:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> poison, <1 x i32> zeroinitializer
660 ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i32> [[SPLIT24]], i64 5
661 ; CHECK-NEXT: [[SPLAT_SPLATINSERT38:%.*]] = insertelement <1 x i32> poison, i32 [[TMP30]], i64 0
662 ; CHECK-NEXT: [[SPLAT_SPLAT39:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT38]], <1 x i32> poison, <1 x i32> zeroinitializer
663 ; CHECK-NEXT: [[TMP31:%.*]] = mul <1 x i32> [[SPLAT_SPLAT39]], [[BLOCK37]]
664 ; CHECK-NEXT: [[TMP32:%.*]] = add <1 x i32> [[TMP29]], [[TMP31]]
665 ; CHECK-NEXT: [[BLOCK40:%.*]] = shufflevector <1 x i32> [[TMP14]], <1 x i32> poison, <1 x i32> zeroinitializer
666 ; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[SPLIT24]], i64 6
667 ; CHECK-NEXT: [[SPLAT_SPLATINSERT41:%.*]] = insertelement <1 x i32> poison, i32 [[TMP33]], i64 0
668 ; CHECK-NEXT: [[SPLAT_SPLAT42:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT41]], <1 x i32> poison, <1 x i32> zeroinitializer
669 ; CHECK-NEXT: [[TMP34:%.*]] = mul <1 x i32> [[SPLAT_SPLAT42]], [[BLOCK40]]
670 ; CHECK-NEXT: [[TMP35:%.*]] = add <1 x i32> [[TMP32]], [[TMP34]]
671 ; CHECK-NEXT: [[BLOCK43:%.*]] = shufflevector <1 x i32> [[TMP15]], <1 x i32> poison, <1 x i32> zeroinitializer
672 ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i32> [[SPLIT24]], i64 7
673 ; CHECK-NEXT: [[SPLAT_SPLATINSERT44:%.*]] = insertelement <1 x i32> poison, i32 [[TMP36]], i64 0
674 ; CHECK-NEXT: [[SPLAT_SPLAT45:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT44]], <1 x i32> poison, <1 x i32> zeroinitializer
675 ; CHECK-NEXT: [[TMP37:%.*]] = mul <1 x i32> [[SPLAT_SPLAT45]], [[BLOCK43]]
676 ; CHECK-NEXT: [[TMP38:%.*]] = add <1 x i32> [[TMP35]], [[TMP37]]
677 ; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <1 x i32> [[TMP38]], <1 x i32> poison, <1 x i32> zeroinitializer
678 ; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x i32> poison, <1 x i32> [[TMP39]], <1 x i32> <i32 1>
679 ; CHECK-NEXT: ret <1 x i32> [[TMP40]]
; Input IR under test: %d is the first multiply operand, %add.2 (the end of
; the add chain) the second.
682 %add.1 = add <8 x i32> %a, %b
683 %add.2 = add <8 x i32> %add.1, %c
684 %res = tail call <1 x i32> @llvm.matrix.multiply.v1i32.v8i32.v8i32(<8 x i32> %d, <8 x i32> %add.2, i32 1, i32 8, i32 1)
; Test case: llvm.matrix.multiply on two <8 x i64> arguments with dimension
; arguments (1, 8, 1), i.e. the same operand shapes as the i32 tests in this
; file but with a 64-bit element type.
; Expected lowering (autogenerated assertions below): %a stays a whole
; <8 x i64> vector, %b is split into eight <1 x i64> vectors, and the product
; is built from eight extractelement / splat / mul steps chained with
; <1 x i64> adds.
688 define <1 x i64> @dotproduct_i64_v8(<8 x i64> %a, <8 x i64> %b) {
689 ; CHECK-LABEL: @dotproduct_i64_v8(
691 ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <8 x i64> [[A:%.*]], <8 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
692 ; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <8 x i64> [[B:%.*]], <8 x i64> poison, <1 x i32> zeroinitializer
693 ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <8 x i64> [[B]], <8 x i64> poison, <1 x i32> <i32 1>
694 ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <8 x i64> [[B]], <8 x i64> poison, <1 x i32> <i32 2>
695 ; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <8 x i64> [[B]], <8 x i64> poison, <1 x i32> <i32 3>
696 ; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <8 x i64> [[B]], <8 x i64> poison, <1 x i32> <i32 4>
697 ; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <8 x i64> [[B]], <8 x i64> poison, <1 x i32> <i32 5>
698 ; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <8 x i64> [[B]], <8 x i64> poison, <1 x i32> <i32 6>
699 ; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <8 x i64> [[B]], <8 x i64> poison, <1 x i32> <i32 7>
700 ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <1 x i64> [[SPLIT1]], <1 x i64> poison, <1 x i32> zeroinitializer
701 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i64> [[SPLIT]], i64 0
702 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i64> poison, i64 [[TMP0]], i64 0
703 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT]], <1 x i64> poison, <1 x i32> zeroinitializer
704 ; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i64> [[SPLAT_SPLAT]], [[BLOCK]]
705 ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <1 x i64> [[SPLIT2]], <1 x i64> poison, <1 x i32> zeroinitializer
706 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i64> [[SPLIT]], i64 1
707 ; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i64> poison, i64 [[TMP2]], i64 0
708 ; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT10]], <1 x i64> poison, <1 x i32> zeroinitializer
709 ; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i64> [[SPLAT_SPLAT11]], [[BLOCK9]]
710 ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i64> [[TMP1]], [[TMP3]]
711 ; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <1 x i64> [[SPLIT3]], <1 x i64> poison, <1 x i32> zeroinitializer
712 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[SPLIT]], i64 2
713 ; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x i64> poison, i64 [[TMP5]], i64 0
714 ; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT13]], <1 x i64> poison, <1 x i32> zeroinitializer
715 ; CHECK-NEXT: [[TMP6:%.*]] = mul <1 x i64> [[SPLAT_SPLAT14]], [[BLOCK12]]
716 ; CHECK-NEXT: [[TMP7:%.*]] = add <1 x i64> [[TMP4]], [[TMP6]]
717 ; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <1 x i64> [[SPLIT4]], <1 x i64> poison, <1 x i32> zeroinitializer
718 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i64> [[SPLIT]], i64 3
719 ; CHECK-NEXT: [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x i64> poison, i64 [[TMP8]], i64 0
720 ; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT16]], <1 x i64> poison, <1 x i32> zeroinitializer
721 ; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i64> [[SPLAT_SPLAT17]], [[BLOCK15]]
722 ; CHECK-NEXT: [[TMP10:%.*]] = add <1 x i64> [[TMP7]], [[TMP9]]
723 ; CHECK-NEXT: [[BLOCK18:%.*]] = shufflevector <1 x i64> [[SPLIT5]], <1 x i64> poison, <1 x i32> zeroinitializer
724 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i64> [[SPLIT]], i64 4
725 ; CHECK-NEXT: [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x i64> poison, i64 [[TMP11]], i64 0
726 ; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT19]], <1 x i64> poison, <1 x i32> zeroinitializer
727 ; CHECK-NEXT: [[TMP12:%.*]] = mul <1 x i64> [[SPLAT_SPLAT20]], [[BLOCK18]]
728 ; CHECK-NEXT: [[TMP13:%.*]] = add <1 x i64> [[TMP10]], [[TMP12]]
729 ; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <1 x i64> [[SPLIT6]], <1 x i64> poison, <1 x i32> zeroinitializer
730 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i64> [[SPLIT]], i64 5
731 ; CHECK-NEXT: [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x i64> poison, i64 [[TMP14]], i64 0
732 ; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT22]], <1 x i64> poison, <1 x i32> zeroinitializer
733 ; CHECK-NEXT: [[TMP15:%.*]] = mul <1 x i64> [[SPLAT_SPLAT23]], [[BLOCK21]]
734 ; CHECK-NEXT: [[TMP16:%.*]] = add <1 x i64> [[TMP13]], [[TMP15]]
735 ; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <1 x i64> [[SPLIT7]], <1 x i64> poison, <1 x i32> zeroinitializer
736 ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i64> [[SPLIT]], i64 6
737 ; CHECK-NEXT: [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x i64> poison, i64 [[TMP17]], i64 0
738 ; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT25]], <1 x i64> poison, <1 x i32> zeroinitializer
739 ; CHECK-NEXT: [[TMP18:%.*]] = mul <1 x i64> [[SPLAT_SPLAT26]], [[BLOCK24]]
740 ; CHECK-NEXT: [[TMP19:%.*]] = add <1 x i64> [[TMP16]], [[TMP18]]
741 ; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <1 x i64> [[SPLIT8]], <1 x i64> poison, <1 x i32> zeroinitializer
742 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[SPLIT]], i64 7
743 ; CHECK-NEXT: [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x i64> poison, i64 [[TMP20]], i64 0
744 ; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT28]], <1 x i64> poison, <1 x i32> zeroinitializer
745 ; CHECK-NEXT: [[TMP21:%.*]] = mul <1 x i64> [[SPLAT_SPLAT29]], [[BLOCK27]]
746 ; CHECK-NEXT: [[TMP22:%.*]] = add <1 x i64> [[TMP19]], [[TMP21]]
747 ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <1 x i64> [[TMP22]], <1 x i64> poison, <1 x i32> zeroinitializer
748 ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <1 x i64> poison, <1 x i64> [[TMP23]], <1 x i32> <i32 1>
749 ; CHECK-NEXT: ret <1 x i64> [[TMP24]]
; Input IR under test: the multiply result is named %c here (this function
; has no %c parameter).
752 %c = tail call <1 x i64> @llvm.matrix.multiply.v1i64.v8i64.v8i64(<8 x i64> %a, <8 x i64> %b, i32 1, i32 8, i32 1)
; Matrix-multiply intrinsic overload taking two <8 x i64> operands and
; returning <1 x i64>; this is the callee of the <8 x i64> test directly above.
756 declare <1 x i64> @llvm.matrix.multiply.v1i64.v8i64.v8i64(<8 x i64>, <8 x i64>, i32, i32, i32)
; Dot product whose operands come from memory: both <6 x i16> operands are
; loaded through pointer arguments and multiplied with llvm.matrix.multiply
; using dimension arguments (1, 6, 1), producing a <1 x i16> result.
;
; The assertions below expect the lowered IR to:
;   - load the LHS with a single <6 x i16> load (align 16);
;   - load the RHS as six separate <1 x i16> loads at element offsets 0..5
;     from %rhs_address, with alignments 16, 2, 4, 2, 8, 2;
;   - for each index, extract the LHS element, splat it to <1 x i16>,
;     multiply it by the matching RHS element, and accumulate via an add chain;
;   - return the accumulated value after the two trailing shufflevectors.
;
; NOTE(review): the per-load alignments presumably follow from the 16-byte
; base alignment combined with each load's byte offset -- confirm against the
; pass implementation.
;
; The assertions are autogenerated (see the note at the top of this file);
; regenerate them with utils/update_test_checks.py instead of editing by hand.
758 define <1 x i16> @LoadInst_dot_product_i16_v6(ptr %lhs_address, ptr %rhs_address) {
759 ; CHECK-LABEL: @LoadInst_dot_product_i16_v6(
761 ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <6 x i16>, ptr [[LHS_ADDRESS:%.*]], align 16
762 ; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <1 x i16>, ptr [[RHS_ADDRESS:%.*]], align 16
763 ; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i16, ptr [[RHS_ADDRESS]], i64 1
764 ; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <1 x i16>, ptr [[VEC_GEP]], align 2
765 ; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr i16, ptr [[RHS_ADDRESS]], i64 2
766 ; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <1 x i16>, ptr [[VEC_GEP3]], align 4
767 ; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr i16, ptr [[RHS_ADDRESS]], i64 3
768 ; CHECK-NEXT: [[COL_LOAD6:%.*]] = load <1 x i16>, ptr [[VEC_GEP5]], align 2
769 ; CHECK-NEXT: [[VEC_GEP7:%.*]] = getelementptr i16, ptr [[RHS_ADDRESS]], i64 4
770 ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <1 x i16>, ptr [[VEC_GEP7]], align 8
771 ; CHECK-NEXT: [[VEC_GEP9:%.*]] = getelementptr i16, ptr [[RHS_ADDRESS]], i64 5
772 ; CHECK-NEXT: [[COL_LOAD10:%.*]] = load <1 x i16>, ptr [[VEC_GEP9]], align 2
773 ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <1 x i16> [[COL_LOAD1]], <1 x i16> poison, <1 x i32> zeroinitializer
774 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <6 x i16> [[COL_LOAD]], i64 0
775 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i16> poison, i16 [[TMP0]], i64 0
776 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i16> [[SPLAT_SPLATINSERT]], <1 x i16> poison, <1 x i32> zeroinitializer
777 ; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i16> [[SPLAT_SPLAT]], [[BLOCK]]
778 ; CHECK-NEXT: [[BLOCK11:%.*]] = shufflevector <1 x i16> [[COL_LOAD2]], <1 x i16> poison, <1 x i32> zeroinitializer
779 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i16> [[COL_LOAD]], i64 1
780 ; CHECK-NEXT: [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i16> poison, i16 [[TMP2]], i64 0
781 ; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i16> [[SPLAT_SPLATINSERT12]], <1 x i16> poison, <1 x i32> zeroinitializer
782 ; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i16> [[SPLAT_SPLAT13]], [[BLOCK11]]
783 ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i16> [[TMP1]], [[TMP3]]
784 ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <1 x i16> [[COL_LOAD4]], <1 x i16> poison, <1 x i32> zeroinitializer
785 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <6 x i16> [[COL_LOAD]], i64 2
786 ; CHECK-NEXT: [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i16> poison, i16 [[TMP5]], i64 0
787 ; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x i16> [[SPLAT_SPLATINSERT15]], <1 x i16> poison, <1 x i32> zeroinitializer
788 ; CHECK-NEXT: [[TMP6:%.*]] = mul <1 x i16> [[SPLAT_SPLAT16]], [[BLOCK14]]
789 ; CHECK-NEXT: [[TMP7:%.*]] = add <1 x i16> [[TMP4]], [[TMP6]]
790 ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <1 x i16> [[COL_LOAD6]], <1 x i16> poison, <1 x i32> zeroinitializer
791 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x i16> [[COL_LOAD]], i64 3
792 ; CHECK-NEXT: [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i16> poison, i16 [[TMP8]], i64 0
793 ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i16> [[SPLAT_SPLATINSERT18]], <1 x i16> poison, <1 x i32> zeroinitializer
794 ; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i16> [[SPLAT_SPLAT19]], [[BLOCK17]]
795 ; CHECK-NEXT: [[TMP10:%.*]] = add <1 x i16> [[TMP7]], [[TMP9]]
796 ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <1 x i16> [[COL_LOAD8]], <1 x i16> poison, <1 x i32> zeroinitializer
797 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <6 x i16> [[COL_LOAD]], i64 4
798 ; CHECK-NEXT: [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i16> poison, i16 [[TMP11]], i64 0
799 ; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i16> [[SPLAT_SPLATINSERT21]], <1 x i16> poison, <1 x i32> zeroinitializer
800 ; CHECK-NEXT: [[TMP12:%.*]] = mul <1 x i16> [[SPLAT_SPLAT22]], [[BLOCK20]]
801 ; CHECK-NEXT: [[TMP13:%.*]] = add <1 x i16> [[TMP10]], [[TMP12]]
802 ; CHECK-NEXT: [[BLOCK23:%.*]] = shufflevector <1 x i16> [[COL_LOAD10]], <1 x i16> poison, <1 x i32> zeroinitializer
803 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <6 x i16> [[COL_LOAD]], i64 5
804 ; CHECK-NEXT: [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i16> poison, i16 [[TMP14]], i64 0
805 ; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i16> [[SPLAT_SPLATINSERT24]], <1 x i16> poison, <1 x i32> zeroinitializer
806 ; CHECK-NEXT: [[TMP15:%.*]] = mul <1 x i16> [[SPLAT_SPLAT25]], [[BLOCK23]]
807 ; CHECK-NEXT: [[TMP16:%.*]] = add <1 x i16> [[TMP13]], [[TMP15]]
808 ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <1 x i16> [[TMP16]], <1 x i16> poison, <1 x i32> zeroinitializer
809 ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <1 x i16> poison, <1 x i16> [[TMP17]], <1 x i32> <i32 1>
810 ; CHECK-NEXT: ret <1 x i16> [[TMP18]]
; Input IR: load both <6 x i16> operands (no explicit alignment given), then
; multiply them with dimension arguments (1, 6, 1) and return the result.
813 %lhs = load <6 x i16>, ptr %lhs_address
814 %rhs = load <6 x i16>, ptr %rhs_address
815 %result = tail call <1 x i16> @llvm.matrix.multiply.v1i16.v6i16.v6i16(<6 x i16> %lhs, <6 x i16> %rhs, i32 1, i32 6, i32 1)
816 ret <1 x i16> %result
; Matrix-multiply intrinsic overload taking two <6 x i16> operands and
; returning <1 x i16>; called by @LoadInst_dot_product_i16_v6.
819 declare <1 x i16> @llvm.matrix.multiply.v1i16.v6i16.v6i16(<6 x i16>, <6 x i16>, i32, i32, i32)