1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon,+lut,+bf16 | FileCheck %s
; vluti2.lane on i8 elements: %vn is a 64-bit <8 x i8>, %vm is a 64-bit <8 x i8>,
; lane immediate 0. Both arguments are referenced as D registers, so the expected
; output has one kill annotation per argument ($d0 -> $q0, $d1 -> $q1) followed
; by a single luti2 on the .16b arrangement.
4 define <16 x i8> @test_luti2_lane_i8(<8 x i8> %vn, <8 x i8> %vm){
5 ; CHECK-LABEL: test_luti2_lane_i8:
7 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
8 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
9 ; CHECK-NEXT: luti2 v0.16b, { v0.16b }, v1[0]
11 %res= tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> %vn, <8 x i8> %vm, i32 0)
; vluti2.laneq on i8 elements: %vn is a 64-bit <8 x i8>, %vm is a full 128-bit
; <16 x i8>, lane immediate 0. Only %vn is referenced as a D register, so a
; single kill annotation ($d0 -> $q0) precedes the luti2.
15 define <16 x i8> @test_luti2_laneq_i8(<8 x i8> %vn, <16 x i8> %vm){
16 ; CHECK-LABEL: test_luti2_laneq_i8:
18 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
19 ; CHECK-NEXT: luti2 v0.16b, { v0.16b }, v1[0]
21 %res= tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> %vn, <16 x i8> %vm, i32 0)
; "q" form of vluti2.lane on i8 elements: %vn is a full 128-bit <16 x i8>, %vm is
; a 64-bit <8 x i8>, lane immediate 0. Only %vm is referenced as a D register,
; so the single kill annotation is for $d1 -> $q1.
25 define <16 x i8> @test_luti2q_lane_i8(<16 x i8> %vn, <8 x i8> %vm){
26 ; CHECK-LABEL: test_luti2q_lane_i8:
28 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
29 ; CHECK-NEXT: luti2 v0.16b, { v0.16b }, v1[0]
31 %res= tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> %vn, <8 x i8> %vm, i32 0)
; "q" form of vluti2.laneq on i8 elements: both %vn and %vm are 128-bit
; <16 x i8>, lane immediate 0. No kill annotations are expected; the luti2 is
; the first checked instruction.
35 define <16 x i8> @test_luti2q_laneq_i8(<16 x i8> %vn, <16 x i8> %vm){
36 ; CHECK-LABEL: test_luti2q_laneq_i8:
38 ; CHECK-NEXT: luti2 v0.16b, { v0.16b }, v1[0]
40 %res= tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> %vn, <16 x i8> %vm, i32 0)
; vluti2.lane on i16 elements: %vn is a 64-bit <4 x i16>, %vm is a 64-bit
; <8 x i8>, lane immediate 0. Both arguments get a kill annotation
; ($d0 -> $q0, $d1 -> $q1); the luti2 uses the .8h arrangement.
44 define <8 x i16> @test_luti2_lane_i16(<4 x i16> %vn, <8 x i8> %vm){
45 ; CHECK-LABEL: test_luti2_lane_i16:
47 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
48 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
49 ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
51 %res= tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> %vn, <8 x i8> %vm, i32 0)
; vluti2.laneq on i16 elements: %vn is a 64-bit <4 x i16>, %vm is a 128-bit
; <16 x i8>, lane immediate 0. Only %vn needs a kill annotation ($d0 -> $q0)
; before the .8h luti2.
55 define <8 x i16> @test_luti2_laneq_i16(<4 x i16> %vn, <16 x i8> %vm){
56 ; CHECK-LABEL: test_luti2_laneq_i16:
58 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
59 ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
61 %res= tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> %vn, <16 x i8> %vm, i32 0)
; "q" form of vluti2.lane on i16 elements: %vn is a full 128-bit <8 x i16>
; (agreeing with the .v8i16.v8i16 intrinsic suffix), %vm is a 64-bit <8 x i8>,
; lane immediate 0. Only %vm is referenced as a D register, so the single kill
; annotation is for $d1 -> $q1, as in the i8 and f16 "q" lane tests.
; NOTE(review): CHECK lines were adjusted by hand to match the new operand
; width; re-run utils/update_llc_test_checks.py to confirm.
65 define <8 x i16> @test_luti2q_lane_i16(<8 x i16> %vn, <8 x i8> %vm){
66 ; CHECK-LABEL: test_luti2q_lane_i16:
69 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
70 ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
72 %res= tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> %vn, <8 x i8> %vm, i32 0)
; "q" form of vluti2.laneq on i16 elements: %vn is a 128-bit <8 x i16>, %vm is a
; 128-bit <16 x i8>, lane immediate 0. No kill annotations are expected before
; the .8h luti2.
76 define <8 x i16> @test_luti2q_laneq_i16(<8 x i16> %vn, <16 x i8> %vm){
77 ; CHECK-LABEL: test_luti2q_laneq_i16:
79 ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
81 %res= tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> %vn, <16 x i8> %vm, i32 0)
; vluti2.lane on half elements: %vn is a 64-bit <4 x half>, %vm is a 64-bit
; <8 x i8>, lane immediate 0. Both arguments get a kill annotation
; ($d0 -> $q0, $d1 -> $q1); the luti2 uses the .8h arrangement.
85 define <8 x half> @test_luti2_lane_f16(<4 x half> %vn, <8 x i8> %vm){
86 ; CHECK-LABEL: test_luti2_lane_f16:
88 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
89 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
90 ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
92 %res= tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v4f16(<4 x half> %vn, <8 x i8> %vm, i32 0)
; vluti2.laneq on half elements: %vn is a 64-bit <4 x half>, %vm is a 128-bit
; <16 x i8>, lane immediate 0. Only %vn needs a kill annotation ($d0 -> $q0)
; before the .8h luti2.
; The intrinsic suffix is spelled .v8f16.v4f16 so that it matches the
; <4 x half> operand type actually passed.
96 define <8 x half> @test_luti2_laneq_f16(<4 x half> %vn, <16 x i8> %vm){
97 ; CHECK-LABEL: test_luti2_laneq_f16:
99 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
100 ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
102 %res= tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v4f16(<4 x half> %vn, <16 x i8> %vm, i32 0)
; "q" form of vluti2.lane on half elements: %vn is a 128-bit <8 x half>, %vm is
; a 64-bit <8 x i8>, lane immediate 0. Only %vm is referenced as a D register,
; so the single kill annotation is for $d1 -> $q1.
106 define <8 x half> @test_luti2q_lane_f16(<8 x half> %vn, <8 x i8> %vm){
107 ; CHECK-LABEL: test_luti2q_lane_f16:
109 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
110 ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
112 %res= tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v8f16(<8 x half> %vn, <8 x i8> %vm, i32 0)
; "q" form of vluti2.laneq on half elements: %vn is a 128-bit <8 x half>, %vm is
; a 128-bit <16 x i8>, lane immediate 0. No kill annotations are expected
; before the .8h luti2.
116 define <8 x half> @test_luti2q_laneq_f16(<8 x half> %vn, <16 x i8> %vm){
117 ; CHECK-LABEL: test_luti2q_laneq_f16:
119 ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
121 %res= tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v8f16(<8 x half> %vn, <16 x i8> %vm, i32 0)
; vluti2.lane on bfloat elements: %vn is a 64-bit <4 x bfloat>, %vm is a 64-bit
; <8 x i8>, lane immediate 0. Both arguments get a kill annotation
; ($d0 -> $q0, $d1 -> $q1); the luti2 uses the .8h arrangement.
125 define <8 x bfloat> @test_luti2_lane_bf16(<4 x bfloat> %vn, <8 x i8> %vm){
126 ; CHECK-LABEL: test_luti2_lane_bf16:
128 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
129 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
130 ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
132 %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v4bf16(<4 x bfloat> %vn, <8 x i8> %vm, i32 0)
133 ret <8 x bfloat> %res
; vluti2.laneq on bfloat elements: %vn is a 64-bit <4 x bfloat>, %vm is a
; 128-bit <16 x i8>, lane immediate 0. Only %vn needs a kill annotation
; ($d0 -> $q0) before the .8h luti2.
136 define <8 x bfloat> @test_luti2_laneq_bf16(<4 x bfloat> %vn, <16 x i8> %vm){
137 ; CHECK-LABEL: test_luti2_laneq_bf16:
139 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
140 ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
142 %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v4bf16(<4 x bfloat> %vn, <16 x i8> %vm, i32 0)
143 ret <8 x bfloat> %res
; "q" form of vluti2.lane on bfloat elements: %vn is a full 128-bit
; <8 x bfloat> (agreeing with the .v8bf16.v8bf16 intrinsic suffix), %vm is a
; 64-bit <8 x i8>, lane immediate 0. Only %vm is referenced as a D register, so
; the single kill annotation is for $d1 -> $q1, as in the i8 and f16 "q" lane
; tests.
; NOTE(review): CHECK lines were adjusted by hand to match the new operand
; width; re-run utils/update_llc_test_checks.py to confirm.
146 define <8 x bfloat> @test_luti2q_lane_bf16(<8 x bfloat> %vn, <8 x i8> %vm){
147 ; CHECK-LABEL: test_luti2q_lane_bf16:
150 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
151 ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
153 %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v8bf16(<8 x bfloat> %vn, <8 x i8> %vm, i32 0)
154 ret <8 x bfloat> %res
; "q" form of vluti2.laneq on bfloat elements: %vn is a 128-bit <8 x bfloat>,
; %vm is a 128-bit <16 x i8>, lane immediate 0. No kill annotations are
; expected before the .8h luti2.
157 define <8 x bfloat> @test_luti2q_laneq_bf16(<8 x bfloat> %vn, <16 x i8> %vm){
158 ; CHECK-LABEL: test_luti2q_laneq_bf16:
160 ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
162 %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v8bf16(<8 x bfloat> %vn, <16 x i8> %vm, i32 0)
163 ret <8 x bfloat> %res
; vluti4q.lane on i8 elements: %vn is a 128-bit <16 x i8>, %vm is a 64-bit
; <8 x i8>, lane immediate 0. %vm gets a kill annotation ($d1 -> $q1) and the
; expected instruction is luti4 on the .16b arrangement.
166 define <16 x i8> @test_luti4q_lane_i8(<16 x i8> %vn, <8 x i8> %vm){
167 ; CHECK-LABEL: test_luti4q_lane_i8:
169 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
170 ; CHECK-NEXT: luti4 v0.16b, { v0.16b }, v1[0]
172 %res= tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> %vn, <8 x i8> %vm, i32 0)
; vluti4q.laneq on i8 elements: both %vn and %vm are 128-bit <16 x i8>, lane
; immediate 0. No kill annotations are expected before the .16b luti4.
176 define <16 x i8> @test_luti4q_laneq_i8(<16 x i8> %vn, <16 x i8> %vm){
177 ; CHECK-LABEL: test_luti4q_laneq_i8:
179 ; CHECK-NEXT: luti4 v0.16b, { v0.16b }, v1[0]
181 %res= tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> %vn, <16 x i8> %vm, i32 0)
; vluti4q.lane.x2 on i16 elements: two 128-bit <8 x i16> vectors (%vn1, %vn2)
; plus a 64-bit <8 x i8> %vm, lane immediate 1. The kill annotations show $q0
; and $q1 being treated as the $q0_q1 register pair and $d2 as $q2; the
; expected instruction is the two-register form luti4 { v0.8h, v1.8h }, v2[1].
185 define <8 x i16> @test_luti4q_lane_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <8 x i8> %vm){
186 ; CHECK-LABEL: test_luti4q_lane_x2_i16:
188 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
189 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
190 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
191 ; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
193 %res= tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> %vn1, <8 x i16> %vn2, <8 x i8> %vm, i32 1)
; vluti4q.laneq.x2 on i16 elements: two 128-bit <8 x i16> vectors (%vn1, %vn2)
; plus a 128-bit <16 x i8> %vm, lane immediate 1. Only the $q0/$q1 -> $q0_q1
; pair annotations are expected (no D-register kill for %vm) before the
; two-register luti4.
197 define <8 x i16> @test_luti4q_laneq_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <16 x i8> %vm){
198 ; CHECK-LABEL: test_luti4q_laneq_x2_i16:
200 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
201 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
202 ; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
204 %res= tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> %vn1, <8 x i16> %vn2, <16 x i8> %vm, i32 1)
; vluti4q.lane.x2 on half elements: two 128-bit <8 x half> vectors (%vn1, %vn2)
; plus a 64-bit <8 x i8> %vm, lane immediate 1. The kill annotations show $q0
; and $q1 being treated as the $q0_q1 register pair and $d2 as $q2; the
; expected instruction is the two-register form luti4 { v0.8h, v1.8h }, v2[1].
208 define <8 x half> @test_luti4q_lane_x2_f16(<8 x half>%vn1, <8 x half> %vn2, <8 x i8> %vm){
209 ; CHECK-LABEL: test_luti4q_lane_x2_f16:
211 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
212 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
213 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
214 ; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
216 %res= tail call <8 x half> @llvm.aarch64.neon.vluti4q.lane.x2.v8f16(<8 x half> %vn1, <8 x half> %vn2, <8 x i8> %vm, i32 1)
; vluti4q.laneq.x2 on half elements: two 128-bit <8 x half> vectors (%vn1,
; %vn2) plus a 128-bit <16 x i8> %vm, lane immediate 1. Only the
; $q0/$q1 -> $q0_q1 pair annotations are expected before the two-register
; luti4.
221 define <8 x half> @test_luti4q_laneq_x2_f16(<8 x half>%vn1, <8 x half> %vn2, <16 x i8> %vm){
222 ; CHECK-LABEL: test_luti4q_laneq_x2_f16:
224 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
225 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
226 ; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
228 %res= tail call <8 x half> @llvm.aarch64.neon.vluti4q.laneq.x2.v8f16(<8 x half> %vn1, <8 x half> %vn2, <16 x i8> %vm, i32 1)
; vluti4q.laneq.x2 on bfloat elements: two 128-bit <8 x bfloat> vectors (%vn1,
; %vn2) plus a 128-bit <16 x i8> %vm, lane immediate 1. Only the
; $q0/$q1 -> $q0_q1 pair annotations are expected before the two-register
; luti4.
232 define <8 x bfloat> @test_luti4q_laneq_x2_bf16(<8 x bfloat>%vn1, <8 x bfloat> %vn2, <16 x i8> %vm){
233 ; CHECK-LABEL: test_luti4q_laneq_x2_bf16:
235 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
236 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
237 ; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
239 %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.laneq.x2.v8bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <16 x i8> %vm, i32 1)
240 ret <8 x bfloat> %res
; vluti4q.lane.x2 on bfloat elements: two 128-bit <8 x bfloat> vectors (%vn1,
; %vn2) plus a 64-bit <8 x i8> %vm, lane immediate 1. The kill annotations show
; $q0 and $q1 being treated as the $q0_q1 register pair and $d2 as $q2; the
; expected instruction is the two-register form luti4 { v0.8h, v1.8h }, v2[1].
243 define <8 x bfloat> @test_luti4q_lane_x2_bf16(<8 x bfloat>%vn1, <8 x bfloat> %vn2, <8 x i8> %vm){
244 ; CHECK-LABEL: test_luti4q_lane_x2_bf16:
246 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
247 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
248 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
249 ; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
251 %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <8 x i8> %vm, i32 1)
252 ret <8 x bfloat> %res