1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
2 // RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature -fullfp16 \
3 // RUN: -disable-O0-optnone -emit-llvm -o - %s \
4 // RUN: | opt -S -passes=sroa \
5 // RUN: | FileCheck %s --check-prefixes=CHECK-NOFP16
6 // RUN: %clang_cc1 -triple armv8a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature +fullfp16 \
7 // RUN: -disable-O0-optnone -emit-llvm -o - %s \
8 // RUN: | opt -S -passes=sroa \
9 // RUN: | FileCheck %s --check-prefixes=CHECK-FP16
11 // REQUIRES: arm-registered-target
15 // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vbsl_f16(
16 // CHECK-NOFP16-SAME: <4 x i16> noundef [[A:%.*]], <2 x i32> noundef [[B_COERCE:%.*]], <2 x i32> noundef [[C_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
17 // CHECK-NOFP16-NEXT: entry:
18 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
19 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C_COERCE]] to <4 x half>
20 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
21 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32>
22 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half>
23 // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
24 // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
25 // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
26 // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
27 // CHECK-NOFP16-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> [[TMP8]])
28 // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x half>
29 // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <2 x i32>
30 // CHECK-NOFP16-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <4 x half>
31 // CHECK-NOFP16-NEXT: [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <2 x i32>
32 // CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP12]]
34 // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vbsl_f16(
35 // CHECK-FP16-SAME: <4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
36 // CHECK-FP16-NEXT: entry:
37 // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
38 // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
39 // CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
40 // CHECK-FP16-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
41 // CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x half>
42 // CHECK-FP16-NEXT: ret <4 x half> [[TMP3]]
44 float16x4_t
test_vbsl_f16(uint16x4_t a
, float16x4_t b
, float16x4_t c
) {
45 return vbsl_f16(a
, b
, c
);
48 // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vbslq_f16(
49 // CHECK-NOFP16-SAME: <8 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B_COERCE:%.*]], <4 x i32> noundef [[C_COERCE:%.*]]) #[[ATTR0]] {
50 // CHECK-NOFP16-NEXT: entry:
51 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
52 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[C_COERCE]] to <8 x half>
53 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
54 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
55 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half>
56 // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
57 // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
58 // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
59 // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
60 // CHECK-NOFP16-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP6]], <16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
61 // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x half>
62 // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <4 x i32>
63 // CHECK-NOFP16-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <8 x half>
64 // CHECK-NOFP16-NEXT: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <4 x i32>
65 // CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP12]]
67 // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vbslq_f16(
68 // CHECK-FP16-SAME: <8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
69 // CHECK-FP16-NEXT: entry:
70 // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
71 // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
72 // CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
73 // CHECK-FP16-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
74 // CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x half>
75 // CHECK-FP16-NEXT: ret <8 x half> [[TMP3]]
77 float16x8_t
test_vbslq_f16(uint16x8_t a
, float16x8_t b
, float16x8_t c
) {
78 return vbslq_f16(a
, b
, c
);
81 // CHECK-NOFP16-LABEL: define dso_local void @test_vzip_f16(
82 // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
83 // CHECK-NOFP16-NEXT: entry:
84 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
85 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
86 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
87 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32>
88 // CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
89 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half>
90 // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
91 // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
92 // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
93 // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
94 // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
95 // CHECK-NOFP16-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
96 // CHECK-NOFP16-NEXT: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]]
97 // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
98 // CHECK-NOFP16-NEXT: [[VZIP3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
99 // CHECK-NOFP16-NEXT: store <4 x i16> [[VZIP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META3]]
100 // CHECK-NOFP16-NEXT: ret void
102 // CHECK-FP16-LABEL: define dso_local void @test_vzip_f16(
103 // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
104 // CHECK-FP16-NEXT: entry:
105 // CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
106 // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
107 // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
108 // CHECK-FP16-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
109 // CHECK-FP16-NEXT: store <4 x half> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]]
110 // CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1
111 // CHECK-FP16-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
112 // CHECK-FP16-NEXT: store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META3]]
113 // CHECK-FP16-NEXT: ret void
115 float16x4x2_t
test_vzip_f16(float16x4_t a
, float16x4_t b
) {
116 return vzip_f16(a
, b
);
119 // CHECK-NOFP16-LABEL: define dso_local void @test_vzipq_f16(
120 // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
121 // CHECK-NOFP16-NEXT: entry:
122 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
123 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
124 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
125 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
126 // CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
127 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half>
128 // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
129 // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
130 // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
131 // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
132 // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
133 // CHECK-NOFP16-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
134 // CHECK-NOFP16-NEXT: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]]
135 // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
136 // CHECK-NOFP16-NEXT: [[VZIP3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
137 // CHECK-NOFP16-NEXT: store <8 x i16> [[VZIP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META6]]
138 // CHECK-NOFP16-NEXT: ret void
140 // CHECK-FP16-LABEL: define dso_local void @test_vzipq_f16(
141 // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
142 // CHECK-FP16-NEXT: entry:
143 // CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
144 // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
145 // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
146 // CHECK-FP16-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
147 // CHECK-FP16-NEXT: store <8 x half> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]]
148 // CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1
149 // CHECK-FP16-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
150 // CHECK-FP16-NEXT: store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META6]]
151 // CHECK-FP16-NEXT: ret void
153 float16x8x2_t
test_vzipq_f16(float16x8_t a
, float16x8_t b
) {
154 return vzipq_f16(a
, b
);
157 // CHECK-NOFP16-LABEL: define dso_local void @test_vuzp_f16(
158 // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
159 // CHECK-NOFP16-NEXT: entry:
160 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
161 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
162 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
163 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32>
164 // CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
165 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half>
166 // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
167 // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
168 // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
169 // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
170 // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
171 // CHECK-NOFP16-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
172 // CHECK-NOFP16-NEXT: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]]
173 // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
174 // CHECK-NOFP16-NEXT: [[VUZP3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
175 // CHECK-NOFP16-NEXT: store <4 x i16> [[VUZP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META9]]
176 // CHECK-NOFP16-NEXT: ret void
178 // CHECK-FP16-LABEL: define dso_local void @test_vuzp_f16(
179 // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
180 // CHECK-FP16-NEXT: entry:
181 // CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
182 // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
183 // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
184 // CHECK-FP16-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
185 // CHECK-FP16-NEXT: store <4 x half> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]]
186 // CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1
187 // CHECK-FP16-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
188 // CHECK-FP16-NEXT: store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META9]]
189 // CHECK-FP16-NEXT: ret void
191 float16x4x2_t
test_vuzp_f16(float16x4_t a
, float16x4_t b
) {
192 return vuzp_f16(a
, b
);
195 // CHECK-NOFP16-LABEL: define dso_local void @test_vuzpq_f16(
196 // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
197 // CHECK-NOFP16-NEXT: entry:
198 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
199 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
200 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
201 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
202 // CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
203 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half>
204 // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
205 // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
206 // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
207 // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
208 // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
209 // CHECK-NOFP16-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
210 // CHECK-NOFP16-NEXT: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]]
211 // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
212 // CHECK-NOFP16-NEXT: [[VUZP3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
213 // CHECK-NOFP16-NEXT: store <8 x i16> [[VUZP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META12]]
214 // CHECK-NOFP16-NEXT: ret void
216 // CHECK-FP16-LABEL: define dso_local void @test_vuzpq_f16(
217 // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
218 // CHECK-FP16-NEXT: entry:
219 // CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
220 // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
221 // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
222 // CHECK-FP16-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
223 // CHECK-FP16-NEXT: store <8 x half> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]]
224 // CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1
225 // CHECK-FP16-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
226 // CHECK-FP16-NEXT: store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META12]]
227 // CHECK-FP16-NEXT: ret void
229 float16x8x2_t
test_vuzpq_f16(float16x8_t a
, float16x8_t b
) {
230 return vuzpq_f16(a
, b
);
233 // CHECK-NOFP16-LABEL: define dso_local void @test_vtrn_f16(
234 // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
235 // CHECK-NOFP16-NEXT: entry:
236 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
237 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
238 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
239 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32>
240 // CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
241 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half>
242 // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
243 // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
244 // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
245 // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
246 // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
247 // CHECK-NOFP16-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
248 // CHECK-NOFP16-NEXT: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]]
249 // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
250 // CHECK-NOFP16-NEXT: [[VTRN3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
251 // CHECK-NOFP16-NEXT: store <4 x i16> [[VTRN3_I]], ptr [[TMP10]], align 4, !alias.scope [[META15]]
252 // CHECK-NOFP16-NEXT: ret void
254 // CHECK-FP16-LABEL: define dso_local void @test_vtrn_f16(
255 // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
256 // CHECK-FP16-NEXT: entry:
257 // CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
258 // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
259 // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
260 // CHECK-FP16-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
261 // CHECK-FP16-NEXT: store <4 x half> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]]
262 // CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1
263 // CHECK-FP16-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
264 // CHECK-FP16-NEXT: store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope [[META15]]
265 // CHECK-FP16-NEXT: ret void
267 float16x4x2_t
test_vtrn_f16(float16x4_t a
, float16x4_t b
) {
268 return vtrn_f16(a
, b
);
271 // CHECK-NOFP16-LABEL: define dso_local void @test_vtrnq_f16(
272 // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
273 // CHECK-NOFP16-NEXT: entry:
274 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
275 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
276 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
277 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
278 // CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]])
279 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half>
280 // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
281 // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
282 // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
283 // CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
284 // CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
285 // CHECK-NOFP16-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
286 // CHECK-NOFP16-NEXT: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]]
287 // CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
288 // CHECK-NOFP16-NEXT: [[VTRN3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
289 // CHECK-NOFP16-NEXT: store <8 x i16> [[VTRN3_I]], ptr [[TMP10]], align 4, !alias.scope [[META18]]
290 // CHECK-NOFP16-NEXT: ret void
292 // CHECK-FP16-LABEL: define dso_local void @test_vtrnq_f16(
293 // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
294 // CHECK-FP16-NEXT: entry:
295 // CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]])
296 // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
297 // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
298 // CHECK-FP16-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
299 // CHECK-FP16-NEXT: store <8 x half> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]]
300 // CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1
301 // CHECK-FP16-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
302 // CHECK-FP16-NEXT: store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope [[META18]]
303 // CHECK-FP16-NEXT: ret void
305 float16x8x2_t
test_vtrnq_f16(float16x8_t a
, float16x8_t b
) {
306 return vtrnq_f16(a
, b
);
309 // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vmov_n_f16(
310 // CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
311 // CHECK-NOFP16-NEXT: entry:
312 // CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
313 // CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
314 // CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
315 // CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
316 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VECINIT3]] to <2 x i32>
317 // CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP0]]
319 // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vmov_n_f16(
320 // CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
321 // CHECK-FP16-NEXT: entry:
322 // CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
323 // CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
324 // CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
325 // CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
326 // CHECK-FP16-NEXT: ret <4 x half> [[VECINIT3]]
328 float16x4_t
test_vmov_n_f16(float16_t a
) {
329 return vmov_n_f16(a
);
332 // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vmovq_n_f16(
333 // CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
334 // CHECK-NOFP16-NEXT: entry:
335 // CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
336 // CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
337 // CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
338 // CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
339 // CHECK-NOFP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
340 // CHECK-NOFP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
341 // CHECK-NOFP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
342 // CHECK-NOFP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
343 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VECINIT7]] to <4 x i32>
344 // CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP0]]
346 // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vmovq_n_f16(
347 // CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
348 // CHECK-FP16-NEXT: entry:
349 // CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
350 // CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
351 // CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
352 // CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
353 // CHECK-FP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
354 // CHECK-FP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
355 // CHECK-FP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
356 // CHECK-FP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
357 // CHECK-FP16-NEXT: ret <8 x half> [[VECINIT7]]
359 float16x8_t
test_vmovq_n_f16(float16_t a
) {
360 return vmovq_n_f16(a
);
363 // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vdup_n_f16(
364 // CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
365 // CHECK-NOFP16-NEXT: entry:
366 // CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
367 // CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
368 // CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
369 // CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
370 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VECINIT3]] to <2 x i32>
371 // CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP0]]
373 // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vdup_n_f16(
374 // CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
375 // CHECK-FP16-NEXT: entry:
376 // CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
377 // CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
378 // CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
379 // CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
380 // CHECK-FP16-NEXT: ret <4 x half> [[VECINIT3]]
382 float16x4_t
test_vdup_n_f16(float16_t a
) {
383 return vdup_n_f16(a
);
386 // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vdupq_n_f16(
387 // CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
388 // CHECK-NOFP16-NEXT: entry:
389 // CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
390 // CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
391 // CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
392 // CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
393 // CHECK-NOFP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
394 // CHECK-NOFP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
395 // CHECK-NOFP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
396 // CHECK-NOFP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
397 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VECINIT7]] to <4 x i32>
398 // CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP0]]
400 // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vdupq_n_f16(
401 // CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
402 // CHECK-FP16-NEXT: entry:
403 // CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
404 // CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
405 // CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
406 // CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
407 // CHECK-FP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
408 // CHECK-FP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
409 // CHECK-FP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
410 // CHECK-FP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
411 // CHECK-FP16-NEXT: ret <8 x half> [[VECINIT7]]
413 float16x8_t
test_vdupq_n_f16(float16_t a
) {
414 return vdupq_n_f16(a
);
417 // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vdup_lane_f16(
418 // CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
419 // CHECK-NOFP16-NEXT: entry:
420 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
421 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
422 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
423 // CHECK-NOFP16-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
424 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half>
425 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <2 x i32>
426 // CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP4]]
428 // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vdup_lane_f16(
429 // CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
430 // CHECK-FP16-NEXT: entry:
431 // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
432 // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
433 // CHECK-FP16-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
434 // CHECK-FP16-NEXT: ret <4 x half> [[LANE]]
// Test wrapper: calls vdup_lane_f16 on `a` with the constant 3 and returns
// the result.
// The autogenerated assertions above expect a shufflevector of the input with
// itself whose four mask entries are all 3. In the -fullfp16 run the argument
// and return value are <2 x i32> and the shuffle operates on <4 x i16>; in the
// +fullfp16 run the shuffle operates on <4 x half>.
436 float16x4_t
test_vdup_lane_f16(float16x4_t a
) {
437 return vdup_lane_f16(a
, 3);
440 // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vdupq_lane_f16(
441 // CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
442 // CHECK-NOFP16-NEXT: entry:
443 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
444 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
445 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
446 // CHECK-NOFP16-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
447 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half>
448 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <4 x i32>
449 // CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP4]]
451 // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vdupq_lane_f16(
452 // CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
453 // CHECK-FP16-NEXT: entry:
454 // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
455 // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
456 // CHECK-FP16-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
457 // CHECK-FP16-NEXT: ret <8 x half> [[LANE]]
// Test wrapper: calls vdupq_lane_f16 on `a` with the constant 3 and returns
// the result.
// The autogenerated assertions above expect a shufflevector of the
// four-element input with itself using an eight-entry mask of all 3s, giving
// an eight-element result. In the -fullfp16 run the argument is <2 x i32>,
// the shuffle operates on <4 x i16>, and the return value is <4 x i32>; in the
// +fullfp16 run the types are <4 x half> in and <8 x half> out.
459 float16x8_t
test_vdupq_lane_f16(float16x4_t a
) {
460 return vdupq_lane_f16(a
, 3);
463 // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vext_f16(
464 // CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
465 // CHECK-NOFP16-NEXT: entry:
466 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
467 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
468 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
469 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
470 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
471 // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
472 // CHECK-NOFP16-NEXT: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
473 // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[VEXT]] to <4 x half>
474 // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <2 x i32>
475 // CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP7]]
477 // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vext_f16(
478 // CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
479 // CHECK-FP16-NEXT: entry:
480 // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
481 // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
482 // CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
483 // CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
484 // CHECK-FP16-NEXT: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
485 // CHECK-FP16-NEXT: ret <4 x half> [[VEXT]]
// Test wrapper: calls vext_f16 on `a` and `b` with the constant 2 and returns
// the result.
// The autogenerated assertions above expect a shufflevector of the two inputs
// with mask <2, 3, 4, 5>. In the -fullfp16 run both arguments and the return
// value are <2 x i32> and the shuffle operates on <4 x i16>; in the +fullfp16
// run the shuffle operates on <4 x half>.
487 float16x4_t
test_vext_f16(float16x4_t a
, float16x4_t b
) {
488 return vext_f16(a
, b
, 2);
491 // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vextq_f16(
492 // CHECK-NOFP16-SAME: <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
493 // CHECK-NOFP16-NEXT: entry:
494 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
495 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
496 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <16 x i8>
497 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <16 x i8>
498 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
499 // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
500 // CHECK-NOFP16-NEXT: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
501 // CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[VEXT]] to <8 x half>
502 // CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <4 x i32>
503 // CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP7]]
505 // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vextq_f16(
506 // CHECK-FP16-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
507 // CHECK-FP16-NEXT: entry:
508 // CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
509 // CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
510 // CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
511 // CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
512 // CHECK-FP16-NEXT: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
513 // CHECK-FP16-NEXT: ret <8 x half> [[VEXT]]
// Test wrapper: calls vextq_f16 on `a` and `b` with the constant 5 and returns
// the result.
// The autogenerated assertions above expect a shufflevector of the two inputs
// with mask <5, 6, 7, 8, 9, 10, 11, 12>. In the -fullfp16 run both arguments
// and the return value are <4 x i32> and the shuffle operates on <8 x i16>; in
// the +fullfp16 run the shuffle operates on <8 x half>.
515 float16x8_t
test_vextq_f16(float16x8_t a
, float16x8_t b
) {
516 return vextq_f16(a
, b
, 5);
519 // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vrev64_f16(
520 // CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
521 // CHECK-NOFP16-NEXT: entry:
522 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
523 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
524 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
525 // CHECK-NOFP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
526 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[SHUFFLE_I]] to <2 x i32>
527 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
528 // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <2 x i32>
529 // CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP5]]
531 // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vrev64_f16(
532 // CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
533 // CHECK-FP16-NEXT: entry:
534 // CHECK-FP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
535 // CHECK-FP16-NEXT: ret <4 x half> [[SHUFFLE_I]]
// Test wrapper: passes `a` to vrev64_f16 and returns its result.
// The autogenerated assertions above expect a shufflevector of the input with
// itself using mask <3, 2, 1, 0>. In the -fullfp16 run the argument and return
// value are <2 x i32>, with bitcasts to and from <4 x half> around the
// shuffle; in the +fullfp16 run the shuffle is applied to `a` directly.
537 float16x4_t
test_vrev64_f16(float16x4_t a
) {
538 return vrev64_f16(a
);
541 // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vrev64q_f16(
542 // CHECK-NOFP16-SAME: <4 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
543 // CHECK-NOFP16-NEXT: entry:
544 // CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
545 // CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
546 // CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <8 x half>
547 // CHECK-NOFP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
548 // CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[SHUFFLE_I]] to <4 x i32>
549 // CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
550 // CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <4 x i32>
551 // CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP5]]
553 // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vrev64q_f16(
554 // CHECK-FP16-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
555 // CHECK-FP16-NEXT: entry:
556 // CHECK-FP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
557 // CHECK-FP16-NEXT: ret <8 x half> [[SHUFFLE_I]]
// Test wrapper: passes `a` to vrev64q_f16 and returns its result.
// The autogenerated assertions above expect a shufflevector of the input with
// itself using mask <3, 2, 1, 0, 7, 6, 5, 4>. In the -fullfp16 run the
// argument and return value are <4 x i32>, with bitcasts to and from
// <8 x half> around the shuffle; in the +fullfp16 run the shuffle is applied
// to `a` directly.
559 float16x8_t
test_vrev64q_f16(float16x8_t a
) {
560 return vrev64q_f16(a
);
563 // CHECK-NOFP16: [[META3]] = !{[[META4:![0-9]+]]}
564 // CHECK-NOFP16: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vzip_f16: %agg.result"}
565 // CHECK-NOFP16: [[META5]] = distinct !{[[META5]], !"vzip_f16"}
566 // CHECK-NOFP16: [[META6]] = !{[[META7:![0-9]+]]}
567 // CHECK-NOFP16: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vzipq_f16: %agg.result"}
568 // CHECK-NOFP16: [[META8]] = distinct !{[[META8]], !"vzipq_f16"}
569 // CHECK-NOFP16: [[META9]] = !{[[META10:![0-9]+]]}
570 // CHECK-NOFP16: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vuzp_f16: %agg.result"}
571 // CHECK-NOFP16: [[META11]] = distinct !{[[META11]], !"vuzp_f16"}
572 // CHECK-NOFP16: [[META12]] = !{[[META13:![0-9]+]]}
573 // CHECK-NOFP16: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vuzpq_f16: %agg.result"}
574 // CHECK-NOFP16: [[META14]] = distinct !{[[META14]], !"vuzpq_f16"}
575 // CHECK-NOFP16: [[META15]] = !{[[META16:![0-9]+]]}
576 // CHECK-NOFP16: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_f16: %agg.result"}
577 // CHECK-NOFP16: [[META17]] = distinct !{[[META17]], !"vtrn_f16"}
578 // CHECK-NOFP16: [[META18]] = !{[[META19:![0-9]+]]}
579 // CHECK-NOFP16: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrnq_f16: %agg.result"}
580 // CHECK-NOFP16: [[META20]] = distinct !{[[META20]], !"vtrnq_f16"}
582 // CHECK-FP16: [[META3]] = !{[[META4:![0-9]+]]}
583 // CHECK-FP16: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vzip_f16: %agg.result"}
584 // CHECK-FP16: [[META5]] = distinct !{[[META5]], !"vzip_f16"}
585 // CHECK-FP16: [[META6]] = !{[[META7:![0-9]+]]}
586 // CHECK-FP16: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vzipq_f16: %agg.result"}
587 // CHECK-FP16: [[META8]] = distinct !{[[META8]], !"vzipq_f16"}
588 // CHECK-FP16: [[META9]] = !{[[META10:![0-9]+]]}
589 // CHECK-FP16: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vuzp_f16: %agg.result"}
590 // CHECK-FP16: [[META11]] = distinct !{[[META11]], !"vuzp_f16"}
591 // CHECK-FP16: [[META12]] = !{[[META13:![0-9]+]]}
592 // CHECK-FP16: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vuzpq_f16: %agg.result"}
593 // CHECK-FP16: [[META14]] = distinct !{[[META14]], !"vuzpq_f16"}
594 // CHECK-FP16: [[META15]] = !{[[META16:![0-9]+]]}
595 // CHECK-FP16: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_f16: %agg.result"}
596 // CHECK-FP16: [[META17]] = distinct !{[[META17]], !"vtrn_f16"}
597 // CHECK-FP16: [[META18]] = !{[[META19:![0-9]+]]}
598 // CHECK-FP16: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrnq_f16: %agg.result"}
599 // CHECK-FP16: [[META20]] = distinct !{[[META20]], !"vtrnq_f16"}