Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / aarch64-bf16-lane-intrinsics.c
blob ccd6d17412a8b4096e143390a3859032bab3e5f9
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \
3 // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-LE %s
4 // RUN: %clang_cc1 -triple aarch64_be -target-feature +neon -target-feature +bf16 \
5 // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-BE %s
7 // REQUIRES: aarch64-registered-target || arm-registered-target
9 #include <arm_neon.h>
11 // CHECK-LE-LABEL: @test_vcopy_lane_bf16_v1(
12 // CHECK-LE-NEXT: entry:
13 // CHECK-LE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 3
14 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGET_LANE]], i32 1
15 // CHECK-LE-NEXT: ret <4 x bfloat> [[VSET_LANE]]
17 // CHECK-BE-LABEL: @test_vcopy_lane_bf16_v1(
18 // CHECK-BE-NEXT: entry:
19 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
20 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> [[B]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
21 // CHECK-BE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[SHUFFLE1]], i32 3
22 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGET_LANE]], i32 1
23 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> [[VSET_LANE]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
24 // CHECK-BE-NEXT: ret <4 x bfloat> [[SHUFFLE5]]
// Exercises vcopy_lane_bf16 with 4-lane operands: lane 3 of `b` is copied
// into lane 1 of `a` and the updated vector is returned. The assertions above
// expect a single extractelement/insertelement pair on the little-endian run;
// on the big-endian run both operands and the result are additionally
// lane-reversed by shufflevector.
26 bfloat16x4_t test_vcopy_lane_bf16_v1(bfloat16x4_t a, bfloat16x4_t b) {
27 return vcopy_lane_bf16(a, 1, b, 3);
30 // CHECK-LE-LABEL: @test_vcopy_lane_bf16_v2(
31 // CHECK-LE-NEXT: entry:
32 // CHECK-LE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 0
33 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGET_LANE]], i32 2
34 // CHECK-LE-NEXT: ret <4 x bfloat> [[VSET_LANE]]
36 // CHECK-BE-LABEL: @test_vcopy_lane_bf16_v2(
37 // CHECK-BE-NEXT: entry:
38 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
39 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> [[B]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
40 // CHECK-BE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[SHUFFLE1]], i32 0
41 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGET_LANE]], i32 2
42 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> [[VSET_LANE]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
43 // CHECK-BE-NEXT: ret <4 x bfloat> [[SHUFFLE5]]
// Second vcopy_lane_bf16 case with different lane indices: lane 0 of `b` is
// copied into lane 2 of `a` (both 4-lane vectors). The expected IR has the
// same shape as the first variant; only the extract/insert indices differ.
45 bfloat16x4_t test_vcopy_lane_bf16_v2(bfloat16x4_t a, bfloat16x4_t b) {
46 return vcopy_lane_bf16(a, 2, b, 0);
49 // CHECK-LE-LABEL: @test_vcopyq_lane_bf16_v1(
50 // CHECK-LE-NEXT: entry:
51 // CHECK-LE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 2
52 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[A:%.*]], bfloat [[VGET_LANE]], i32 0
53 // CHECK-LE-NEXT: ret <8 x bfloat> [[VSET_LANE]]
55 // CHECK-BE-LABEL: @test_vcopyq_lane_bf16_v1(
56 // CHECK-BE-NEXT: entry:
57 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
58 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> [[B]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
59 // CHECK-BE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[SHUFFLE1]], i32 2
60 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[SHUFFLE]], bfloat [[VGET_LANE]], i32 0
61 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> [[VSET_LANE]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
62 // CHECK-BE-NEXT: ret <8 x bfloat> [[SHUFFLE5]]
// Exercises vcopyq_lane_bf16, which mixes vector widths: lane 2 of the 4-lane
// source `b` is copied into lane 0 of the 8-lane destination `a`. The
// assertions above expect the extract to operate on <4 x bfloat> and the
// insert on <8 x bfloat>.
64 bfloat16x8_t test_vcopyq_lane_bf16_v1(bfloat16x8_t a, bfloat16x4_t b) {
65 return vcopyq_lane_bf16(a, 0, b, 2);
68 // CHECK-LE-LABEL: @test_vcopyq_lane_bf16_v2(
69 // CHECK-LE-NEXT: entry:
70 // CHECK-LE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 0
71 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[A:%.*]], bfloat [[VGET_LANE]], i32 6
72 // CHECK-LE-NEXT: ret <8 x bfloat> [[VSET_LANE]]
74 // CHECK-BE-LABEL: @test_vcopyq_lane_bf16_v2(
75 // CHECK-BE-NEXT: entry:
76 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
77 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> [[B]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
78 // CHECK-BE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[SHUFFLE1]], i32 0
79 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[SHUFFLE]], bfloat [[VGET_LANE]], i32 6
80 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> [[VSET_LANE]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
81 // CHECK-BE-NEXT: ret <8 x bfloat> [[SHUFFLE5]]
// Second vcopyq_lane_bf16 case: lane 0 of the 4-lane source `b` is copied
// into lane 6 of the 8-lane destination `a`, i.e. a destination index that
// lies outside the range of the 4-lane source type.
83 bfloat16x8_t test_vcopyq_lane_bf16_v2(bfloat16x8_t a, bfloat16x4_t b) {
84 return vcopyq_lane_bf16(a, 6, b, 0);
87 // CHECK-LE-LABEL: @test_vcopy_laneq_bf16_v1(
88 // CHECK-LE-NEXT: entry:
89 // CHECK-LE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 7
90 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 0
91 // CHECK-LE-NEXT: ret <4 x bfloat> [[VSET_LANE]]
93 // CHECK-BE-LABEL: @test_vcopy_laneq_bf16_v1(
94 // CHECK-BE-NEXT: entry:
95 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
96 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x bfloat> [[B:%.*]], <8 x bfloat> [[B]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
97 // CHECK-BE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[SHUFFLE1]], i32 7
98 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 0
99 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> [[VSET_LANE]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
100 // CHECK-BE-NEXT: ret <4 x bfloat> [[SHUFFLE5]]
// Exercises vcopy_laneq_bf16, the narrowing direction: lane 7 of the 8-lane
// source `b` is copied into lane 0 of the 4-lane destination `a`. The
// assertions above expect the extract on <8 x bfloat> and the insert on
// <4 x bfloat>.
102 bfloat16x4_t test_vcopy_laneq_bf16_v1(bfloat16x4_t a, bfloat16x8_t b) {
103 return vcopy_laneq_bf16(a, 0, b, 7);
106 // CHECK-LE-LABEL: @test_vcopy_laneq_bf16_v2(
107 // CHECK-LE-NEXT: entry:
108 // CHECK-LE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 4
109 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 3
110 // CHECK-LE-NEXT: ret <4 x bfloat> [[VSET_LANE]]
112 // CHECK-BE-LABEL: @test_vcopy_laneq_bf16_v2(
113 // CHECK-BE-NEXT: entry:
114 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
115 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x bfloat> [[B:%.*]], <8 x bfloat> [[B]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
116 // CHECK-BE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[SHUFFLE1]], i32 4
117 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 3
118 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> [[VSET_LANE]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
119 // CHECK-BE-NEXT: ret <4 x bfloat> [[SHUFFLE5]]
// Second vcopy_laneq_bf16 case: lane 4 of the 8-lane source `b` is copied
// into lane 3 (the last lane) of the 4-lane destination `a`.
121 bfloat16x4_t test_vcopy_laneq_bf16_v2(bfloat16x4_t a, bfloat16x8_t b) {
122 return vcopy_laneq_bf16(a, 3, b, 4);
125 // CHECK-LE-LABEL: @test_vcopyq_laneq_bf16_v1(
126 // CHECK-LE-NEXT: entry:
127 // CHECK-LE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 7
128 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 3
129 // CHECK-LE-NEXT: ret <8 x bfloat> [[VSET_LANE]]
131 // CHECK-BE-LABEL: @test_vcopyq_laneq_bf16_v1(
132 // CHECK-BE-NEXT: entry:
133 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
134 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x bfloat> [[B:%.*]], <8 x bfloat> [[B]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
135 // CHECK-BE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[SHUFFLE1]], i32 7
136 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 3
137 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> [[VSET_LANE]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
138 // CHECK-BE-NEXT: ret <8 x bfloat> [[SHUFFLE5]]
// Exercises vcopyq_laneq_bf16 with 8-lane operands on both sides: lane 7 of
// `b` is copied into lane 3 of `a`. On the big-endian run the assertions above
// expect 8-element reversing shuffles around the extract/insert pair.
140 bfloat16x8_t test_vcopyq_laneq_bf16_v1(bfloat16x8_t a, bfloat16x8_t b) {
141 return vcopyq_laneq_bf16(a, 3, b, 7);
145 // CHECK-LE-LABEL: @test_vcopyq_laneq_bf16_v2(
146 // CHECK-LE-NEXT: entry:
147 // CHECK-LE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 2
148 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 6
149 // CHECK-LE-NEXT: ret <8 x bfloat> [[VSET_LANE]]
151 // CHECK-BE-LABEL: @test_vcopyq_laneq_bf16_v2(
152 // CHECK-BE-NEXT: entry:
153 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
154 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x bfloat> [[B:%.*]], <8 x bfloat> [[B]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
155 // CHECK-BE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[SHUFFLE1]], i32 2
156 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 6
157 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> [[VSET_LANE]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
158 // CHECK-BE-NEXT: ret <8 x bfloat> [[SHUFFLE5]]
// Second vcopyq_laneq_bf16 case: lane 2 of the 8-lane source `b` is copied
// into lane 6 of the 8-lane destination `a`.
160 bfloat16x8_t test_vcopyq_laneq_bf16_v2(bfloat16x8_t a, bfloat16x8_t b) {
161 return vcopyq_laneq_bf16(a, 6, b, 2);